LLVM 23.0.0git
WebAssemblyTargetTransformInfo.cpp
Go to the documentation of this file.
1//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file defines the WebAssembly-specific TargetTransformInfo
11/// implementation.
12///
13//===----------------------------------------------------------------------===//
14
17#include "llvm/IR/IntrinsicsWebAssembly.h"
19
21using namespace llvm;
22
23#define DEBUG_TYPE "wasmtti"
24
// Reports the kind of population-count support for a TyWidth-bit type.
// TyWidth must be a power of two; this is asserted below.
// NOTE(review): listing lines 25 and 28 are missing from this extract, so the
// return type and the value actually returned are not visible here.
26WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
27 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
29}
30
31unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
32 unsigned Result = BaseT::getNumberOfRegisters(ClassID);
33
34 // For SIMD, use at least 16 registers, as a rough guess.
35 bool Vector = (ClassID == 1);
36 if (Vector)
37 Result = std::max(Result, 16u);
38
39 return Result;
40}
41
// Body of the register-width query: each return below belongs to one case of
// the switch on the register kind K.
// NOTE(review): the function signature and the case labels (listing lines
// 42-43, 45, 47, 49) are missing from this extract, so which register kind
// maps to which return cannot be confirmed from here.
44 switch (K) {
  // Fixed width of 64 bits.
46 return TypeSize::getFixed(64);
  // Fixed width: 128 bits when the subtarget has SIMD128, otherwise 64.
48 return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  // Scalable width with a minimum size of 0.
50 return TypeSize::getScalable(0);
51 }
52
  // Only reached when no case of the switch returned.
53 llvm_unreachable("Unsupported register kind");
54}
55
// Arithmetic-instruction cost (the parameter list continues from a line that
// is not shown). Two adjustments are visible: a small SIMD128 cost table for
// vector multiplies, and a penalty for vector shifts whose shift amount is not
// uniform.
// NOTE(review): listing lines 56, 58, 81-82, 96 and 98 are missing from this
// extract, so the start of the signature, the declaration of `Cost`, and parts
// of the shift-cost expression are not visible here.
57 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
59 ArrayRef<const Value *> Args, const Instruction *CxtI) const {
60
  // With SIMD128, look the operation up in a fixed table first; a hit returns
  // the table cost directly.
61 if (ST->hasSIMD128()) {
62 static const CostTblEntry ArithCostTbl[]{
63 // extmul + (maybe awkward) shuffle
64 {ISD::MUL, MVT::v8i8, 4},
65 // 2x extmul + (okay) shuffle
66 {ISD::MUL, MVT::v16i8, 4},
67 // extmul
68 {ISD::MUL, MVT::v4i16, 1},
69 // extmul
70 {ISD::MUL, MVT::v2i32, 1},
71 };
72 EVT DstVT = TLI->getValueType(DL, Ty);
  // getSimpleVT() is only called on simple value types, so extended types
  // skip the table.
73 if (DstVT.isSimple()) {
74 int ISD = TLI->InstructionOpcodeToISD(Opcode);
75 if (const auto *Entry =
76 CostTableLookup(ArithCostTbl, ISD, DstVT.getSimpleVT()))
77 return Entry->Cost;
78 }
79 }
80
  // Tail of the statement that initializes `Cost`; its head is on the missing
  // lines (presumably a call into the base implementation -- confirm).
83 Opcode, Ty, CostKind, Op1Info, Op2Info);
84
85 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
86 switch (Opcode) {
87 case Instruction::LShr:
88 case Instruction::AShr:
89 case Instruction::Shl:
90 // SIMD128's shifts currently only accept a scalar shift count. For each
91 // element, we'll need to extract, op, insert. The following is a rough
92 // approximation.
93 if (!Op2Info.isUniform())
94 Cost =
  // Element count times the scalar cost of the same opcode; the remaining
  // terms of this expression are on missing lines.
95 cast<FixedVectorType>(VTy)->getNumElements() *
97 getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
99 break;
100 }
101 }
102 return Cost;
103}
104
// Cast cost (the parameter list continues from a line that is not shown).
// Falls back to BaseT::getCastInstrCost when either type is not a simple value
// type, when SIMD128 is unavailable, or when no special case or table entry
// matches. Otherwise the cost comes from the extmul special case or from
// ConversionTbl below.
// NOTE(review): listing lines 105, 107 and 129 are missing from this extract
// (start and end of the signature, and part of the extmul condition).
106 unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
108 int ISD = TLI->InstructionOpcodeToISD(Opcode);
109 auto SrcTy = TLI->getValueType(DL, Src);
110 auto DstTy = TLI->getValueType(DL, Dst);
111
  // getSimpleVT() below is only used on simple types; anything else is left to
  // the base implementation.
112 if (!SrcTy.isSimple() || !DstTy.isSimple()) {
113 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
114 }
115
116 if (!ST->hasSIMD128()) {
117 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
118 }
119
120 auto DstVT = DstTy.getSimpleVT();
121 auto SrcVT = SrcTy.getSimpleVT();
122
  // Context-sensitive case: the cast instruction is known and has exactly one
  // user, so the user's opcode can be inspected.
123 if (I && I->hasOneUser()) {
124 auto *SingleUser = cast<Instruction>(*I->user_begin());
125 int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());
126
127 // extmul_low support
  // NOTE(review): the remainder of this condition (listing line 129) is
  // missing; only the "user is a multiply" half is visible.
128 if (UserISD == ISD::MUL &&
130 // Free low extensions.
131 if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
132 (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
133 (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
134 return 0;
135 }
136 // Will require an additional extlow operation for the intermediate
137 // i16/i32 value.
138 if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
139 (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
140 return 1;
141 }
142 }
143 }
144
  // Table rows are {ISD opcode, destination type, source type, cost}, matching
  // the (ISD, DstVT, SrcVT) argument order of the lookup after the table.
145 static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
146 // extend_low
147 {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
148 {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
149 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
150 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
151 {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
152 {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
153 // 2 x extend_low
154 {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
155 {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
156 {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
157 {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
158 // extend_low, extend_high
159 {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
160 {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
161 {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
162 {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
163 {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
164 {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
165 // 2x extend_low, extend_high
166 {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
167 {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
168 {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
169 {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
170 // 6x extend_low, extend_high
171 {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6},
172 {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6},
173 // shuffle
174 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
175 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
176 {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
177 {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
178 // narrow, and
179 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
180 {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
181 // narrow, 2x and
182 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
183 // 3x narrow, 4x and
184 {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
185 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
186 // 7x narrow, 8x and
187 {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
188 // convert_i32x4
189 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
190 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
191 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
192 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
193 // extend_low, convert
194 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
195 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
196 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
197 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
198 // extend_low x 2, convert
199 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
200 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
201 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
202 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
203 // several shuffles
204 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
205 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
206 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
  // NOTE(review): the next row is identical to the UINT_TO_FP v8i8 row two
  // lines up, and there is no UINT_TO_FP row for a v8i16 source to pair with
  // the SINT_TO_FP v8i16 row above. The source type here was presumably meant
  // to be MVT::v8i16 -- confirm and fix.
207 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
208 /// trunc_sat, const, and, 3x narrow
209 {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
210 {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
211 {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
212 {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
213 /// trunc_sat, const, and, narrow
214 {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
215 {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
216 {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
217 {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
218 // 2x trunc_sat, const, 2x and, 3x narrow
219 {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
220 {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
221 // 2x trunc_sat, const, 2x and, narrow
222 {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
223 {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
224 };
225
  // A table hit returns that entry's cost.
226 if (const auto *Entry =
227 ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
228 return Entry->Cost;
229 }
230
  // No special case applied: defer to the base implementation.
231 return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
232}
233
// Fills in and returns the memcmp-expansion options for this target.
// IsZeroCmp is not read in the lines visible here.
// NOTE(review): listing lines 234 and 236 are missing from this extract, so
// the return type and the declaration of `Options` are not visible.
235WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
237
238 Options.AllowOverlappingLoads = true;
239
  // A load size of 16 is offered only when SIMD128 is available, and is pushed
  // before the smaller sizes (sizes are presumably in bytes -- confirm).
240 if (ST->hasSIMD128())
241 Options.LoadSizes.push_back(16);
242
243 Options.LoadSizes.append({8, 4, 2, 1});
  // The load budget comes from the target lowering info, and the per-block
  // load count is set to that same budget.
244 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
245 Options.NumLoadsPerBlock = Options.MaxNumLoads;
246
247 return Options;
248}
249
// Memory-operation cost (the parameter list continues from a line that is not
// shown). For fixed-vector types on SIMD128 targets, loads and stores of
// selected bit widths get a flat cost of 2; everything else is deferred to
// BaseT::getMemoryOpCost.
// NOTE(review): listing lines 250, 252 and 269 are missing from this extract
// (parts of the signature, and the statement guarded by the validity check on
// the type-legalization cost).
251 unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
253 const Instruction *I) const {
254 // FIXME: Load latency isn't handled here
  // Defer to the base implementation without SIMD128, for anything that is not
  // a fixed vector, and for load-latency queries.
255 if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty) ||
256 (Opcode == Instruction::Load && CostKind == TTI::TCK_Latency)) {
257 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
258 CostKind);
259 }
260
261 EVT VT = TLI->getValueType(DL, Ty, true);
262 // Type legalization can't handle structs
263 if (VT == MVT::Other)
264 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
265 CostKind);
266
267 auto LT = getTypeLegalizationCost(Ty);
  // NOTE(review): the statement controlled by this `if` (listing line 269) is
  // missing from the extract.
268 if (!LT.first.isValid())
270
271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
272 unsigned width = VT.getSizeInBits();
273 if (ISD == ISD::LOAD) {
274 // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
275 // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
276 // loads are twice as expensive as scalar.
277 switch (width) {
278 default:
279 break;
280 case 32:
281 case 64:
282 case 128:
283 return 2;
284 }
285 } else if (ISD == ISD::STORE) {
286 // For stores, we can use store lane operations.
287 switch (width) {
288 default:
289 break;
290 case 8:
291 case 16:
292 case 32:
293 case 64:
294 case 128:
295 return 2;
296 }
297 }
298
  // Any other width or opcode: use the base implementation's cost.
299 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind);
300}
301
// Shuffle cost (the parameter list continues from a line that is not shown).
// A broadcast on a SIMD128 target can cost 1; every other shuffle is costed by
// BaseT::getShuffleCost using the canonicalized kind.
// NOTE(review): listing lines 302, 304-305 and 314 are missing from this
// extract (parts of the signature and the remainder of the broadcast
// condition).
303 TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
306 const Instruction *CxtI) const {
307 // Canonicalize the ShuffleKind in case optimizations didn't.
308 // Otherwise, we might end up with the wrong ShuffleKind to match against.
309
310 Kind = improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp);
311
312 // Wasm SIMD128 has native splat instructions for all lane types.
  // NOTE(review): this condition continues on the missing line 314.
313 if (ST->hasSIMD128() && Kind == TTI::SK_Broadcast &&
315 return 1;
316
317 return BaseT::getShuffleCost(Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp,
318 Args, CxtI);
319}
320
// Interleaved memory-operation cost (the parameter list continues from a line
// that is not shown). For unmasked accesses with Factor up to 4, the cost is a
// shuffle cost from ShuffleCostTbl plus NumAccesses times the memory cost of
// one sub-vector. Masked accesses and table misses are costed by
// BaseT::getInterleavedMemoryOpCost.
// NOTE(review): listing lines 321, 329, 342, 347 and 350 are missing from this
// extract. Those are the start of the signature and the statements controlled
// by several of the guard `if`s below, so what the guards do on failure is not
// visible here.
322 unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
323 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
324 bool UseMaskForCond, bool UseMaskForGaps) const {
325 assert(Factor >= 2 && "Invalid interleave factor");
326
327 auto *VecTy = cast<VectorType>(Ty);
  // Guard: no SIMD128, or not a fixed vector (guarded statement is missing).
328 if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
330 }
331
  // Masked interleaved accesses are left to the base implementation.
332 if (UseMaskForCond || UseMaskForGaps)
333 return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
334 Alignment, AddressSpace, CostKind,
335 UseMaskForCond, UseMaskForGaps);
336
337 constexpr unsigned MaxInterleaveFactor = 4;
338 if (Factor <= MaxInterleaveFactor) {
339 unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
340 // Ensure the number of vector elements is greater than 1.
  // The element count must also divide evenly by Factor.
341 if (MinElts < 2 || MinElts % Factor != 0)
343
344 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
345 // Ensure the element type is legal.
346 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
348
  // Only factors 2 and 4 have table entries below.
349 if (Factor != 2 && Factor != 4)
351
  // SubVecTy is one de-interleaved member: same element type, with the element
  // count divided by Factor.
352 auto *SubVecTy =
353 VectorType::get(VecTy->getElementType(),
354 VecTy->getElementCount().divideCoefficientBy(Factor));
355 InstructionCost MemCost =
356 getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);
357
358 unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
359 unsigned MaxVecSize = 128;
  // NOTE(review): the rounding term uses MaxVecSize while the divisor is
  // VecSize (the sub-vector size in bits) -- confirm this mix is intended.
360 unsigned NumAccesses =
361 std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);
362
363 // A stride of two is commonly supported via dedicated instructions, so it
364 // should be relatively cheap for all element sizes. A stride of four is
365 // more expensive as it will likely require more shuffles. Using two
366 // simd128 inputs is considered more expensive and we mainly account for
367 // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
368 // arithmetic kernels with smaller (i8/i16) inputs.
  // Rows are {interleave factor, sub-vector type, shuffle cost}; the factor
  // occupies the slot the lookup below treats as its integer key.
369 static const CostTblEntry ShuffleCostTbl[] = {
370 // One reg.
371 {2, MVT::v2i8, 1}, // interleave 2 x 2i8 into 4i8
372 {2, MVT::v4i8, 1}, // interleave 2 x 4i8 into 8i8
373 {2, MVT::v8i8, 1}, // interleave 2 x 8i8 into 16i8
374 {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
375 {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
376 {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32
377 {2, MVT::v2f32, 1}, // interleave 2 x 2f32 into 4f32
378
379 // Two regs.
380 {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
381 {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
382 {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32
383 {2, MVT::v4f32, 2}, // interleave 2 x 4f32 into 8f32
384
385 // One reg.
386 {4, MVT::v2i8, 4}, // interleave 4 x 2i8 into 8i8
387 {4, MVT::v4i8, 4}, // interleave 4 x 4i8 into 16i8
388 {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16
389
390 // Two regs.
391 {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
392 {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
393 {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32
394 {4, MVT::v2f32, 4}, // interleave 4 x 2f32 into 8f32
395
396 // Four regs.
397 {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
398 };
399
400 EVT ETy = TLI->getValueType(DL, SubVecTy);
  // NOTE(review): getSimpleVT() is called without a visible isSimple() check;
  // presumably the guards above guarantee a simple type -- confirm.
401 if (const auto *Entry =
402 CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
403 return Entry->Cost + (NumAccesses * MemCost);
404 }
405
  // No table entry, or Factor above 4: use the base implementation's cost.
406 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
407 Alignment, AddressSpace, CostKind,
408 UseMaskForCond, UseMaskForGaps);
409}
410
// Vector insert/extract cost (the parameter list continues from a line that is
// not shown). Returns `Cost`, which is adjusted when Index is -1u.
// NOTE(review): listing lines 411, 414 and 419 are missing from this extract:
// the start of the signature, the head of the statement that initializes
// `Cost`, and the statement guarded by the `Index == -1u` check.
412 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
413 const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC) const {
  // Tail of the call that produces the initial `Cost` (head is missing).
415 Opcode, Val, CostKind, Index, Op0, Op1, VIC);
416
417 // SIMD128's insert/extract currently only take constant indices.
  // An Index of -1u presumably denotes a non-constant index -- confirm.
418 if (Index == -1u)
420
421 return Cost;
422}
423
// Partial-reduction cost (the parameter list continues from a line that is not
// shown). Returns `Invalid` unless all of the following hold: VF is fixed,
// SIMD128 is available, Opcode is Add, the accumulator is i32, the input is
// i16 with VF 8 or i8 with VF 16, and operand A is extended. With no BinOp the
// result is `Cost`. With a BinOp it must be Mul, with matching extend kinds
// and matching input types, and the result is `Cost` scaled as shown at the
// end.
// NOTE(review): listing lines 424, 426, 429, 433 and 459 are missing from this
// extract: parts of the signature (including the VF and OpAExtend parameters),
// the definitions of `Invalid` and `Cost`, and the condition guarding the
// second `return Invalid`.
425 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
427 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
428 TTI::TargetCostKind CostKind, std::optional<FastMathFlags> FMF) const {
430 if (!VF.isFixed() || !ST->hasSIMD128())
431 return Invalid;
432
  // NOTE(review): the condition for this return (listing line 433) is missing.
434 return Invalid;
435
436 if (Opcode != Instruction::Add)
437 return Invalid;
438
439 EVT AccumEVT = EVT::getEVT(AccumType);
440 // TODO: Add i64 accumulator.
441 if (AccumEVT != MVT::i32)
442 return Invalid;
443
444 // Possible options:
445 // - i16x8.extadd_pairwise_i8x16_sx
446 // - i32x4.extadd_pairwise_i16x8_sx
447 // - i32x4.dot_i16x8_s
448 // Only try to support dot, for now.
449
450 EVT InputEVT = EVT::getEVT(InputTypeA);
451 if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
452 (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
453 return Invalid;
454 }
455
456 if (OpAExtend == TTI::PR_None)
457 return Invalid;
458
  // No binary operation feeding the reduction: return the unscaled cost.
460 if (!BinOp)
461 return Cost;
462
463 if (OpAExtend != OpBExtend)
464 return Invalid;
465
466 if (*BinOp != Instruction::Mul)
467 return Invalid;
468
469 if (InputTypeA != InputTypeB)
470 return Invalid;
471
472 // Signed inputs can lower to dot
  // i16 x 8: sign-extended inputs cost `Cost`, any other extend kind 2x.
473 if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
474 return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2;
475
476 // Double the size of the lowered sequence.
  // i8 x 16: 2x for sign-extended inputs, 4x for any other extend kind.
477 if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
478 return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4;
479
480 return Invalid;
481}
482
// Dispatches on the intrinsic ID of II; vector_reduce_fadd is the only
// intrinsic singled out in the lines visible here.
// NOTE(review): listing lines 483, 490 and 492 are missing from this extract:
// the start of the signature, the statement for the vector_reduce_fadd case,
// and the final statement after the switch. The values returned are therefore
// not visible here.
484 const IntrinsicInst *II) const {
485
486 switch (II->getIntrinsicID()) {
487 default:
488 break;
489 case Intrinsic::vector_reduce_fadd:
491 }
493}
494
// Sets loop-unrolling preferences in UP for loop L. Returns early, leaving UP
// untouched, when the instruction scan finds something that isLoweredToCall()
// reports as lowered to a call.
// NOTE(review): listing lines 495-496, 502-503 and 515 are missing from this
// extract: the start of the signature, the code that derives `F` from each
// instruction, and one statement after the OptSizeThreshold assignment.
497 OptimizationRemarkEmitter *ORE) const {
498 // Scan the loop: don't unroll loops with calls. This is a standard approach
499 // for most (all?) targets.
500 for (BasicBlock *BB : L->blocks())
501 for (Instruction &I : *BB)
504 if (isLoweredToCall(F))
505 return;
506
507 // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
508 // the various microarchitectures that use the BasicTTI implementation and
509 // has been selected through heuristics across multiple cores and runtimes.
  // Partial, runtime and upper-bound unrolling are all enabled together.
510 UP.Partial = UP.Runtime = UP.UpperBound = true;
511 UP.PartialThreshold = 30;
512
513 // Avoid unrolling when optimizing for size.
514 UP.OptSizeThreshold = 0;
516
517 // Set number of instructions optimized when "back edge"
518 // becomes "fall through" to default value of 2.
519 UP.BEInsns = 2;
520}
521
// Forwards the subtarget's hasTailCall() answer to the caller.
// NOTE(review): the signature (listing line 522) is missing from this extract.
523 return getST()->hasTailCall();
524}
525
// Decides whether the shift-amount operand of a vector shift I should be sunk
// next to I. On success the uses to sink are appended to Ops and true is
// returned; otherwise Ops is left untouched and false is returned.
// NOTE(review): the signature (listing lines 526-527) and the head of the
// pattern match (listing line 538) are missing from this extract.
528 using namespace llvm::PatternMatch;
529
  // Only vector-typed shift instructions are considered.
530 if (!I->getType()->isVectorTy() || !I->isShift())
531 return false;
532
  // Operand 1 is examined as the candidate to sink (the shift amount).
533 Value *V = I->getOperand(1);
534 // We don't need to sink constant splat.
535 if (isa<Constant>(V))
536 return false;
537
  // Tail of a pattern match on V ending in a zero shuffle mask; judging by the
  // "Sink insert" / "Sink shuffle" comments below, V is a shuffle of an insert
  // -- confirm against the missing line 538.
539 m_Value(), m_ZeroMask()))) {
540 // Sink insert
541 Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
542 // Sink shuffle
543 Ops.push_back(&I->getOperandUse(1));
544 return true;
545 }
546
547 return false;
548}
549
550/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
551/// constant.
///
/// Returns nullptr when operand 1 of II is not a Constant, when any mask
/// element is neither undef nor a ConstantInt, or, for the relaxed form, when
/// an index is at or above the lane count as a signed value. Otherwise returns
/// a shufflevector created through Builder.
// NOTE(review): the first lines of the signature (listing lines 552-553) are
// missing from this extract; only its last parameter is visible below.
554 bool IsRelaxed) {
555 auto *V = dyn_cast<Constant>(II.getArgOperand(1));
556 if (!V)
557 return nullptr;
558
559 auto *VecTy = cast<FixedVectorType>(II.getType());
560 unsigned NumElts = VecTy->getNumElements();
  // Indexes[] below is sized for exactly 16 lanes.
561 assert(NumElts == 16);
562
563 // Construct a shuffle mask from constant integers or UNDEFs.
564 int Indexes[16];
565 bool AnyOutOfBounds = false;
566
567 for (unsigned I = 0; I < NumElts; ++I) {
568 Constant *COp = V->getAggregateElement(I);
569 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
570 return nullptr;
571
  // Undef mask elements are recorded as -1 in the shuffle mask.
572 if (isa<UndefValue>(COp)) {
573 Indexes[I] = -1;
574 continue;
575 }
576
  // This check reads the index as signed, so negative indices do not bail out
  // here; they are handled by the unsigned out-of-bounds check below.
577 if (IsRelaxed && cast<ConstantInt>(COp)->getSExtValue() >= NumElts) {
578 // The relaxed_swizzle operation always returns 0 if the lane index is
579 // less than 0 when interpreted as a signed value. For lane indices above
580 // 15, however, it can choose between returning 0 or the lane at `Index %
581 // 16`. However, the choice must be made consistently. As the WebAssembly
582 // spec states:
583 //
584 // "The result of relaxed operators are implementation-dependent, because
585 // the set of possible results may depend on properties of the host
586 // environment, such as its hardware. Technically, their behaviour is
587 // controlled by a set of global parameters to the semantics that an
588 // implementation can instantiate in different ways. These choices are
589 // fixed, that is, parameters are constant during the execution of any
590 // given program."
591 //
592 // The WebAssembly runtime may choose differently from us, so we can't
593 // optimize a relaxed swizzle with lane indices above 15.
594 return nullptr;
595 }
596
597 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
598 if (Index >= NumElts) {
599 AnyOutOfBounds = true;
600 // If there are out-of-bounds indices, the swizzle instruction returns
601 // zeroes in those lanes. We'll provide an all-zeroes vector as the
602 // second argument to shufflevector and read the first element from it.
603 Indexes[I] = NumElts;
604 continue;
605 }
606
607 Indexes[I] = Index;
608 }
609
610 auto *V1 = II.getArgOperand(0);
  // The second shuffle operand is read only for out-of-bounds lanes, so it is
  // poison unless a zero vector is actually needed.
611 auto *V2 =
612 AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);
613
614 return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
615}
616
// Target-specific intrinsic combine. For wasm_swizzle and wasm_relaxed_swizzle
// it tries simplifyWasmSwizzle and, when that yields a value, replaces the
// uses of II with it. In every other case it returns std::nullopt.
// NOTE(review): the line naming the function and its first parameter (listing
// line 618) is missing from this extract.
617std::optional<Instruction *>
619 IntrinsicInst &II) const {
620 Intrinsic::ID IID = II.getIntrinsicID();
621 switch (IID) {
622 case Intrinsic::wasm_swizzle:
623 case Intrinsic::wasm_relaxed_swizzle:
  // The third argument tells the helper whether this is the relaxed form.
624 if (Value *V = simplifyWasmSwizzle(
625 II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
626 return IC.replaceInstUsesWith(II, V);
627 }
628 break;
629 }
630
631 return std::nullopt;
632}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static const int MaxVecSize
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
static const unsigned MaxInterleaveFactor
Maximum vectorization interleave count.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
uint64_t IntrinsicInst * II
static Value * simplifyWasmSwizzle(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsRelaxed)
Attempt to convert [relaxed_]swizzle to shufflevector if the mask is constant.
This file defines a TargetTransformInfoImplBase conforming object specific to the WebAssembly target machine.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
The core instruction combiner logic.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
BuilderTy & Builder
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
The optimization diagnostic interface.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
The main scalar evolution driver.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool isLoweredToCall(const Function *F) const
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_Latency
The latency of instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Basic
The cost of a typical 'add' instruction.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_Broadcast
Broadcast element 0 to all other elements.
CastContextHint
Represents a hint about the context in which a cast is used.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getNumberOfRegisters(unsigned ClassID) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
Definition ISDOpcodes.h:24
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
This is an optimization pass for GlobalISel generic memory operations.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
Definition CostTable.h:35
InstructionCost Cost
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
Definition CostTable.h:61
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
CostTblEntryT< unsigned > CostTblEntry
Definition CostTable.h:30
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
Definition CostTable.h:66
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Parameters that control the generic loop unrolling transformation.
bool UpperBound
Allow using trip count upper bound to unroll loops.
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).