//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;
#define DEBUG_TYPE "wasmtti"

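// WebAssembly provides i32.popcnt and i64.popcnt instructions, so popcount is
// fast hardware at every legal width.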
TTI::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

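// Scalars are at most 64 bits wide. With the SIMD128 feature, vectors live in
// 128-bit v128 registers; WebAssembly has no scalable vector type.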
TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {

  if (ST->hasSIMD128()) {
    static const CostTblEntry ArithCostTbl[]{
        // extmul + (maybe awkward) shuffle
        {ISD::MUL, MVT::v8i8, 4},
        // 2x extmul + (okay) shuffle
        {ISD::MUL, MVT::v16i8, 4},
        // extmul
        {ISD::MUL, MVT::v4i16, 1},
        // extmul
        {ISD::MUL, MVT::v2i32, 1},
    };
    EVT DstVT = TLI->getValueType(DL, Ty);
    if (DstVT.isSimple()) {
      int ISD = TLI->InstructionOpcodeToISD(Opcode);
      if (const auto *Entry =
              CostTableLookup(ArithCostTbl, ISD, DstVT.getSimpleVT()))
        return Entry->Cost;
    }
  }

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
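      // Per element this is roughly extract_lane (TCC_Basic) + the scalar
      // shift + replace_lane (TCC_Basic).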
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TTI::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TTI::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
    unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
    TTI::TargetCostKind CostKind, const Instruction *I) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  auto SrcTy = TLI->getValueType(DL, Src);
  auto DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  if (!ST->hasSIMD128()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  auto DstVT = DstTy.getSimpleVT();
  auto SrcVT = SrcTy.getSimpleVT();

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());

    // extmul_low support
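    // e.g. a mul of two v8i16 values that are sign-extended low halves of
    // v16i8 inputs can select to a single i16x8.extmul_low_i8x16_s, so the
    // extend itself is free.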
    if (UserISD == ISD::MUL &&
        (ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND)) {
      // Free low extensions.
      if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
          (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
        return 0;
      }
      // Will require an additional extlow operation for the intermediate
      // i16/i32 value.
      if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
        return 1;
      }
    }
  }

  static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
      // extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      // 2 x extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      // extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      // 2x extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      // shuffle
      {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
      {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
      {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
      // narrow, and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
      // narrow, 2x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
      // 3x narrow, 4x and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
      // 7x narrow, 8x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
      // convert_i32x4
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      // extend_low, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      // extend_low x 2, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      // several shuffles
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      // trunc_sat, const, and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
      {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
      // trunc_sat, const, and, narrow
      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
      {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
      // 2x trunc_sat, const, 2x and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
      {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
      // 2x trunc_sat, const, 2x and, narrow
      {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
      {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
  };

  if (const auto *Entry =
          ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
    return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}

TTI::MemCmpExpansionOptions
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;

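  // Overlapping loads let a span that is not a multiple of the load size be
  // covered without a tail loop, e.g. 13 bytes as two 8-byte loads over
  // bytes [0,8) and [5,13).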
  Options.AllowOverlappingLoads = true;

  if (ST->hasSIMD128())
    Options.LoadSizes.push_back(16);

  Options.LoadSizes.append({8, 4, 2, 1});
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;

  return Options;
}

InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
    unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
    const Instruction *I) const {
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty)) {
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);
  }

  EVT VT = TLI->getValueType(DL, Ty, true);
  // Type legalization can't handle structs.
  if (VT == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);

  auto LT = getTypeLegalizationCost(Ty);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  unsigned Width = VT.getSizeInBits();
  if (ISD == ISD::LOAD) {
    // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
    // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
    // loads are twice as expensive as scalar.
    switch (Width) {
    default:
      break;
    case 32:
    case 64:
    case 128:
      return 2;
    }
  } else if (ISD == ISD::STORE) {
    // For stores, we can use store lane operations.
    switch (Width) {
    default:
      break;
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      return 2;
    }
  }

  return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind);
}

InstructionCost WebAssemblyTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  auto *VecTy = cast<VectorType>(Ty);
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
    return InstructionCost::getInvalid();
  }

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  constexpr unsigned MaxInterleaveFactor = 4;
  if (Factor <= MaxInterleaveFactor) {
    unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
    // Ensure the number of vector elements is greater than 1.
    if (MinElts < 2 || MinElts % Factor != 0)
      return InstructionCost::getInvalid();

    unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
    // Ensure the element type is legal.
    if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
      return InstructionCost::getInvalid();

    if (Factor != 2 && Factor != 4)
      return InstructionCost::getInvalid();

    auto *SubVecTy =
        VectorType::get(VecTy->getElementType(),
                        VecTy->getElementCount().divideCoefficientBy(Factor));
    InstructionCost MemCost =
        getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);

    unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
    unsigned MaxVecSize = 128;
    unsigned NumAccesses =
        std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);
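    // e.g. a v8i32 access group with Factor == 2 has SubVecTy == v4i32, so
    // VecSize == 128 and NumAccesses == (8 * 32 + 127) / 128 == 2.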

    // A stride of two is commonly supported via dedicated instructions, so it
    // should be relatively cheap for all element sizes. A stride of four is
    // more expensive as it will likely require more shuffles. Using two
    // simd128 inputs is considered more expensive and we mainly account for
    // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
    // arithmetic kernels.
    static const CostTblEntry ShuffleCostTbl[] = {
        // One reg.
        {2, MVT::v2i8, 1},  // interleave 2 x 2i8 into 4i8
        {2, MVT::v4i8, 1},  // interleave 2 x 4i8 into 8i8
        {2, MVT::v8i8, 1},  // interleave 2 x 8i8 into 16i8
        {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
        {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
        {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32

        // Two regs.
        {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
        {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
        {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32

        // One reg.
        {4, MVT::v2i8, 4},  // interleave 4 x 2i8 into 8i8
        {4, MVT::v4i8, 4},  // interleave 4 x 4i8 into 16i8
        {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16

        // Two regs.
        {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
        {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
        {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32

        // Four regs.
        {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
    };

    EVT ETy = TLI->getValueType(DL, SubVecTy);
    if (const auto *Entry =
            CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
      return Entry->Cost + (NumAccesses * MemCost);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}

InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1) const {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
  if (Index == -1u)
    return Cost + 25 * TTI::TCC_Expensive;

  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
    TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
    TTI::TargetCostKind CostKind) const {
  InstructionCost Invalid = InstructionCost::getInvalid();
  if (!VF.isFixed() || !ST->hasSIMD128())
    return Invalid;

  if (CostKind != TTI::TCK_RecipThroughput)
    return Invalid;

  if (Opcode != Instruction::Add)
    return Invalid;

  EVT AccumEVT = EVT::getEVT(AccumType);
  // TODO: Add i64 accumulator.
  if (AccumEVT != MVT::i32)
    return Invalid;

  // Possible options:
  // - i16x8.extadd_pairwise_i8x16_sx
  // - i32x4.extadd_pairwise_i16x8_sx
  // - i32x4.dot_i16x8_s
  // Only try to support dot, for now.

  EVT InputEVT = EVT::getEVT(InputTypeA);
  if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
        (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
    return Invalid;
  }

  if (OpAExtend == TTI::PR_None)
    return Invalid;

  InstructionCost Cost(TTI::TCC_Basic);
  if (!BinOp)
    return Cost;

  if (OpAExtend != OpBExtend)
    return Invalid;

  if (*BinOp != Instruction::Mul)
    return Invalid;

  if (InputTypeA != InputTypeB)
    return Invalid;

  // Signed inputs can lower to dot.
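  // e.g. an 8 x i16 signed multiply-accumulate into 4 x i32 maps onto a
  // single i32x4.dot_i16x8_s; the unsigned and i8 variants below need
  // roughly two to four times the work.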
  if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
    return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2;

  // Double the size of the lowered sequence.
  if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
    return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4;

  return Invalid;
}

TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // BEInsns is the number of instructions saved when the back edge becomes a
  // fall-through; keep it at the default value of 2.
  UP.BEInsns = 2;
}

bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}

bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !I->isShift())
    return false;

  Value *V = I->getOperand(1);
  // We don't need to sink a constant splat.
  if (isa<Constant>(V))
    return false;

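  // SIMD128 shift instructions take an i32 scalar shift amount. When the
  // vector shift amount is a splat built from insertelement plus a zero-mask
  // shuffle, sinking both next to the shift lets instruction selection use
  // the scalar amount directly.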
  if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                         m_Value(), m_ZeroMask()))) {
    // Sink insert
    Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
    // Sink shuffle
    Ops.push_back(&I->getOperandUse(1));
    return true;
  }

  return false;
}

/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
/// constant.
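/// For example, a swizzle whose constant mask selects only in-range lanes
/// becomes a plain shufflevector of the first operand; out-of-range lanes
/// read zero, modeled by shuffling in an all-zeroes second operand.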
static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder,
                                  bool IsRelaxed) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert(NumElts == 16);

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[16];
  bool AnyOutOfBounds = false;

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    if (IsRelaxed && cast<ConstantInt>(COp)->getSExtValue() >= NumElts) {
      // The relaxed_swizzle operation always returns 0 if the lane index is
      // less than 0 when interpreted as a signed value. For lane indices above
      // 15, however, it can choose between returning 0 or the lane at `Index %
      // 16`. However, the choice must be made consistently. As the WebAssembly
      // spec states:
      //
      // "The result of relaxed operators are implementation-dependent, because
      // the set of possible results may depend on properties of the host
      // environment, such as its hardware. Technically, their behaviour is
      // controlled by a set of global parameters to the semantics that an
      // implementation can instantiate in different ways. These choices are
      // fixed, that is, parameters are constant during the execution of any
      // given program."
      //
      // The WebAssembly runtime may choose differently from us, so we can't
      // optimize a relaxed swizzle with lane indices above 15.
      return nullptr;
    }

    uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
    if (Index >= NumElts) {
      AnyOutOfBounds = true;
      // If there are out-of-bounds indices, the swizzle instruction returns
      // zeroes in those lanes. We'll provide an all-zeroes vector as the
      // second argument to shufflevector and read the first element from it.
      Indexes[I] = NumElts;
      continue;
    }

    Indexes[I] = Index;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 =
      AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);

  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
}

std::optional<Instruction *>
WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                         IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::wasm_swizzle:
  case Intrinsic::wasm_relaxed_swizzle:
    if (Value *V = simplifyWasmSwizzle(
            II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  return std::nullopt;
}