1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // MVT::Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we custom-lower it.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Legal);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls whose inputs are extends into extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector-to-integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversion ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize the number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // There is no vector conditional select instruction
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // Expand integer operations supported for scalars but not SIMD
280 for (auto Op :
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
284
285 // But we do have integer min and max operations
286 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
287 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
289
290 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
291 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
292 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
293 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
294
295 // Custom lower bit counting operations for other types to scalarize them.
296 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
297 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
299
300 // Expand float operations supported for scalars but not SIMD
303 for (auto T : {MVT::v4f32, MVT::v2f64})
305
306 // Unsigned comparison operations are unavailable for i64x2 vectors.
308 setCondCodeAction(CC, MVT::v2i64, Custom);
309
310 // 64x2 conversions are not in the spec
311 for (auto Op :
313 for (auto T : {MVT::v2i64, MVT::v2f64})
315
316 // But saturating fp_to_int conversions are
318 setOperationAction(Op, MVT::v4i32, Custom);
319 if (Subtarget->hasFP16()) {
320 setOperationAction(Op, MVT::v8i16, Custom);
321 }
322 }
323
324 // Support vector extending
329 }
330
331 if (Subtarget->hasFP16()) {
332 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
333 }
334
335 if (Subtarget->hasRelaxedSIMD()) {
338 }
339
340 // Partial MLA reductions.
342 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
343 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
344 }
345 }
346
347 // As a special case, these operators use the type to mean the type to
348 // sign-extend from.
350 if (!Subtarget->hasSignExt()) {
351 // Sign extends are legal only when extending a vector extract
352 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
353 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
355 }
358
359 // Dynamic stack allocation: use the default expansion.
363
367
368 // Expand these forms; we pattern-match the forms that we can handle in isel.
369 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
370 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
372
373 // We have custom switch handling.
375
376 // WebAssembly doesn't have:
377 // - Floating-point extending loads.
378 // - Floating-point truncating stores.
379 // - i1 extending loads.
380 // - Truncating SIMD stores and most extending loads.
381 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
382 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
383 for (auto T : MVT::integer_valuetypes())
384 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
385 setLoadExtAction(Ext, T, MVT::i1, Promote);
386 if (Subtarget->hasSIMD128()) {
387 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
388 MVT::v2f64}) {
389 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
390 if (MVT(T) != MemT) {
392 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
393 setLoadExtAction(Ext, T, MemT, Expand);
394 }
395 }
396 }
397 // But some vector extending loads are legal
398 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
399 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
400 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
401 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
402 }
403 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
404 }
405
406 // Don't do anything clever with build_pairs
408
409 // Trap lowers to wasm unreachable
410 setOperationAction(ISD::TRAP, MVT::Other, Legal);
412
413 // Exception handling intrinsics
417
419
420 // Always convert switches to br_tables unless there is only one case, which
421 // is equivalent to a simple branch. This reduces code size for wasm, and we
422 // defer possible jump table optimizations to the VM.
424}
425
434
443
445WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
446 const AtomicRMWInst *AI) const {
447 // We have wasm instructions for these
448 switch (AI->getOperation()) {
456 default:
457 break;
458 }
460}
461
462bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
463 // Implementation copied from X86TargetLowering.
464 unsigned Opc = VecOp.getOpcode();
465
466 // Assume target opcodes can't be scalarized.
467 // TODO - do we have any exceptions?
469 return false;
470
471 // If the vector op is not supported, try to convert to scalar.
472 EVT VecVT = VecOp.getValueType();
474 return true;
475
476 // If the vector op is supported, but the scalar op is not, the transform may
477 // not be worthwhile.
478 EVT ScalarVT = VecVT.getScalarType();
479 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
480}
481
482FastISel *WebAssemblyTargetLowering::createFastISel(
483 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
484 return WebAssembly::createFastISel(FuncInfo, LibInfo);
485}
486
487MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
488 EVT VT) const {
489 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
490 if (BitWidth > 1 && BitWidth < 8)
491 BitWidth = 8;
492
493 if (BitWidth > 64) {
494 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
495 // the count to be an i32.
496 BitWidth = 32;
498 "32-bit shift counts ought to be enough for anyone");
499 }
500
503 "Unable to represent scalar shift amount type");
504 return Result;
505}
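// For example, a 128-bit shift like
//
//   __int128 shl(__int128 x, unsigned n) { return x << n; }
//
// has no legal wasm lowering and becomes a libcall (e.g. __ashlti3 for a left
// shift), and those compiler-rt routines take their shift count as a 32-bit
// integer, hence the clamp to 32 above. (Illustrative sketch; the exact
// libcall depends on the operation.)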
506
507// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
508// undefined result on invalid/overflow, to the WebAssembly opcode, which
509// traps on invalid/overflow.
512 const TargetInstrInfo &TII,
513 bool IsUnsigned, bool Int64,
514 bool Float64, unsigned LoweredOpcode) {
516
517 Register OutReg = MI.getOperand(0).getReg();
518 Register InReg = MI.getOperand(1).getReg();
519
520 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
521 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
522 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
523 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
524 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
525 unsigned Eqz = WebAssembly::EQZ_I32;
526 unsigned And = WebAssembly::AND_I32;
527 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
528 int64_t Substitute = IsUnsigned ? 0 : Limit;
529 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
530 auto &Context = BB->getParent()->getFunction().getContext();
531 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
532
533 const BasicBlock *LLVMBB = BB->getBasicBlock();
534 MachineFunction *F = BB->getParent();
535 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
536 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
537 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
538
540 F->insert(It, FalseMBB);
541 F->insert(It, TrueMBB);
542 F->insert(It, DoneMBB);
543
544 // Transfer the remainder of BB and its successor edges to DoneMBB.
545 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
547
548 BB->addSuccessor(TrueMBB);
549 BB->addSuccessor(FalseMBB);
550 TrueMBB->addSuccessor(DoneMBB);
551 FalseMBB->addSuccessor(DoneMBB);
552
553 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
554 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
555 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
556 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
557 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
558 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
559 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
560
561 MI.eraseFromParent();
562 // For signed numbers, we can do a single comparison to determine whether
563 // fabs(x) is within range.
564 if (IsUnsigned) {
565 Tmp0 = InReg;
566 } else {
567 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
568 }
569 BuildMI(BB, DL, TII.get(FConst), Tmp1)
570 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
571 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
572
573 // For unsigned numbers, we have to do a separate comparison with zero.
574 if (IsUnsigned) {
575 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
576 Register SecondCmpReg =
577 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
578 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
579 BuildMI(BB, DL, TII.get(FConst), Tmp1)
580 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
581 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
582 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
583 CmpReg = AndReg;
584 }
585
586 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
587
588 // Create the CFG diamond to select between doing the conversion or using
589 // the substitute value.
590 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
591 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
592 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
593 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
594 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
595 .addReg(FalseReg)
596 .addMBB(FalseMBB)
597 .addReg(TrueReg)
598 .addMBB(TrueMBB);
599
600 return DoneMBB;
601}
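// Roughly, for an unsigned i32 <- f32 conversion the diamond built above
// looks like:
//
//   BB:       %lt   = f32.lt %in, 4294967296.0   ; |x| below the limit?
//             %ge   = f32.ge %in, 0.0            ; extra check, unsigned only
//             %cmp  = i32.and %lt, %ge
//             %eqz  = i32.eqz %cmp
//             br_if TrueMBB, %eqz                ; out of range -> substitute
//   FalseMBB: %f    = i32.trunc_f32_u %in        ; the trapping conversion
//             br DoneMBB
//   TrueMBB:  %t    = i32.const 0                ; the Substitute value
//   DoneMBB:  %out  = phi [%f, FalseMBB], [%t, TrueMBB]
//
// (Sketch only; the constants, opcodes, and substitute value depend on the
// signedness and the operand/result widths chosen above.)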
602
603// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
604 // instruction to handle the zero-length case.
607 const TargetInstrInfo &TII, bool Int64) {
609
610 MachineOperand DstMem = MI.getOperand(0);
611 MachineOperand SrcMem = MI.getOperand(1);
612 MachineOperand Dst = MI.getOperand(2);
613 MachineOperand Src = MI.getOperand(3);
614 MachineOperand Len = MI.getOperand(4);
615
616 // If the length is a constant, we don't actually need the check.
617 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
618 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
619 Def->getOpcode() == WebAssembly::CONST_I64) {
620 if (Def->getOperand(1).getImm() == 0) {
621 // A zero-length memcpy is a no-op.
622 MI.eraseFromParent();
623 return BB;
624 }
625 // A non-zero-length memcpy doesn't need a zero check.
626 unsigned MemoryCopy =
627 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
628 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
629 .add(DstMem)
630 .add(SrcMem)
631 .add(Dst)
632 .add(Src)
633 .add(Len);
634 MI.eraseFromParent();
635 return BB;
636 }
637 }
638
639 // We're going to add an extra use to `Len` to test if it's zero; that
640 // use shouldn't be a kill, even if the original use is.
641 MachineOperand NoKillLen = Len;
642 NoKillLen.setIsKill(false);
643
644 // Decide on which `MachineInstr` opcode we're going to use.
645 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
646 unsigned MemoryCopy =
647 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
648
649 // Create two new basic blocks; one for the new `memory.copy` that we can
650 // branch over, and one for the rest of the instructions after the original
651 // `memory.copy`.
652 const BasicBlock *LLVMBB = BB->getBasicBlock();
653 MachineFunction *F = BB->getParent();
654 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
655 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
656
658 F->insert(It, TrueMBB);
659 F->insert(It, DoneMBB);
660
661 // Transfer the remainder of BB and its successor edges to DoneMBB.
662 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
664
665 // Connect the CFG edges.
666 BB->addSuccessor(TrueMBB);
667 BB->addSuccessor(DoneMBB);
668 TrueMBB->addSuccessor(DoneMBB);
669
670 // Create a virtual register for the `Eqz` result.
671 unsigned EqzReg;
672 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
673
674 // Erase the original `memory.copy`.
675 MI.eraseFromParent();
676
677 // Test if `Len` is zero.
678 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
679
680 // Insert a new `memory.copy`.
681 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
682 .add(DstMem)
683 .add(SrcMem)
684 .add(Dst)
685 .add(Src)
686 .add(Len);
687
688 // Create the CFG triangle.
689 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
690 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
691
692 return DoneMBB;
693}
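// The triangle built above is, schematically:
//
//   BB:       %eqz = i32.eqz %len        ; (i64.eqz when Int64 is set)
//             br_if DoneMBB, %eqz        ; skip the copy when the length is 0
//   TrueMBB:  memory.copy %dst, %src, %len
//   DoneMBB:  ...rest of the original block...
//
// (Sketch; the actual operands are the MachineOperands captured above.)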
694
695// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
696 // instruction to handle the zero-length case.
699 const TargetInstrInfo &TII, bool Int64) {
701
702 MachineOperand Mem = MI.getOperand(0);
703 MachineOperand Dst = MI.getOperand(1);
704 MachineOperand Val = MI.getOperand(2);
705 MachineOperand Len = MI.getOperand(3);
706
707 // If the length is a constant, we don't actually need the check.
708 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
709 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
710 Def->getOpcode() == WebAssembly::CONST_I64) {
711 if (Def->getOperand(1).getImm() == 0) {
712 // A zero-length memset is a no-op.
713 MI.eraseFromParent();
714 return BB;
715 }
716 // A non-zero-length memset doesn't need a zero check.
717 unsigned MemoryFill =
718 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
719 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
720 .add(Mem)
721 .add(Dst)
722 .add(Val)
723 .add(Len);
724 MI.eraseFromParent();
725 return BB;
726 }
727 }
728
729 // We're going to add an extra use to `Len` to test if it's zero; that
730 // use shouldn't be a kill, even if the original use is.
731 MachineOperand NoKillLen = Len;
732 NoKillLen.setIsKill(false);
733
734 // Decide on which `MachineInstr` opcode we're going to use.
735 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
736 unsigned MemoryFill =
737 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
738
739 // Create two new basic blocks; one for the new `memory.fill` that we can
740 // branch over, and one for the rest of the instructions after the original
741 // `memory.fill`.
742 const BasicBlock *LLVMBB = BB->getBasicBlock();
743 MachineFunction *F = BB->getParent();
744 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
745 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
746
748 F->insert(It, TrueMBB);
749 F->insert(It, DoneMBB);
750
751 // Transfer the remainder of BB and its successor edges to DoneMBB.
752 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
754
755 // Connect the CFG edges.
756 BB->addSuccessor(TrueMBB);
757 BB->addSuccessor(DoneMBB);
758 TrueMBB->addSuccessor(DoneMBB);
759
760 // Create a virtual register for the `Eqz` result.
761 unsigned EqzReg;
762 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
763
764 // Erase the original `memory.fill`.
765 MI.eraseFromParent();
766
767 // Test if `Len` is zero.
768 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
769
770 // Insert a new `memory.fill`.
771 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
772
773 // Create the CFG triangle.
774 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
775 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
776
777 return DoneMBB;
778}
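// This mirrors LowerMemcpy above: the same eqz/br_if triangle is built, with
// memory.fill in TrueMBB instead of memory.copy, e.g.
//
//   BB:       br_if DoneMBB, (i32.eqz %len)
//   TrueMBB:  memory.fill %dst, %val, %len
//   DoneMBB:  ...
//
// (Schematic only.)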
779
780static MachineBasicBlock *
782 const WebAssemblySubtarget *Subtarget,
783 const TargetInstrInfo &TII) {
784 MachineInstr &CallParams = *CallResults.getPrevNode();
785 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
786 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
787 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
788
789 bool IsIndirect =
790 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
791 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
792
793 bool IsFuncrefCall = false;
794 if (IsIndirect && CallParams.getOperand(0).isReg()) {
795 Register Reg = CallParams.getOperand(0).getReg();
796 const MachineFunction *MF = BB->getParent();
797 const MachineRegisterInfo &MRI = MF->getRegInfo();
798 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
799 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
800 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
801 }
802
803 unsigned CallOp;
804 if (IsIndirect && IsRetCall) {
805 CallOp = WebAssembly::RET_CALL_INDIRECT;
806 } else if (IsIndirect) {
807 CallOp = WebAssembly::CALL_INDIRECT;
808 } else if (IsRetCall) {
809 CallOp = WebAssembly::RET_CALL;
810 } else {
811 CallOp = WebAssembly::CALL;
812 }
813
814 MachineFunction &MF = *BB->getParent();
815 const MCInstrDesc &MCID = TII.get(CallOp);
816 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
817
818 // Move the function pointer to the end of the arguments for indirect calls
819 if (IsIndirect) {
820 auto FnPtr = CallParams.getOperand(0);
821 CallParams.removeOperand(0);
822
823 // For funcrefs, call_indirect is done through __funcref_call_table, and the
824 // funcref is always installed in slot 0 of the table. Therefore, instead of
825 // having the function pointer added at the end of the params list, a zero
826 // (the index into
827 // __funcref_call_table) is added.
828 if (IsFuncrefCall) {
829 Register RegZero =
830 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
831 MachineInstrBuilder MIBC0 =
832 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
833
834 BB->insert(CallResults.getIterator(), MIBC0);
835 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
836 } else
837 CallParams.addOperand(FnPtr);
838 }
839
840 for (auto Def : CallResults.defs())
841 MIB.add(Def);
842
843 if (IsIndirect) {
844 // Placeholder for the type index.
845 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
846 MIB.addImm(0);
847 // The table into which this call_indirect indexes.
848 MCSymbolWasm *Table = IsFuncrefCall
850 MF.getContext(), Subtarget)
852 MF.getContext(), Subtarget);
853 if (Subtarget->hasCallIndirectOverlong()) {
854 MIB.addSym(Table);
855 } else {
856 // For the MVP there is at most one table whose number is 0, but we can't
857 // write a table symbol or issue relocations. Instead we just ensure the
858 // table is live and write a zero.
859 Table->setNoStrip();
860 MIB.addImm(0);
861 }
862 }
863
864 for (auto Use : CallParams.uses())
865 MIB.add(Use);
866
867 BB->insert(CallResults.getIterator(), MIB);
868 CallParams.eraseFromParent();
869 CallResults.eraseFromParent();
870
871 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
872 // table slot with ref.null upon call_indirect return.
873 //
874 // This generates the following code, which comes right after a call_indirect
875 // of a funcref:
876 //
877 // i32.const 0
878 // ref.null func
879 // table.set __funcref_call_table
880 if (IsIndirect && IsFuncrefCall) {
882 MF.getContext(), Subtarget);
883 Register RegZero =
884 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
885 MachineInstr *Const0 =
886 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
887 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
888
889 Register RegFuncref =
890 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
891 MachineInstr *RefNull =
892 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
893 BB->insertAfter(Const0->getIterator(), RefNull);
894
895 MachineInstr *TableSet =
896 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
897 .addSym(Table)
898 .addReg(RegZero)
899 .addReg(RegFuncref);
900 BB->insertAfter(RefNull->getIterator(), TableSet);
901 }
902
903 return BB;
904}
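// For illustration, a direct call that instruction selection emitted as the
// pseudo pair (roughly)
//
//   CALL_PARAMS  @callee, %arg0, %arg1
//   %ret = CALL_RESULTS
//
// is stitched back together here into a single
//
//   %ret = CALL @callee, %arg0, %arg1
//
// and an indirect call additionally gets the type-index placeholder and the
// table operand described above. (Schematic only; the actual operand lists
// are copied from CallResults' defs and CallParams' uses.)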
905
906MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
907 MachineInstr &MI, MachineBasicBlock *BB) const {
908 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
909 DebugLoc DL = MI.getDebugLoc();
910
911 switch (MI.getOpcode()) {
912 default:
913 llvm_unreachable("Unexpected instr type to insert");
914 case WebAssembly::FP_TO_SINT_I32_F32:
915 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
916 WebAssembly::I32_TRUNC_S_F32);
917 case WebAssembly::FP_TO_UINT_I32_F32:
918 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
919 WebAssembly::I32_TRUNC_U_F32);
920 case WebAssembly::FP_TO_SINT_I64_F32:
921 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
922 WebAssembly::I64_TRUNC_S_F32);
923 case WebAssembly::FP_TO_UINT_I64_F32:
924 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
925 WebAssembly::I64_TRUNC_U_F32);
926 case WebAssembly::FP_TO_SINT_I32_F64:
927 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
928 WebAssembly::I32_TRUNC_S_F64);
929 case WebAssembly::FP_TO_UINT_I32_F64:
930 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
931 WebAssembly::I32_TRUNC_U_F64);
932 case WebAssembly::FP_TO_SINT_I64_F64:
933 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
934 WebAssembly::I64_TRUNC_S_F64);
935 case WebAssembly::FP_TO_UINT_I64_F64:
936 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
937 WebAssembly::I64_TRUNC_U_F64);
938 case WebAssembly::MEMCPY_A32:
939 return LowerMemcpy(MI, DL, BB, TII, false);
940 case WebAssembly::MEMCPY_A64:
941 return LowerMemcpy(MI, DL, BB, TII, true);
942 case WebAssembly::MEMSET_A32:
943 return LowerMemset(MI, DL, BB, TII, false);
944 case WebAssembly::MEMSET_A64:
945 return LowerMemset(MI, DL, BB, TII, true);
946 case WebAssembly::CALL_RESULTS:
947 case WebAssembly::RET_CALL_RESULTS:
948 return LowerCallResults(MI, DL, BB, Subtarget, TII);
949 }
950}
951
952std::pair<unsigned, const TargetRegisterClass *>
953WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
954 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
955 // First, see if this is a constraint that directly corresponds to a
956 // WebAssembly register class.
957 if (Constraint.size() == 1) {
958 switch (Constraint[0]) {
959 case 'r':
960 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
961 if (Subtarget->hasSIMD128() && VT.isVector()) {
962 if (VT.getSizeInBits() == 128)
963 return std::make_pair(0U, &WebAssembly::V128RegClass);
964 }
965 if (VT.isInteger() && !VT.isVector()) {
966 if (VT.getSizeInBits() <= 32)
967 return std::make_pair(0U, &WebAssembly::I32RegClass);
968 if (VT.getSizeInBits() <= 64)
969 return std::make_pair(0U, &WebAssembly::I64RegClass);
970 }
971 if (VT.isFloatingPoint() && !VT.isVector()) {
972 switch (VT.getSizeInBits()) {
973 case 32:
974 return std::make_pair(0U, &WebAssembly::F32RegClass);
975 case 64:
976 return std::make_pair(0U, &WebAssembly::F64RegClass);
977 default:
978 break;
979 }
980 }
981 break;
982 default:
983 break;
984 }
985 }
986
988}
989
990bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
991 // Assume ctz is a relatively cheap operation.
992 return true;
993}
994
995bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
996 // Assume clz is a relatively cheap operation.
997 return true;
998}
999
1000bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1001 const AddrMode &AM,
1002 Type *Ty, unsigned AS,
1003 Instruction *I) const {
1004 // WebAssembly offsets are added as unsigned without wrapping. The
1005 // isLegalAddressingMode hook gives us no way to determine whether wrapping
1006 // could happen, so we approximate this by accepting only non-negative offsets.
1007 if (AM.BaseOffs < 0)
1008 return false;
1009
1010 // WebAssembly has no scale register operands.
1011 if (AM.Scale != 0)
1012 return false;
1013
1014 // Everything else is legal.
1015 return true;
1016}
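// For example, an access of the form `base + 16` (register base plus a small
// positive constant) is accepted, while `base - 8` (negative offset) and
// `base + 4*index` (scaled index) are rejected here and get materialized as
// separate address arithmetic instead. (Illustrative; the checks are exactly
// the two conditions above.)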
1017
1018bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1019 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1020 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1021 // WebAssembly supports unaligned accesses, though loads and stores that
1022 // perform them should declare it via the p2align attribute, and there
1023 // may be a performance impact. We tell LLVM they're "fast" because
1024 // for the kinds of things that LLVM uses this for (merging adjacent stores
1025 // of constants, etc.), WebAssembly implementations will either want the
1026 // unaligned access or they'll split anyway.
1027 if (Fast)
1028 *Fast = 1;
1029 return true;
1030}
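// In the emitted wasm this simply shows up as a reduced alignment hint on the
// access, e.g. `i32.load align=1` for a load that may be misaligned; the
// semantics are unchanged, only the hint (and possibly performance) differs.
// (Illustrative.)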
1031
1032bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1033 AttributeList Attr) const {
1034 // The current thinking is that wasm engines will perform this optimization,
1035 // so we can save on code size.
1036 return true;
1037}
1038
1039bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1040 EVT ExtT = ExtVal.getValueType();
1041 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1042 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1043 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1044 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1045}
1046
1047bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1048 const GlobalAddressSDNode *GA) const {
1049 // Wasm doesn't support function addresses with offsets
1050 const GlobalValue *GV = GA->getGlobal();
1052}
1053
1054EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1055 LLVMContext &C,
1056 EVT VT) const {
1057 if (VT.isVector())
1059
1060 // So far, all branch instructions in Wasm take an I32 condition.
1061 // The default TargetLowering::getSetCCResultType returns the pointer size,
1062 // which would be useful to reduce instruction counts when testing
1063 // against 64-bit pointers/values if at some point Wasm supports that.
1064 return EVT::getIntegerVT(C, 32);
1065}
1066
1067bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1068 const CallBase &I,
1069 MachineFunction &MF,
1070 unsigned Intrinsic) const {
1071 switch (Intrinsic) {
1072 case Intrinsic::wasm_memory_atomic_notify:
1074 Info.memVT = MVT::i32;
1075 Info.ptrVal = I.getArgOperand(0);
1076 Info.offset = 0;
1077 Info.align = Align(4);
1078 // atomic.notify instruction does not really load the memory specified with
1079 // this argument, but MachineMemOperand should either be load or store, so
1080 // we set this to a load.
1081 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1082 // instructions are treated as volatiles in the backend, so we should be
1083 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1085 return true;
1086 case Intrinsic::wasm_memory_atomic_wait32:
1088 Info.memVT = MVT::i32;
1089 Info.ptrVal = I.getArgOperand(0);
1090 Info.offset = 0;
1091 Info.align = Align(4);
1093 return true;
1094 case Intrinsic::wasm_memory_atomic_wait64:
1096 Info.memVT = MVT::i64;
1097 Info.ptrVal = I.getArgOperand(0);
1098 Info.offset = 0;
1099 Info.align = Align(8);
1101 return true;
1102 case Intrinsic::wasm_loadf16_f32:
1104 Info.memVT = MVT::f16;
1105 Info.ptrVal = I.getArgOperand(0);
1106 Info.offset = 0;
1107 Info.align = Align(2);
1109 return true;
1110 case Intrinsic::wasm_storef16_f32:
1112 Info.memVT = MVT::f16;
1113 Info.ptrVal = I.getArgOperand(1);
1114 Info.offset = 0;
1115 Info.align = Align(2);
1117 return true;
1118 default:
1119 return false;
1120 }
1121}
1122
1123void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1124 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1125 const SelectionDAG &DAG, unsigned Depth) const {
1126 switch (Op.getOpcode()) {
1127 default:
1128 break;
1130 unsigned IntNo = Op.getConstantOperandVal(0);
1131 switch (IntNo) {
1132 default:
1133 break;
1134 case Intrinsic::wasm_bitmask: {
1135 unsigned BitWidth = Known.getBitWidth();
1136 EVT VT = Op.getOperand(1).getSimpleValueType();
1137 unsigned PossibleBits = VT.getVectorNumElements();
1138 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1139 Known.Zero |= ZeroMask;
1140 break;
1141 }
1142 }
1143 break;
1144 }
1145 case WebAssemblyISD::EXTEND_LOW_U:
1146 case WebAssemblyISD::EXTEND_HIGH_U: {
1147 // We know the high half of each destination vector element will be zero.
1148 SDValue SrcOp = Op.getOperand(0);
1149 EVT VT = SrcOp.getSimpleValueType();
1150 unsigned BitWidth = Known.getBitWidth();
1151 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1152 assert(BitWidth >= 8 && "Unexpected width!");
1154 Known.Zero |= Mask;
1155 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1156 assert(BitWidth >= 16 && "Unexpected width!");
1158 Known.Zero |= Mask;
1159 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1160 assert(BitWidth >= 32 && "Unexpected width!");
1162 Known.Zero |= Mask;
1163 }
1164 break;
1165 }
1166 // For 128-bit addition, if the upper halves of both operands are zero, then
1167 // the upper half of the result is known to have all bits zero except the
1168 // first (the carry out of the low half).
1169 case WebAssemblyISD::I64_ADD128:
1170 if (Op.getResNo() == 1) {
1171 SDValue LHS_HI = Op.getOperand(1);
1172 SDValue RHS_HI = Op.getOperand(3);
1173 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1174 Known.Zero.setBitsFrom(1);
1175 }
1176 break;
1177 }
1178}
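// For example, @llvm.wasm.bitmask on a v16i8 operand yields an i32 in which
// only the low 16 bits can be set (one per lane), so the upper 16 bits are
// reported as known zero by the wasm_bitmask case above.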
1179
1181WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1182 if (VT.isFixedLengthVector()) {
1183 MVT EltVT = VT.getVectorElementType();
1184 // We have legal vector types with these lane types, so widening the
1185 // vector would let us use some of the lanes directly without having to
1186 // extend or truncate values.
1187 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1188 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1189 return TypeWidenVector;
1190 }
1191
1193}
1194
1195bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1196 const MachineFunction &MF, EVT VT) const {
1197 if (!Subtarget->hasFP16() || !VT.isVector())
1198 return false;
1199
1200 EVT ScalarVT = VT.getScalarType();
1201 if (!ScalarVT.isSimple())
1202 return false;
1203
1204 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1205}
1206
1207bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1208 SDValue Op, const TargetLoweringOpt &TLO) const {
1209 // The ISel process runs DAGCombiner after legalization; this step is called
1210 // the SelectionDAG optimization phase. This post-legalization combining process
1211 // runs DAGCombiner on each node, and if there was a change to be made,
1212 // re-runs legalization again on it and its user nodes to make sure
1213 // everything is in a legalized state.
1214 //
1215 // The legalization calls lowering routines, and we do our custom lowering for
1216 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1217 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1218 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1219 // turns unused vector elements into undefs. But this routine does not work
1220 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1221 // combination can result in an infinite loop, in which undefs are converted to
1222 // zeros in legalization and back to undefs in combining.
1223 //
1224 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1225 // running for build_vectors.
1226 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1227 return false;
1228 return true;
1229}
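// Sketch of the loop being avoided: LowerBUILD_VECTOR rewrites
//   build_vector x, undef   -->   build_vector x, 0
// and SimplifyDemandedVectorElts would then turn the not-demanded zero lane
// back into undef, re-triggering legalization and combining indefinitely.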
1230
1231//===----------------------------------------------------------------------===//
1232// WebAssembly Lowering private implementation.
1233//===----------------------------------------------------------------------===//
1234
1235//===----------------------------------------------------------------------===//
1236// Lowering Code
1237//===----------------------------------------------------------------------===//
1238
1239static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1241 DAG.getContext()->diagnose(
1242 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1243}
1244
1245// Test whether the given calling convention is supported.
1247 // We currently support the language-independent target-independent
1248 // conventions. We don't yet have a way to annotate calls with properties like
1249 // "cold", and we don't have any call-clobbered registers, so these are mostly
1250 // all handled the same.
1251 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1252 CallConv == CallingConv::Cold ||
1253 CallConv == CallingConv::PreserveMost ||
1254 CallConv == CallingConv::PreserveAll ||
1255 CallConv == CallingConv::CXX_FAST_TLS ||
1257 CallConv == CallingConv::Swift;
1258}
1259
1260SDValue
1261WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1262 SmallVectorImpl<SDValue> &InVals) const {
1263 SelectionDAG &DAG = CLI.DAG;
1264 SDLoc DL = CLI.DL;
1265 SDValue Chain = CLI.Chain;
1266 SDValue Callee = CLI.Callee;
1267 MachineFunction &MF = DAG.getMachineFunction();
1268 auto Layout = MF.getDataLayout();
1269
1270 CallingConv::ID CallConv = CLI.CallConv;
1271 if (!callingConvSupported(CallConv))
1272 fail(DL, DAG,
1273 "WebAssembly doesn't support language-specific or target-specific "
1274 "calling conventions yet");
1275 if (CLI.IsPatchPoint)
1276 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1277
1278 if (CLI.IsTailCall) {
1279 auto NoTail = [&](const char *Msg) {
1280 if (CLI.CB && CLI.CB->isMustTailCall())
1281 fail(DL, DAG, Msg);
1282 CLI.IsTailCall = false;
1283 };
1284
1285 if (!Subtarget->hasTailCall())
1286 NoTail("WebAssembly 'tail-call' feature not enabled");
1287
1288 // Varargs calls cannot be tail calls because the buffer is on the stack
1289 if (CLI.IsVarArg)
1290 NoTail("WebAssembly does not support varargs tail calls");
1291
1292 // Do not tail call unless caller and callee return types match
1293 const Function &F = MF.getFunction();
1294 const TargetMachine &TM = getTargetMachine();
1295 Type *RetTy = F.getReturnType();
1296 SmallVector<MVT, 4> CallerRetTys;
1297 SmallVector<MVT, 4> CalleeRetTys;
1298 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1299 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1300 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1301 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1302 CalleeRetTys.begin());
1303 if (!TypesMatch)
1304 NoTail("WebAssembly tail call requires caller and callee return types to "
1305 "match");
1306
1307 // If pointers to local stack values are passed, we cannot tail call
1308 if (CLI.CB) {
1309 for (auto &Arg : CLI.CB->args()) {
1310 Value *Val = Arg.get();
1311 // Trace the value back through pointer operations
1312 while (true) {
1313 Value *Src = Val->stripPointerCastsAndAliases();
1314 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1315 Src = GEP->getPointerOperand();
1316 if (Val == Src)
1317 break;
1318 Val = Src;
1319 }
1320 if (isa<AllocaInst>(Val)) {
1321 NoTail(
1322 "WebAssembly does not support tail calling with stack arguments");
1323 break;
1324 }
1325 }
1326 }
1327 }
1328
1329 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1330 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1331 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1332
1333 // The generic code may have added an sret argument. If we're lowering an
1334 // invoke function, the ABI requires that the function pointer be the first
1335 // argument, so we may have to swap the arguments.
1336 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1337 Outs[0].Flags.isSRet()) {
1338 std::swap(Outs[0], Outs[1]);
1339 std::swap(OutVals[0], OutVals[1]);
1340 }
1341
1342 bool HasSwiftSelfArg = false;
1343 bool HasSwiftErrorArg = false;
1344 unsigned NumFixedArgs = 0;
1345 for (unsigned I = 0; I < Outs.size(); ++I) {
1346 const ISD::OutputArg &Out = Outs[I];
1347 SDValue &OutVal = OutVals[I];
1348 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1349 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1350 if (Out.Flags.isNest())
1351 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1352 if (Out.Flags.isInAlloca())
1353 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1354 if (Out.Flags.isInConsecutiveRegs())
1355 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1357 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1358 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1359 auto &MFI = MF.getFrameInfo();
1360 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1362 /*isSS=*/false);
1363 SDValue SizeNode =
1364 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1365 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1366 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1368 /*isVolatile*/ false, /*AlwaysInline=*/false,
1369 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1370 MachinePointerInfo());
1371 OutVal = FINode;
1372 }
1373 // Count the number of fixed args *after* legalization.
1374 NumFixedArgs += !Out.Flags.isVarArg();
1375 }
1376
1377 bool IsVarArg = CLI.IsVarArg;
1378 auto PtrVT = getPointerTy(Layout);
1379
1380 // For swiftcc, emit additional swiftself and swifterror arguments
1381 // if there aren't any. These additional arguments are also added to the
1382 // callee signature. They are necessary to match callee and caller signatures
1383 // for indirect calls.
1384 if (CallConv == CallingConv::Swift) {
1385 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1386 if (!HasSwiftSelfArg) {
1387 NumFixedArgs++;
1388 ISD::ArgFlagsTy Flags;
1389 Flags.setSwiftSelf();
1390 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1391 CLI.Outs.push_back(Arg);
1392 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1393 CLI.OutVals.push_back(ArgVal);
1394 }
1395 if (!HasSwiftErrorArg) {
1396 NumFixedArgs++;
1397 ISD::ArgFlagsTy Flags;
1398 Flags.setSwiftError();
1399 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1400 CLI.Outs.push_back(Arg);
1401 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1402 CLI.OutVals.push_back(ArgVal);
1403 }
1404 }
1405
1406 // Analyze operands of the call, assigning locations to each operand.
1408 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1409
1410 if (IsVarArg) {
1411 // Outgoing non-fixed arguments are placed in a buffer. First
1412 // compute their offsets and the total amount of buffer space needed.
1413 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1414 const ISD::OutputArg &Out = Outs[I];
1415 SDValue &Arg = OutVals[I];
1416 EVT VT = Arg.getValueType();
1417 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1418 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1419 Align Alignment =
1420 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1421 unsigned Offset =
1422 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1423 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1424 Offset, VT.getSimpleVT(),
1426 }
1427 }
1428
1429 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1430
1431 SDValue FINode;
1432 if (IsVarArg && NumBytes) {
1433 // For non-fixed arguments, next emit stores of the argument values
1434 // to the stack buffer at the offsets computed above.
1435 MaybeAlign StackAlign = Layout.getStackAlignment();
1436 assert(StackAlign && "data layout string is missing stack alignment");
1437 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1438 /*isSS=*/false);
1439 unsigned ValNo = 0;
1441 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1442 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1443 "ArgLocs should remain in order and only hold varargs args");
1444 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1445 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1446 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1447 DAG.getConstant(Offset, DL, PtrVT));
1448 Chains.push_back(
1449 DAG.getStore(Chain, DL, Arg, Add,
1451 }
1452 if (!Chains.empty())
1453 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1454 } else if (IsVarArg) {
1455 FINode = DAG.getIntPtrConstant(0, DL);
1456 }
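// For example, for a variadic call whose non-fixed arguments are an i32 and
// an f64, the loop above stores the i32 at offset 0 and the f64 at offset 8
// of the stack object (assuming the usual 4/8-byte ABI alignments), and the
// frame index FINode is what gets passed as the trailing buffer pointer
// below.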
1457
1458 if (Callee->getOpcode() == ISD::GlobalAddress) {
1459 // If the callee is a GlobalAddress node (quite common, every direct call
1460 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1461 // doesn't add MO_GOT, which is not needed for direct calls.
1462 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1465 GA->getOffset());
1466 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1467 getPointerTy(DAG.getDataLayout()), Callee);
1468 }
1469
1470 // Compute the operands for the CALLn node.
1472 Ops.push_back(Chain);
1473 Ops.push_back(Callee);
1474
1475 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1476 // isn't reliable.
1477 Ops.append(OutVals.begin(),
1478 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1479 // Add a pointer to the vararg buffer.
1480 if (IsVarArg)
1481 Ops.push_back(FINode);
1482
1483 SmallVector<EVT, 8> InTys;
1484 for (const auto &In : Ins) {
1485 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1486 assert(!In.Flags.isNest() && "nest is not valid for return values");
1487 if (In.Flags.isInAlloca())
1488 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1489 if (In.Flags.isInConsecutiveRegs())
1490 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1491 if (In.Flags.isInConsecutiveRegsLast())
1492 fail(DL, DAG,
1493 "WebAssembly hasn't implemented cons regs last return values");
1494 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1495 // registers.
1496 InTys.push_back(In.VT);
1497 }
1498
1499 // Lastly, if this is a call to a funcref we need to add a table.set
1500 // instruction to the chain and transform the call.
1502 CLI.CB->getCalledOperand()->getType())) {
1503 // In the absence of the function references proposal, where a funcref call
1504 // would be lowered to call_ref, we use reference types: we generate a
1505 // table.set that installs the funcref in a special table used solely for
1506 // this purpose, followed by a call_indirect. Here we just generate the
1507 // table.set and return its SDValue so that LowerCall can finalize the
1508 // lowering by generating the call_indirect.
1509 SDValue Chain = Ops[0];
1510
1512 MF.getContext(), Subtarget);
1513 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1514 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1515 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1516 SDValue TableSet = DAG.getMemIntrinsicNode(
1517 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1518 MVT::funcref,
1519 // Machine Mem Operand args
1520 MachinePointerInfo(
1522 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1524
1525 Ops[0] = TableSet; // The new chain is the TableSet itself
1526 }
1527
1528 if (CLI.IsTailCall) {
1529 // ret_calls do not return values to the current frame
1530 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1531 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1532 }
1533
1534 InTys.push_back(MVT::Other);
1535 SDVTList InTyList = DAG.getVTList(InTys);
1536 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1537
1538 for (size_t I = 0; I < Ins.size(); ++I)
1539 InVals.push_back(Res.getValue(I));
1540
1541 // Return the chain
1542 return Res.getValue(Ins.size());
1543}
1544
1545bool WebAssemblyTargetLowering::CanLowerReturn(
1546 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1547 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1548 const Type *RetTy) const {
1549 // WebAssembly can only handle returning tuples with multivalue enabled
1550 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1551}
1552
1553SDValue WebAssemblyTargetLowering::LowerReturn(
1554 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1556 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1557 SelectionDAG &DAG) const {
1558 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1559 "MVP WebAssembly can only return up to one value");
1560 if (!callingConvSupported(CallConv))
1561 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1562
1563 SmallVector<SDValue, 4> RetOps(1, Chain);
1564 RetOps.append(OutVals.begin(), OutVals.end());
1565 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1566
1567 // Record the number and types of the return values.
1568 for (const ISD::OutputArg &Out : Outs) {
1569 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1570 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1571 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1572 if (Out.Flags.isInAlloca())
1573 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1574 if (Out.Flags.isInConsecutiveRegs())
1575 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1577 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1578 }
1579
1580 return Chain;
1581}
1582
1583SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1584 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1585 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1586 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1587 if (!callingConvSupported(CallConv))
1588 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1589
1590 MachineFunction &MF = DAG.getMachineFunction();
1591 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1592
1593 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1594 // of the incoming values before they're represented by virtual registers.
1595 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1596
1597 bool HasSwiftErrorArg = false;
1598 bool HasSwiftSelfArg = false;
1599 for (const ISD::InputArg &In : Ins) {
1600 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1601 HasSwiftErrorArg |= In.Flags.isSwiftError();
1602 if (In.Flags.isInAlloca())
1603 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1604 if (In.Flags.isNest())
1605 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1606 if (In.Flags.isInConsecutiveRegs())
1607 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1608 if (In.Flags.isInConsecutiveRegsLast())
1609 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1610 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1611 // registers.
1612 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1613 DAG.getTargetConstant(InVals.size(),
1614 DL, MVT::i32))
1615 : DAG.getUNDEF(In.VT));
1616
1617 // Record the number and types of arguments.
1618 MFI->addParam(In.VT);
1619 }
1620
1621 // For swiftcc, emit additional swiftself and swifterror arguments
1622 // if there aren't any. These additional arguments are also added to the
1623 // callee signature. They are necessary to match callee and caller signatures
1624 // for indirect calls.
1625 auto PtrVT = getPointerTy(MF.getDataLayout());
1626 if (CallConv == CallingConv::Swift) {
1627 if (!HasSwiftSelfArg) {
1628 MFI->addParam(PtrVT);
1629 }
1630 if (!HasSwiftErrorArg) {
1631 MFI->addParam(PtrVT);
1632 }
1633 }
1634 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1635 // the buffer is passed as an argument.
1636 if (IsVarArg) {
1637 MVT PtrVT = getPointerTy(MF.getDataLayout());
1638 Register VarargVreg =
1640 MFI->setVarargBufferVreg(VarargVreg);
1641 Chain = DAG.getCopyToReg(
1642 Chain, DL, VarargVreg,
1643 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1644 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1645 MFI->addParam(PtrVT);
1646 }
1647
1648 // Record the number and types of arguments and results.
1649 SmallVector<MVT, 4> Params;
1652 MF.getFunction(), DAG.getTarget(), Params, Results);
1653 for (MVT VT : Results)
1654 MFI->addResult(VT);
1655 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1656 // the param logic here with ComputeSignatureVTs
1657 assert(MFI->getParams().size() == Params.size() &&
1658 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1659 Params.begin()));
1660
1661 return Chain;
1662}
1663
1664void WebAssemblyTargetLowering::ReplaceNodeResults(
1666 switch (N->getOpcode()) {
1668 // Do not add any results, signifying that N should not be custom lowered
1669 // after all. This happens because simd128 turns on custom lowering for
1670 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1671 // illegal type.
1672 break;
1675 // Do not add any results, signifying that N should not be custom lowered.
1676 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1677 break;
1678 case ISD::ADD:
1679 case ISD::SUB:
1680 Results.push_back(Replace128Op(N, DAG));
1681 break;
1682 default:
1684 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1685 }
1686}
1687
1688//===----------------------------------------------------------------------===//
1689// Custom lowering hooks.
1690//===----------------------------------------------------------------------===//
1691
1692SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1693 SelectionDAG &DAG) const {
1694 SDLoc DL(Op);
1695 switch (Op.getOpcode()) {
1696 default:
1697 llvm_unreachable("unimplemented operation lowering");
1698 return SDValue();
1699 case ISD::FrameIndex:
1700 return LowerFrameIndex(Op, DAG);
1701 case ISD::GlobalAddress:
1702 return LowerGlobalAddress(Op, DAG);
1704 return LowerGlobalTLSAddress(Op, DAG);
1706 return LowerExternalSymbol(Op, DAG);
1707 case ISD::JumpTable:
1708 return LowerJumpTable(Op, DAG);
1709 case ISD::BR_JT:
1710 return LowerBR_JT(Op, DAG);
1711 case ISD::VASTART:
1712 return LowerVASTART(Op, DAG);
1713 case ISD::BlockAddress:
1714 case ISD::BRIND:
1715 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1716 return SDValue();
1717 case ISD::RETURNADDR:
1718 return LowerRETURNADDR(Op, DAG);
1719 case ISD::FRAMEADDR:
1720 return LowerFRAMEADDR(Op, DAG);
1721 case ISD::CopyToReg:
1722 return LowerCopyToReg(Op, DAG);
1723   case ISD::EXTRACT_VECTOR_ELT:
1724   case ISD::INSERT_VECTOR_ELT:
1725     return LowerAccessVectorElement(Op, DAG);
1726   case ISD::INTRINSIC_VOID:
1727   case ISD::INTRINSIC_WO_CHAIN:
1728   case ISD::INTRINSIC_W_CHAIN:
1729     return LowerIntrinsic(Op, DAG);
1730   case ISD::SIGN_EXTEND_INREG:
1731     return LowerSIGN_EXTEND_INREG(Op, DAG);
1732   case ISD::SIGN_EXTEND_VECTOR_INREG:
1733   case ISD::ZERO_EXTEND_VECTOR_INREG:
1734   case ISD::ANY_EXTEND_VECTOR_INREG:
1735     return LowerEXTEND_VECTOR_INREG(Op, DAG);
1736 case ISD::BUILD_VECTOR:
1737 return LowerBUILD_VECTOR(Op, DAG);
1738   case ISD::VECTOR_SHUFFLE:
1739     return LowerVECTOR_SHUFFLE(Op, DAG);
1740 case ISD::SETCC:
1741 return LowerSETCC(Op, DAG);
1742 case ISD::SHL:
1743 case ISD::SRA:
1744 case ISD::SRL:
1745 return LowerShift(Op, DAG);
1746   case ISD::FP_TO_SINT_SAT:
1747   case ISD::FP_TO_UINT_SAT:
1748     return LowerFP_TO_INT_SAT(Op, DAG);
1749 case ISD::LOAD:
1750 return LowerLoad(Op, DAG);
1751 case ISD::STORE:
1752 return LowerStore(Op, DAG);
1753 case ISD::CTPOP:
1754 case ISD::CTLZ:
1755 case ISD::CTTZ:
1756 return DAG.UnrollVectorOp(Op.getNode());
1757 case ISD::CLEAR_CACHE:
1758 report_fatal_error("llvm.clear_cache is not supported on wasm");
1759 case ISD::SMUL_LOHI:
1760 case ISD::UMUL_LOHI:
1761 return LowerMUL_LOHI(Op, DAG);
1762 case ISD::UADDO:
1763 return LowerUADDO(Op, DAG);
1764 }
1765}
1766
1767 static bool IsWebAssemblyGlobal(SDValue Op) {
1768   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1769     return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1770
1771 return false;
1772}
1773
1774static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1775 SelectionDAG &DAG) {
1776   const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1777   if (!FI)
1778 return std::nullopt;
1779
1780 auto &MF = DAG.getMachineFunction();
1781   return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1782 }
1783
1784SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1785 SelectionDAG &DAG) const {
1786 SDLoc DL(Op);
1787 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1788 const SDValue &Value = SN->getValue();
1789 const SDValue &Base = SN->getBasePtr();
1790 const SDValue &Offset = SN->getOffset();
1791
1792   if (IsWebAssemblyGlobal(Base)) {
1793     if (!Offset->isUndef())
1794 report_fatal_error("unexpected offset when storing to webassembly global",
1795 false);
1796
1797 SDVTList Tys = DAG.getVTList(MVT::Other);
1798 SDValue Ops[] = {SN->getChain(), Value, Base};
1799 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1800 SN->getMemoryVT(), SN->getMemOperand());
1801 }
1802
1803 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1804 if (!Offset->isUndef())
1805 report_fatal_error("unexpected offset when storing to webassembly local",
1806 false);
1807
1808 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1809 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1810 SDValue Ops[] = {SN->getChain(), Idx, Value};
1811 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1812 }
1813
1814   if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
1815     report_fatal_error(
1816         "Encountered an unlowerable store to the wasm_var address space",
1817 false);
1818
1819 return Op;
1820}
1821
1822SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1823 SelectionDAG &DAG) const {
1824 SDLoc DL(Op);
1825 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1826 const SDValue &Base = LN->getBasePtr();
1827 const SDValue &Offset = LN->getOffset();
1828
1829   if (IsWebAssemblyGlobal(Base)) {
1830     if (!Offset->isUndef())
1831       report_fatal_error(
1832           "unexpected offset when loading from webassembly global", false);
1833
1834 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1835 SDValue Ops[] = {LN->getChain(), Base};
1836 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1837 LN->getMemoryVT(), LN->getMemOperand());
1838 }
1839
1840 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1841 if (!Offset->isUndef())
1842       report_fatal_error(
1843           "unexpected offset when loading from webassembly local", false);
1844
1845 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1846 EVT LocalVT = LN->getValueType(0);
1847 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1848 {LN->getChain(), Idx});
1849 }
1850
1851   if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
1852     report_fatal_error(
1853         "Encountered an unlowerable load from the wasm_var address space",
1854 false);
1855
1856 return Op;
1857}
1858
1859SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1860 SelectionDAG &DAG) const {
1861 assert(Subtarget->hasWideArithmetic());
1862 assert(Op.getValueType() == MVT::i64);
1863 SDLoc DL(Op);
1864 unsigned Opcode;
1865 switch (Op.getOpcode()) {
1866 case ISD::UMUL_LOHI:
1867 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1868 break;
1869 case ISD::SMUL_LOHI:
1870 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1871 break;
1872 default:
1873 llvm_unreachable("unexpected opcode");
1874 }
1875 SDValue LHS = Op.getOperand(0);
1876 SDValue RHS = Op.getOperand(1);
1877 SDValue Lo =
1878 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1879 SDValue Hi(Lo.getNode(), 1);
1880 SDValue Ops[] = {Lo, Hi};
1881 return DAG.getMergeValues(Ops, DL);
1882}
1883
1884// Lowers `UADDO` to an `i64.add128` instruction when wide arithmetic is enabled.
1885//
1886// This enables generating a single wasm instruction for this operation where
1887// the upper half of both operands are constant zeros. The upper half of the
1888// result is then whether the overflow happened.
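// As a rough sketch (names like $lhs/$rhs are placeholders), the node
//   (uaddo i64:$lhs, i64:$rhs)
// becomes
//   t = (I64_ADD128 $lhs, 0, $rhs, 0)   // t:0 = low sum, t:1 = carry (0 or 1)
// and the UADDO results are { t:0, (truncate i32 t:1) }.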
1889SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1890 SelectionDAG &DAG) const {
1891 assert(Subtarget->hasWideArithmetic());
1892 assert(Op.getValueType() == MVT::i64);
1893 assert(Op.getOpcode() == ISD::UADDO);
1894 SDLoc DL(Op);
1895 SDValue LHS = Op.getOperand(0);
1896 SDValue RHS = Op.getOperand(1);
1897 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1898 SDValue Result =
1899 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1900 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1901 SDValue CarryI64(Result.getNode(), 1);
1902 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1903 SDValue Ops[] = {Result, CarryI32};
1904 return DAG.getMergeValues(Ops, DL);
1905}
1906
1907SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1908 SelectionDAG &DAG) const {
1909 assert(Subtarget->hasWideArithmetic());
1910 assert(N->getValueType(0) == MVT::i128);
1911 SDLoc DL(N);
1912 unsigned Opcode;
1913 switch (N->getOpcode()) {
1914 case ISD::ADD:
1915 Opcode = WebAssemblyISD::I64_ADD128;
1916 break;
1917 case ISD::SUB:
1918 Opcode = WebAssemblyISD::I64_SUB128;
1919 break;
1920 default:
1921 llvm_unreachable("unexpected opcode");
1922 }
1923 SDValue LHS = N->getOperand(0);
1924 SDValue RHS = N->getOperand(1);
1925
1926 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1927 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1928 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1929 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1930 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1931 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1932 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1933 LHS_0, LHS_1, RHS_0, RHS_1);
1934 SDValue Result_HI(Result_LO.getNode(), 1);
1935 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1936}
1937
1938SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1939 SelectionDAG &DAG) const {
1940 SDValue Src = Op.getOperand(2);
1941 if (isa<FrameIndexSDNode>(Src.getNode())) {
1942 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1943 // the FI to some LEA-like instruction, but since we don't have that, we
1944 // need to insert some kind of instruction that can take an FI operand and
1945 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1946 // local.copy between Op and its FI operand.
1947 SDValue Chain = Op.getOperand(0);
1948 SDLoc DL(Op);
1949 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1950 EVT VT = Src.getValueType();
1951 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1952 : WebAssembly::COPY_I64,
1953 DL, VT, Src),
1954 0);
1955 return Op.getNode()->getNumValues() == 1
1956 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1957 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1958 Op.getNumOperands() == 4 ? Op.getOperand(3)
1959 : SDValue());
1960 }
1961 return SDValue();
1962}
1963
1964SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1965 SelectionDAG &DAG) const {
1966 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1967 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1968}
1969
1970SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1971 SelectionDAG &DAG) const {
1972 SDLoc DL(Op);
1973
1974 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1975 fail(DL, DAG,
1976 "Non-Emscripten WebAssembly hasn't implemented "
1977 "__builtin_return_address");
1978 return SDValue();
1979 }
1980
1981 unsigned Depth = Op.getConstantOperandVal(0);
1982 MakeLibCallOptions CallOptions;
1983 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1984 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1985 .first;
1986}
1987
1988SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1989 SelectionDAG &DAG) const {
1990 // Non-zero depths are not supported by WebAssembly currently. Use the
1991 // legalizer's default expansion, which is to return 0 (what this function is
1992 // documented to do).
1993 if (Op.getConstantOperandVal(0) > 0)
1994 return SDValue();
1995
1997 EVT VT = Op.getValueType();
1998 Register FP =
1999 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2000 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2001}
2002
2003SDValue
2004WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2005 SelectionDAG &DAG) const {
2006 SDLoc DL(Op);
2007 const auto *GA = cast<GlobalAddressSDNode>(Op);
2008
2009 MachineFunction &MF = DAG.getMachineFunction();
2010 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2011 report_fatal_error("cannot use thread-local storage without bulk memory",
2012 false);
2013
2014 const GlobalValue *GV = GA->getGlobal();
2015
2016 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2017 // on other targets, if we have thread-local storage, only the local-exec
2018 // model is possible.
2019 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2020 ? GV->getThreadLocalMode()
2021                    : GlobalValue::LocalExecTLSModel;
2022
2023 // Unsupported TLS modes
2024   assert(model != GlobalValue::NotThreadLocal);
2025   assert(model != GlobalValue::InitialExecTLSModel);
2026
2027 if (model == GlobalValue::LocalExecTLSModel ||
2028       model == GlobalValue::LocalDynamicTLSModel ||
2029       (model == GlobalValue::GeneralDynamicTLSModel &&
2030        getTargetMachine().shouldAssumeDSOLocal(GV))) {
2031 // For DSO-local TLS variables we use offset from __tls_base
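    // e.g. the emitted sequence is roughly ("tls_sym" is a placeholder):
    //   global.get __tls_base
    //   i32.const tls_sym@TLSREL      ;; i64.const on wasm64
    //   i32.add                       ;; address of the DSO-local TLS variable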
2032
2033 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2034 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2035 : WebAssembly::GLOBAL_GET_I32;
2036 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2037
2038     SDValue BaseAddr(
2039         DAG.getMachineNode(GlobalGet, DL, PtrVT,
2040 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2041 0);
2042
2043 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2044 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2045 SDValue SymOffset =
2046 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2047
2048 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2049 }
2050
2052
2053 EVT VT = Op.getValueType();
2054 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2055 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2056 GA->getOffset(),
2057                                                 WebAssemblyII::MO_GOT_TLS));
2058 }
2059
2060SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2061 SelectionDAG &DAG) const {
2062 SDLoc DL(Op);
2063 const auto *GA = cast<GlobalAddressSDNode>(Op);
2064 EVT VT = Op.getValueType();
2065 assert(GA->getTargetFlags() == 0 &&
2066 "Unexpected target flags on generic GlobalAddressSDNode");
2068 fail(DL, DAG, "Invalid address space for WebAssembly target");
2069
2070 unsigned OperandFlags = 0;
2071 const GlobalValue *GV = GA->getGlobal();
2072   // Since WebAssembly tables cannot yet be shared across modules, we don't
2073 // need special treatment for tables in PIC mode.
2074 if (isPositionIndependent() &&
2076 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2077 MachineFunction &MF = DAG.getMachineFunction();
2078 MVT PtrVT = getPointerTy(MF.getDataLayout());
2079 const char *BaseName;
2080 if (GV->getValueType()->isFunctionTy()) {
2081 BaseName = MF.createExternalSymbolName("__table_base");
2082         OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2083       } else {
2084 BaseName = MF.createExternalSymbolName("__memory_base");
2085         OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2086       }
2087       SDValue BaseAddr =
2088           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2089 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2090
2091 SDValue SymAddr = DAG.getNode(
2092 WebAssemblyISD::WrapperREL, DL, VT,
2093 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2094 OperandFlags));
2095
2096 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2097 }
2098     OperandFlags = WebAssemblyII::MO_GOT;
2099   }
2100
2101 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2102 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2103 GA->getOffset(), OperandFlags));
2104}
2105
2106SDValue
2107WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2108 SelectionDAG &DAG) const {
2109 SDLoc DL(Op);
2110 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2111 EVT VT = Op.getValueType();
2112 assert(ES->getTargetFlags() == 0 &&
2113 "Unexpected target flags on generic ExternalSymbolSDNode");
2114 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2115 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2116}
2117
2118SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2119 SelectionDAG &DAG) const {
2120 // There's no need for a Wrapper node because we always incorporate a jump
2121 // table operand into a BR_TABLE instruction, rather than ever
2122 // materializing it in a register.
2123 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2124 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2125 JT->getTargetFlags());
2126}
2127
2128SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2129 SelectionDAG &DAG) const {
2130 SDLoc DL(Op);
2131 SDValue Chain = Op.getOperand(0);
2132 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2133 SDValue Index = Op.getOperand(2);
2134 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2135
2136   SmallVector<SDValue, 8> Ops;
2137   Ops.push_back(Chain);
2138 Ops.push_back(Index);
2139
2140 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2141 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2142
2143 // Add an operand for each case.
2144 for (auto *MBB : MBBs)
2145 Ops.push_back(DAG.getBasicBlock(MBB));
2146
2147 // Add the first MBB as a dummy default target for now. This will be replaced
2148 // with the proper default target (and the preceding range check eliminated)
2149 // if possible by WebAssemblyFixBrTableDefaults.
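  // The node built below is roughly:
  //   (BR_TABLE chain, index, bb0, bb1, ..., bbN, default-bb)
  // with the dummy default described above in the last operand slot.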
2150 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2151 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2152}
2153
2154SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2155 SelectionDAG &DAG) const {
2156 SDLoc DL(Op);
2157 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2158
2159 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2160 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2161
2162 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2163 MFI->getVarargBufferVreg(), PtrVT);
2164 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2165 MachinePointerInfo(SV));
2166}
2167
2168SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2169 SelectionDAG &DAG) const {
2170 MachineFunction &MF = DAG.getMachineFunction();
2171 unsigned IntNo;
2172 switch (Op.getOpcode()) {
2173   case ISD::INTRINSIC_VOID:
2174   case ISD::INTRINSIC_W_CHAIN:
2175     IntNo = Op.getConstantOperandVal(1);
2176 break;
2177   case ISD::INTRINSIC_WO_CHAIN:
2178     IntNo = Op.getConstantOperandVal(0);
2179 break;
2180 default:
2181 llvm_unreachable("Invalid intrinsic");
2182 }
2183 SDLoc DL(Op);
2184
2185 switch (IntNo) {
2186 default:
2187 return SDValue(); // Don't custom lower most intrinsics.
2188
2189 case Intrinsic::wasm_lsda: {
2190 auto PtrVT = getPointerTy(MF.getDataLayout());
2191 const char *SymName = MF.createExternalSymbolName(
2192 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2193 if (isPositionIndependent()) {
2194       SDValue Node = DAG.getTargetExternalSymbol(
2195           SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2196 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2197       SDValue BaseAddr =
2198           DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2199 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2200 SDValue SymAddr =
2201 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2202 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2203 }
2204 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2205 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2206 }
2207
2208 case Intrinsic::wasm_shuffle: {
2209 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2210 SDValue Ops[18];
2211 size_t OpIdx = 0;
2212 Ops[OpIdx++] = Op.getOperand(1);
2213 Ops[OpIdx++] = Op.getOperand(2);
2214 while (OpIdx < 18) {
2215 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2216 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2217 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2218 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2219 } else {
2220 Ops[OpIdx++] = MaskIdx;
2221 }
2222 }
2223 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2224 }
2225
2226 case Intrinsic::thread_pointer: {
2227 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2228 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2229 : WebAssembly::GLOBAL_GET_I32;
2230 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2231 return SDValue(
2232 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2233 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2234 0);
2235 }
2236 }
2237}
2238
2239SDValue
2240WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2241 SelectionDAG &DAG) const {
2242 SDLoc DL(Op);
2243 // If sign extension operations are disabled, allow sext_inreg only if operand
2244 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2245 // extension operations, but allowing sext_inreg in this context lets us have
2246 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2247 // everywhere would be simpler in this file, but would necessitate large and
2248 // brittle patterns to undo the expansion and select extract_lane_s
2249 // instructions.
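  // e.g. (a sketch; $v and $i stand for arbitrary operands)
  //   (sext_inreg (extract_vector_elt v16i8:$v, $i), i8)
  // is kept intact here so it can later be selected as i8x16.extract_lane_s.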
2250 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2251 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2252 return SDValue();
2253
2254 const SDValue &Extract = Op.getOperand(0);
2255 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2256 if (VecT.getVectorElementType().getSizeInBits() > 32)
2257 return SDValue();
2258 MVT ExtractedLaneT =
2259 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2260 MVT ExtractedVecT =
2261 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2262 if (ExtractedVecT == VecT)
2263 return Op;
2264
2265 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2266 const SDNode *Index = Extract.getOperand(1).getNode();
2267 if (!isa<ConstantSDNode>(Index))
2268 return SDValue();
2269 unsigned IndexVal = Index->getAsZExtVal();
2270 unsigned Scale =
2271 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2272 assert(Scale > 1);
2273 SDValue NewIndex =
2274 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2275 SDValue NewExtract = DAG.getNode(
2276       ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2277       DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2278 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2279 Op.getOperand(1));
2280}
2281
2282static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2283 SelectionDAG &DAG) {
2284 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2285 return SDValue();
2286
2287 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2288 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2289 "expected extend_low");
2290 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2291
2292 ArrayRef<int> Mask = Shuffle->getMask();
2293 // Look for a shuffle which moves from the high half to the low half.
2294 size_t FirstIdx = Mask.size() / 2;
2295 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2296 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2297 return SDValue();
2298 }
2299 }
2300
2301 SDLoc DL(Op);
2302 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2303 ? WebAssemblyISD::EXTEND_HIGH_S
2304 : WebAssemblyISD::EXTEND_HIGH_U;
2305 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2306}
2307
2308SDValue
2309WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2310 SelectionDAG &DAG) const {
2311 SDLoc DL(Op);
2312 EVT VT = Op.getValueType();
2313 SDValue Src = Op.getOperand(0);
2314 EVT SrcVT = Src.getValueType();
2315
2316 if (SrcVT.getVectorElementType() == MVT::i1 ||
2317 SrcVT.getVectorElementType() == MVT::i64)
2318 return SDValue();
2319
2320 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2321 "Unexpected extension factor.");
2322 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2323
2324 if (Scale != 2 && Scale != 4 && Scale != 8)
2325 return SDValue();
2326
2327 unsigned Ext;
2328 switch (Op.getOpcode()) {
2329 default:
2330 llvm_unreachable("unexpected opcode");
2331   case ISD::ZERO_EXTEND_VECTOR_INREG:
2332   case ISD::ANY_EXTEND_VECTOR_INREG:
2333     Ext = WebAssemblyISD::EXTEND_LOW_U;
2334 break;
2335   case ISD::SIGN_EXTEND_VECTOR_INREG:
2336     Ext = WebAssemblyISD::EXTEND_LOW_S;
2337 break;
2338 }
2339
2340 if (Scale == 2) {
2341 // See if we can use EXTEND_HIGH.
2342 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2343 return ExtendHigh;
2344 }
2345
2346 SDValue Ret = Src;
2347 while (Scale != 1) {
2348 Ret = DAG.getNode(Ext, DL,
2349 Ret.getValueType()
2350                           .widenIntegerVectorElementType(*DAG.getContext())
2351                           .getHalfNumVectorElementsVT(*DAG.getContext()),
2352                       Ret);
2353 Scale /= 2;
2354 }
2355 assert(Ret.getValueType() == VT);
2356 return Ret;
2357}
2358
2359 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2360   SDLoc DL(Op);
2361 if (Op.getValueType() != MVT::v2f64)
2362 return SDValue();
2363
2364 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2365 unsigned &Index) -> bool {
2366 switch (Op.getOpcode()) {
2367 case ISD::SINT_TO_FP:
2368 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2369 break;
2370 case ISD::UINT_TO_FP:
2371 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2372 break;
2373 case ISD::FP_EXTEND:
2374 Opcode = WebAssemblyISD::PROMOTE_LOW;
2375 break;
2376 default:
2377 return false;
2378 }
2379
2380 auto ExtractVector = Op.getOperand(0);
2381 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2382 return false;
2383
2384 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2385 return false;
2386
2387 SrcVec = ExtractVector.getOperand(0);
2388 Index = ExtractVector.getConstantOperandVal(1);
2389 return true;
2390 };
2391
2392 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2393 SDValue LHSSrcVec, RHSSrcVec;
2394 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2395 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2396 return SDValue();
2397
2398 if (LHSOpcode != RHSOpcode)
2399 return SDValue();
2400
2401 MVT ExpectedSrcVT;
2402 switch (LHSOpcode) {
2403 case WebAssemblyISD::CONVERT_LOW_S:
2404 case WebAssemblyISD::CONVERT_LOW_U:
2405 ExpectedSrcVT = MVT::v4i32;
2406 break;
2407 case WebAssemblyISD::PROMOTE_LOW:
2408 ExpectedSrcVT = MVT::v4f32;
2409 break;
2410 }
2411 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2412 return SDValue();
2413
2414 auto Src = LHSSrcVec;
2415 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2416 // Shuffle the source vector so that the converted lanes are the low lanes.
2417 Src = DAG.getVectorShuffle(
2418 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2419 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2420 }
2421 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2422}
2423
2424SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2425 SelectionDAG &DAG) const {
2426 MVT VT = Op.getSimpleValueType();
2427 if (VT == MVT::v8f16) {
2428     // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2429     // FP16 type, so cast them to I16s.
2430 MVT IVT = VT.changeVectorElementType(MVT::i16);
2431     SmallVector<SDValue, 8> NewOps;
2432     for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2433 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2434 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2435 return DAG.getBitcast(VT, Res);
2436 }
2437
2438 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2439 return ConvertLow;
2440
2441 SDLoc DL(Op);
2442 const EVT VecT = Op.getValueType();
2443 const EVT LaneT = Op.getOperand(0).getValueType();
2444 const size_t Lanes = Op.getNumOperands();
2445 bool CanSwizzle = VecT == MVT::v16i8;
2446
2447 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2448 // possible number of lanes at once followed by a sequence of replace_lane
2449 // instructions to individually initialize any remaining lanes.
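  // For example (illustrative only): a v4i32 build_vector whose lanes are
  // {X, X, X, Y} would typically become an i32x4.splat of X followed by a
  // single i32x4.replace_lane of Y at lane 3.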
2450
2451 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2452 // swizzled lanes should be given greater weight.
2453
2454 // TODO: Investigate looping rather than always extracting/replacing specific
2455 // lanes to fill gaps.
2456
2457 auto IsConstant = [](const SDValue &V) {
2458 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2459 };
2460
2461 // Returns the source vector and index vector pair if they exist. Checks for:
2462 // (extract_vector_elt
2463 // $src,
2464 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2465 // )
2466 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2467 auto Bail = std::make_pair(SDValue(), SDValue());
2468 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2469 return Bail;
2470 const SDValue &SwizzleSrc = Lane->getOperand(0);
2471 const SDValue &IndexExt = Lane->getOperand(1);
2472 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2473 return Bail;
2474 const SDValue &Index = IndexExt->getOperand(0);
2475 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2476 return Bail;
2477 const SDValue &SwizzleIndices = Index->getOperand(0);
2478 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2479 SwizzleIndices.getValueType() != MVT::v16i8 ||
2480 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2481 Index->getConstantOperandVal(1) != I)
2482 return Bail;
2483 return std::make_pair(SwizzleSrc, SwizzleIndices);
2484 };
2485
2486 // If the lane is extracted from another vector at a constant index, return
2487 // that vector. The source vector must not have more lanes than the dest
2488 // because the shufflevector indices are in terms of the destination lanes and
2489 // would not be able to address the smaller individual source lanes.
2490 auto GetShuffleSrc = [&](const SDValue &Lane) {
2491 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2492 return SDValue();
2493 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2494 return SDValue();
2495 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2496 VecT.getVectorNumElements())
2497 return SDValue();
2498 return Lane->getOperand(0);
2499 };
2500
2501 using ValueEntry = std::pair<SDValue, size_t>;
2502 SmallVector<ValueEntry, 16> SplatValueCounts;
2503
2504 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2505 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2506
2507 using ShuffleEntry = std::pair<SDValue, size_t>;
2508 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2509
2510 auto AddCount = [](auto &Counts, const auto &Val) {
2511 auto CountIt =
2512 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2513 if (CountIt == Counts.end()) {
2514 Counts.emplace_back(Val, 1);
2515 } else {
2516 CountIt->second++;
2517 }
2518 };
2519
2520 auto GetMostCommon = [](auto &Counts) {
2521 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2522 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2523 return *CommonIt;
2524 };
2525
2526 size_t NumConstantLanes = 0;
2527
2528 // Count eligible lanes for each type of vector creation op
2529 for (size_t I = 0; I < Lanes; ++I) {
2530 const SDValue &Lane = Op->getOperand(I);
2531 if (Lane.isUndef())
2532 continue;
2533
2534 AddCount(SplatValueCounts, Lane);
2535
2536 if (IsConstant(Lane))
2537 NumConstantLanes++;
2538 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2539 AddCount(ShuffleCounts, ShuffleSrc);
2540 if (CanSwizzle) {
2541 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2542 if (SwizzleSrcs.first)
2543 AddCount(SwizzleCounts, SwizzleSrcs);
2544 }
2545 }
2546
2547 SDValue SplatValue;
2548 size_t NumSplatLanes;
2549 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2550
2551 SDValue SwizzleSrc;
2552 SDValue SwizzleIndices;
2553 size_t NumSwizzleLanes = 0;
2554 if (SwizzleCounts.size())
2555 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2556 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2557
2558 // Shuffles can draw from up to two vectors, so find the two most common
2559 // sources.
2560 SDValue ShuffleSrc1, ShuffleSrc2;
2561 size_t NumShuffleLanes = 0;
2562 if (ShuffleCounts.size()) {
2563 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2564 llvm::erase_if(ShuffleCounts,
2565 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2566 }
2567 if (ShuffleCounts.size()) {
2568 size_t AdditionalShuffleLanes;
2569 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2570 GetMostCommon(ShuffleCounts);
2571 NumShuffleLanes += AdditionalShuffleLanes;
2572 }
2573
2574 // Predicate returning true if the lane is properly initialized by the
2575 // original instruction
2576 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2577   SDValue Result;
2578   // Prefer swizzles over shuffles over vector consts over splats
2579 if (NumSwizzleLanes >= NumShuffleLanes &&
2580 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2581 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2582 SwizzleIndices);
2583 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2584 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2585 return Swizzled == GetSwizzleSrcs(I, Lane);
2586 };
2587 } else if (NumShuffleLanes >= NumConstantLanes &&
2588 NumShuffleLanes >= NumSplatLanes) {
2589 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2590 size_t DestLaneCount = VecT.getVectorNumElements();
2591 size_t Scale1 = 1;
2592 size_t Scale2 = 1;
2593 SDValue Src1 = ShuffleSrc1;
2594 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2595 if (Src1.getValueType() != VecT) {
2596 size_t LaneSize =
2597           Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2598       assert(LaneSize > DestLaneSize);
2599 Scale1 = LaneSize / DestLaneSize;
2600 Src1 = DAG.getBitcast(VecT, Src1);
2601 }
2602 if (Src2.getValueType() != VecT) {
2603 size_t LaneSize =
2604           Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2605       assert(LaneSize > DestLaneSize);
2606 Scale2 = LaneSize / DestLaneSize;
2607 Src2 = DAG.getBitcast(VecT, Src2);
2608 }
2609
2610 int Mask[16];
2611 assert(DestLaneCount <= 16);
2612 for (size_t I = 0; I < DestLaneCount; ++I) {
2613 const SDValue &Lane = Op->getOperand(I);
2614 SDValue Src = GetShuffleSrc(Lane);
2615 if (Src == ShuffleSrc1) {
2616 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2617 } else if (Src && Src == ShuffleSrc2) {
2618 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2619 } else {
2620 Mask[I] = -1;
2621 }
2622 }
2623 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2624 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2625 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2626 auto Src = GetShuffleSrc(Lane);
2627 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2628 };
2629 } else if (NumConstantLanes >= NumSplatLanes) {
2630 SmallVector<SDValue, 16> ConstLanes;
2631 for (const SDValue &Lane : Op->op_values()) {
2632 if (IsConstant(Lane)) {
2633 // Values may need to be fixed so that they will sign extend to be
2634 // within the expected range during ISel. Check whether the value is in
2635 // bounds based on the lane bit width and if it is out of bounds, lop
2636 // off the extra bits.
2637 uint64_t LaneBits = 128 / Lanes;
2638 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2639 ConstLanes.push_back(DAG.getConstant(
2640 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2641 SDLoc(Lane), LaneT));
2642 } else {
2643 ConstLanes.push_back(Lane);
2644 }
2645 } else if (LaneT.isFloatingPoint()) {
2646 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2647 } else {
2648 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2649 }
2650 }
2651 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2652 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2653 return IsConstant(Lane);
2654 };
2655 } else {
2656 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2657 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2658 (DestLaneSize == 32 || DestLaneSize == 64)) {
2659 // Could be selected to load_zero.
2660 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2661 } else {
2662 // Use a splat (which might be selected as a load splat)
2663 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2664 }
2665 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2666 return Lane == SplatValue;
2667 };
2668 }
2669
2670 assert(Result);
2671 assert(IsLaneConstructed);
2672
2673 // Add replace_lane instructions for any unhandled values
2674 for (size_t I = 0; I < Lanes; ++I) {
2675 const SDValue &Lane = Op->getOperand(I);
2676 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2677 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2678 DAG.getConstant(I, DL, MVT::i32));
2679 }
2680
2681 return Result;
2682}
2683
2684SDValue
2685WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2686 SelectionDAG &DAG) const {
2687 SDLoc DL(Op);
2688 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2689 MVT VecType = Op.getOperand(0).getSimpleValueType();
2690 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2691 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2692
2693 // Space for two vector args and sixteen mask indices
2694 SDValue Ops[18];
2695 size_t OpIdx = 0;
2696 Ops[OpIdx++] = Op.getOperand(0);
2697 Ops[OpIdx++] = Op.getOperand(1);
2698
2699 // Expand mask indices to byte indices and materialize them as operands
2700 for (int M : Mask) {
2701 for (size_t J = 0; J < LaneBytes; ++J) {
2702 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2703 // whole lane of vector input, to allow further reduction at VM. E.g.
2704 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
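      // e.g. for a v4i32 shuffle (LaneBytes == 4), mask element 1 expands to
      // byte indices 4, 5, 6, 7 of the 16-entry i8x16.shuffle mask.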
2705 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2706 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2707 }
2708 }
2709
2710 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2711}
2712
2713SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2714 SelectionDAG &DAG) const {
2715 SDLoc DL(Op);
2716 // The legalizer does not know how to expand the unsupported comparison modes
2717 // of i64x2 vectors, so we manually unroll them here.
2718 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2719   SmallVector<SDValue, 2> LHS, RHS;
2720   DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2721 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2722 const SDValue &CC = Op->getOperand(2);
2723 auto MakeLane = [&](unsigned I) {
2724 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2725 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2726 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2727 };
2728 return DAG.getBuildVector(Op->getValueType(0), DL,
2729 {MakeLane(0), MakeLane(1)});
2730}
2731
2732SDValue
2733WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2734 SelectionDAG &DAG) const {
2735 // Allow constant lane indices, expand variable lane indices
2736 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2737 if (isa<ConstantSDNode>(IdxNode)) {
2738 // Ensure the index type is i32 to match the tablegen patterns
2739 uint64_t Idx = IdxNode->getAsZExtVal();
2740 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2741 Ops[Op.getNumOperands() - 1] =
2742 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2743 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2744 }
2745 // Perform default expansion
2746 return SDValue();
2747}
2748
2749 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2750   EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2751 // 32-bit and 64-bit unrolled shifts will have proper semantics
2752 if (LaneT.bitsGE(MVT::i32))
2753 return DAG.UnrollVectorOp(Op.getNode());
2754 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2755 SDLoc DL(Op);
2756 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2757 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2758 unsigned ShiftOpcode = Op.getOpcode();
2759 SmallVector<SDValue, 16> ShiftedElements;
2760 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2761 SmallVector<SDValue, 16> ShiftElements;
2762 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2763 SmallVector<SDValue, 16> UnrolledOps;
2764 for (size_t i = 0; i < NumLanes; ++i) {
2765 SDValue MaskedShiftValue =
2766 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2767 SDValue ShiftedValue = ShiftedElements[i];
2768 if (ShiftOpcode == ISD::SRA)
2769 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2770 ShiftedValue, DAG.getValueType(LaneT));
2771 UnrolledOps.push_back(
2772 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2773 }
2774 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2775}
2776
2777SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2778 SelectionDAG &DAG) const {
2779 SDLoc DL(Op);
2780
2781 // Only manually lower vector shifts
2782 assert(Op.getSimpleValueType().isVector());
2783
2784 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2785 auto ShiftVal = Op.getOperand(1);
2786
2787 // Try to skip bitmask operation since it is implied inside shift instruction
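  // e.g. (shl v4i32:$v, (splat (and $amt, 31))) can use $amt directly, since
  // i32x4.shl already takes the shift amount modulo 32 (a sketch of the idea).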
2788 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2789 if (MaskOp.getOpcode() != ISD::AND)
2790 return MaskOp;
2791 SDValue LHS = MaskOp.getOperand(0);
2792 SDValue RHS = MaskOp.getOperand(1);
2793 if (MaskOp.getValueType().isVector()) {
2794 APInt MaskVal;
2795 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2796 std::swap(LHS, RHS);
2797
2798 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2799 MaskVal == MaskBits)
2800 MaskOp = LHS;
2801 } else {
2802 if (!isa<ConstantSDNode>(RHS.getNode()))
2803 std::swap(LHS, RHS);
2804
2805 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2806 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2807 MaskOp = LHS;
2808 }
2809
2810 return MaskOp;
2811 };
2812
2813 // Skip vector and operation
2814 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2815 ShiftVal = DAG.getSplatValue(ShiftVal);
2816 if (!ShiftVal)
2817 return unrollVectorShift(Op, DAG);
2818
2819 // Skip scalar and operation
2820 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2821 // Use anyext because none of the high bits can affect the shift
2822 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2823
2824 unsigned Opcode;
2825 switch (Op.getOpcode()) {
2826 case ISD::SHL:
2827 Opcode = WebAssemblyISD::VEC_SHL;
2828 break;
2829 case ISD::SRA:
2830 Opcode = WebAssemblyISD::VEC_SHR_S;
2831 break;
2832 case ISD::SRL:
2833 Opcode = WebAssemblyISD::VEC_SHR_U;
2834 break;
2835 default:
2836 llvm_unreachable("unexpected opcode");
2837 }
2838
2839 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2840}
2841
2842SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2843 SelectionDAG &DAG) const {
2844 EVT ResT = Op.getValueType();
2845 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2846
2847 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2848 (SatVT == MVT::i32 || SatVT == MVT::i64))
2849 return Op;
2850
2851 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2852 return Op;
2853
2854 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2855 return Op;
2856
2857 return SDValue();
2858}
2859
2860//===----------------------------------------------------------------------===//
2861// Custom DAG combine hooks
2862//===----------------------------------------------------------------------===//
2863static SDValue
2864 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2865   auto &DAG = DCI.DAG;
2866 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2867
2868 // Hoist vector bitcasts that don't change the number of lanes out of unary
2869 // shuffles, where they are less likely to get in the way of other combines.
2870 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2871 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2872 SDValue Bitcast = N->getOperand(0);
2873 if (Bitcast.getOpcode() != ISD::BITCAST)
2874 return SDValue();
2875 if (!N->getOperand(1).isUndef())
2876 return SDValue();
2877 SDValue CastOp = Bitcast.getOperand(0);
2878 EVT SrcType = CastOp.getValueType();
2879 EVT DstType = Bitcast.getValueType();
2880 if (!SrcType.is128BitVector() ||
2881 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2882 return SDValue();
2883 SDValue NewShuffle = DAG.getVectorShuffle(
2884 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2885 return DAG.getBitcast(DstType, NewShuffle);
2886}
2887
2888/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2889/// split up into scalar instructions during legalization, and the vector
2890/// extending instructions are selected in performVectorExtendCombine below.
2891static SDValue
2892 performVectorExtendToFPCombine(SDNode *N,
2893                                TargetLowering::DAGCombinerInfo &DCI) {
2894   auto &DAG = DCI.DAG;
2895 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2896 N->getOpcode() == ISD::SINT_TO_FP);
2897
2898 EVT InVT = N->getOperand(0)->getValueType(0);
2899 EVT ResVT = N->getValueType(0);
2900 MVT ExtVT;
2901 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2902 ExtVT = MVT::v4i32;
2903 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2904 ExtVT = MVT::v2i32;
2905 else
2906 return SDValue();
2907
2908 unsigned Op =
2909       N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2910   SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2911 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2912}
2913
2914static SDValue
2915 performVectorNonNegToFPCombine(SDNode *N,
2916                                TargetLowering::DAGCombinerInfo &DCI) {
2917   auto &DAG = DCI.DAG;
2918
2919 SDNodeFlags Flags = N->getFlags();
2920 SDValue Op0 = N->getOperand(0);
2921 EVT VT = N->getValueType(0);
2922
2923 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2924 // Depending on the target (runtime) backend, this might be performance
2925 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2926 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2927 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2928 }
2929
2930 return SDValue();
2931}
2932
2933static SDValue
2934 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2935   auto &DAG = DCI.DAG;
2936 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2937 N->getOpcode() == ISD::ZERO_EXTEND);
2938
2939 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2940 // possible before the extract_subvector can be expanded.
2941 auto Extract = N->getOperand(0);
2942 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2943 return SDValue();
2944 auto Source = Extract.getOperand(0);
2945 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2946 if (IndexNode == nullptr)
2947 return SDValue();
2948 auto Index = IndexNode->getZExtValue();
2949
2950 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2951 // extracted subvector is the low or high half of its source.
2952 EVT ResVT = N->getValueType(0);
2953 if (ResVT == MVT::v8i16) {
2954 if (Extract.getValueType() != MVT::v8i8 ||
2955 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2956 return SDValue();
2957 } else if (ResVT == MVT::v4i32) {
2958 if (Extract.getValueType() != MVT::v4i16 ||
2959 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2960 return SDValue();
2961 } else if (ResVT == MVT::v2i64) {
2962 if (Extract.getValueType() != MVT::v2i32 ||
2963 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2964 return SDValue();
2965 } else {
2966 return SDValue();
2967 }
2968
2969 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2970 bool IsLow = Index == 0;
2971
2972 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2973 : WebAssemblyISD::EXTEND_HIGH_S)
2974 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2975 : WebAssemblyISD::EXTEND_HIGH_U);
2976
2977 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2978}
2979
2980static SDValue
2981 performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2982   auto &DAG = DCI.DAG;
2983
2984 auto GetWasmConversionOp = [](unsigned Op) {
2985 switch (Op) {
2986     case ISD::FP_TO_SINT_SAT:
2987       return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2988     case ISD::FP_TO_UINT_SAT:
2989       return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2990 case ISD::FP_ROUND:
2991 return WebAssemblyISD::DEMOTE_ZERO;
2992 }
2993 llvm_unreachable("unexpected op");
2994 };
2995
2996 auto IsZeroSplat = [](SDValue SplatVal) {
2997 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2998 APInt SplatValue, SplatUndef;
2999 unsigned SplatBitSize;
3000 bool HasAnyUndefs;
3001 // Endianness doesn't matter in this context because we are looking for
3002 // an all-zero value.
3003 return Splat &&
3004 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3005 HasAnyUndefs) &&
3006 SplatValue == 0;
3007 };
3008
3009 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3010 // Combine this:
3011 //
3012 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3013 //
3014 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3015 //
3016 // Or this:
3017 //
3018 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3019 //
3020 // into (f32x4.demote_zero_f64x2 $x).
3021 EVT ResVT;
3022 EVT ExpectedConversionType;
3023 auto Conversion = N->getOperand(0);
3024 auto ConversionOp = Conversion.getOpcode();
3025 switch (ConversionOp) {
3026     case ISD::FP_TO_SINT_SAT:
3027     case ISD::FP_TO_UINT_SAT:
3028       ResVT = MVT::v4i32;
3029 ExpectedConversionType = MVT::v2i32;
3030 break;
3031 case ISD::FP_ROUND:
3032 ResVT = MVT::v4f32;
3033 ExpectedConversionType = MVT::v2f32;
3034 break;
3035 default:
3036 return SDValue();
3037 }
3038
3039 if (N->getValueType(0) != ResVT)
3040 return SDValue();
3041
3042 if (Conversion.getValueType() != ExpectedConversionType)
3043 return SDValue();
3044
3045 auto Source = Conversion.getOperand(0);
3046 if (Source.getValueType() != MVT::v2f64)
3047 return SDValue();
3048
3049 if (!IsZeroSplat(N->getOperand(1)) ||
3050 N->getOperand(1).getValueType() != ExpectedConversionType)
3051 return SDValue();
3052
3053 unsigned Op = GetWasmConversionOp(ConversionOp);
3054 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3055 }
3056
3057 // Combine this:
3058 //
3059 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3060 //
3061 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3062 //
3063 // Or this:
3064 //
3065 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3066 //
3067 // into (f32x4.demote_zero_f64x2 $x).
3068 EVT ResVT;
3069 auto ConversionOp = N->getOpcode();
3070 switch (ConversionOp) {
3071   case ISD::FP_TO_SINT_SAT:
3072   case ISD::FP_TO_UINT_SAT:
3073     ResVT = MVT::v4i32;
3074 break;
3075 case ISD::FP_ROUND:
3076 ResVT = MVT::v4f32;
3077 break;
3078 default:
3079 llvm_unreachable("unexpected op");
3080 }
3081
3082 if (N->getValueType(0) != ResVT)
3083 return SDValue();
3084
3085 auto Concat = N->getOperand(0);
3086 if (Concat.getValueType() != MVT::v4f64)
3087 return SDValue();
3088
3089 auto Source = Concat.getOperand(0);
3090 if (Source.getValueType() != MVT::v2f64)
3091 return SDValue();
3092
3093 if (!IsZeroSplat(Concat.getOperand(1)) ||
3094 Concat.getOperand(1).getValueType() != MVT::v2f64)
3095 return SDValue();
3096
3097 unsigned Op = GetWasmConversionOp(ConversionOp);
3098 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3099}
3100
3101// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3102static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3103 const SDLoc &DL, unsigned VectorWidth) {
3104 EVT VT = Vec.getValueType();
3105 EVT ElVT = VT.getVectorElementType();
3106 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3107 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3108 VT.getVectorNumElements() / Factor);
3109
3110 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3111 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3112 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3113
3114 // This is the index of the first element of the VectorWidth-bit chunk
3115 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3116 IdxVal &= ~(ElemsPerChunk - 1);
3117
3118 // If the input is a buildvector just emit a smaller one.
3119 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3120 return DAG.getBuildVector(ResultVT, DL,
3121 Vec->ops().slice(IdxVal, ElemsPerChunk));
3122
3123 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3124 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3125}
3126
3127// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3128// is the expected destination value type after recursion. In is the initial
3129// input. Note that the input should have enough leading zero bits to prevent
3130// NARROW_U from saturating results.
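// e.g. truncating a (zero-masked) v8i32 to v8i16 splits the input into two
// v4i32 halves and emits a single i16x8.narrow_i32x4_u(lo, hi); wider inputs
// recurse until the destination type is reached (a sketch of the recursion).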
3131 static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3132                                         SelectionDAG &DAG) {
3133 EVT SrcVT = In.getValueType();
3134
3135 // No truncation required, we might get here due to recursive calls.
3136 if (SrcVT == DstVT)
3137 return In;
3138
3139 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3140 unsigned NumElems = SrcVT.getVectorNumElements();
3141 if (!isPowerOf2_32(NumElems))
3142 return SDValue();
3143 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3144 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3145
3146 LLVMContext &Ctx = *DAG.getContext();
3147 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3148
3149 // Narrow to the largest type possible:
3150 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3151 EVT InVT = MVT::i16, OutVT = MVT::i8;
3152 if (SrcVT.getScalarSizeInBits() > 16) {
3153 InVT = MVT::i32;
3154 OutVT = MVT::i16;
3155 }
3156 unsigned SubSizeInBits = SrcSizeInBits / 2;
3157 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3158 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3159
3160 // Split lower/upper subvectors.
3161 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3162 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3163
3164 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3165 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3166 Lo = DAG.getBitcast(InVT, Lo);
3167 Hi = DAG.getBitcast(InVT, Hi);
3168 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3169 return DAG.getBitcast(DstVT, Res);
3170 }
3171
3172 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3173 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3174 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3175 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3176
3177 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3178 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3179 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3180}
3181
3182 static SDValue performTruncateCombine(SDNode *N,
3183                                       TargetLowering::DAGCombinerInfo &DCI) {
3184   auto &DAG = DCI.DAG;
3185
3186 SDValue In = N->getOperand(0);
3187 EVT InVT = In.getValueType();
3188 if (!InVT.isSimple())
3189 return SDValue();
3190
3191 EVT OutVT = N->getValueType(0);
3192 if (!OutVT.isVector())
3193 return SDValue();
3194
3195 EVT OutSVT = OutVT.getVectorElementType();
3196 EVT InSVT = InVT.getVectorElementType();
3197 // Currently only cover truncate to v16i8 or v8i16.
3198 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3199 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3200 return SDValue();
3201
3202 SDLoc DL(N);
3203   APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3204                                     OutVT.getScalarSizeInBits());
3205 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3206 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3207}
3208
3209 static SDValue performBitcastCombine(SDNode *N,
3210                                      TargetLowering::DAGCombinerInfo &DCI) {
3211   using namespace llvm::SDPatternMatch;
3212 auto &DAG = DCI.DAG;
3213 SDLoc DL(N);
3214 SDValue Src = N->getOperand(0);
3215 EVT VT = N->getValueType(0);
3216 EVT SrcVT = Src.getValueType();
3217
3218 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3219 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3220 return SDValue();
3221
3222 unsigned NumElts = SrcVT.getVectorNumElements();
3223 EVT Width = MVT::getIntegerVT(128 / NumElts);
3224
3225 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3226 // ==> bitmask
3227 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3228 return DAG.getZExtOrTrunc(
3229 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3230 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3231 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3232 SrcVT.changeVectorElementType(
3233 *DAG.getContext(), Width))}),
3234 DL, VT);
3235 }
3236
3237 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3238 if (NumElts == 32 || NumElts == 64) {
3239     // Strategy: setcc each 128-bit chunk separately (v16i8 -> v16i1),
3240     // bitcast each result to an i16 mask, extend to i32 or i64, and
3241     // accumulate the masks, shifting left by 16 bits each step.
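    // e.g. for N == 32 (two v16i8 chunks) the result is roughly
    //   (zext(bitmask(chunk0)) << 16) + zext(bitmask(chunk1))
    // with each 16-bit mask coming from one v16i1 setcc below.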
3242 SDValue Concat, SetCCVector;
3243 ISD::CondCode SetCond;
3244
3245 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3246 m_CondCode(SetCond)))))
3247 return SDValue();
3248 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3249 return SDValue();
3250
3251 uint64_t ElementWidth =
3252         SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3253
3254 SmallVector<SDValue> VectorsToShuffle;
3255 for (size_t I = 0; I < Concat->ops().size(); I++) {
3256 VectorsToShuffle.push_back(DAG.getBitcast(
3257 MVT::i16,
3258 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3259 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3260 DAG, DL, 128),
3261 SetCond)));
3262 }
3263
3264 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3265 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3266
3267 for (SDValue V : VectorsToShuffle) {
3268 ReturningInteger = DAG.getNode(
3269 ISD::SHL, DL, ReturnType,
3270 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3271
3272 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3273 ReturningInteger =
3274 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3275 }
3276
3277 return ReturningInteger;
3278 }
3279
3280 return SDValue();
3281}
3282
3283 static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3284   // any_true (setcc <X>, 0, eq) => (not (all_true X))
3285 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3286 // any_true (setcc <X>, 0, ne) => (any_true X)
3287 // all_true (setcc <X>, 0, ne) => (all_true X)
3288 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3289 using namespace llvm::SDPatternMatch;
3290
3291 SDValue LHS;
3292 if (N->getNumOperands() < 2 ||
3293 !sd_match(N->getOperand(1),
3294                 m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3295     return SDValue();
3296 EVT LT = LHS.getValueType();
3297 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3298 return SDValue();
3299
3300 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3301 ISD::CondCode SetType,
3302 Intrinsic::WASMIntrinsics InPost) {
3303 if (N->getConstantOperandVal(0) != InPre)
3304 return SDValue();
3305
3306 SDValue LHS;
3307 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3308 m_SpecificCondCode(SetType))))
3309 return SDValue();
3310
3311 SDLoc DL(N);
3312 SDValue Ret = DAG.getZExtOrTrunc(
3313 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3314 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3315 DL, MVT::i1);
3316 if (SetType == ISD::SETEQ)
3317 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3318 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3319 };
3320
3321 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3322 Intrinsic::wasm_alltrue))
3323 return AnyTrueEQ;
3324 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3325 Intrinsic::wasm_anytrue))
3326 return AllTrueEQ;
3327 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3328 Intrinsic::wasm_anytrue))
3329 return AnyTrueNE;
3330 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3331 Intrinsic::wasm_alltrue))
3332 return AllTrueNE;
3333
3334 return SDValue();
3335}
3336
3337template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3338 Intrinsic::ID Intrin>
3339 static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3340   SDValue LHS = N->getOperand(0);
3341 SDValue RHS = N->getOperand(1);
3342 SDValue Cond = N->getOperand(2);
3343 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3344 return SDValue();
3345
3346 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3347 return SDValue();
3348
3349 SDLoc DL(N);
3350 SDValue Ret = DAG.getZExtOrTrunc(
3351 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3352 {DAG.getConstant(Intrin, DL, MVT::i32),
3353 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3354 DL, MVT::i1);
3355 if (RequiresNegate)
3356 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3357 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3358}
3359
3360 /// Try to convert an i128 comparison to a v16i8 comparison before type
3361 /// legalization splits it up into chunks.
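/// For example (illustrative annotation, not upstream text): an i128 SETEQ of
/// two loaded values can be lowered as a v16i8 lane-wise equality followed by
/// i8x16.all_true, rather than comparing the value piecewise after type
/// legalization has split it apart.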
3362static SDValue
3363 combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3364 const WebAssemblySubtarget *Subtarget) {
3365
3366 SDLoc DL(N);
3367 SDValue X = N->getOperand(0);
3368 SDValue Y = N->getOperand(1);
3369 EVT VT = N->getValueType(0);
3370 EVT OpVT = X.getValueType();
3371
3372 SelectionDAG &DAG = DCI.DAG;
3373 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3374 Attribute::NoImplicitFloat))
3375 return SDValue();
3376
3377 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3378 // We're looking for an oversized integer equality comparison with SIMD
3379 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3380 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3381 return SDValue();
3382
3383 // Don't perform this combine if constructing the vector will be expensive.
3384 auto IsVectorBitCastCheap = [](SDValue X) {
3386 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3387 };
3388
3389 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3390 return SDValue();
3391
3392 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3393 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3394 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3395
3396 SDValue Intr =
3397 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3398 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3399 : Intrinsic::wasm_anytrue,
3400 DL, MVT::i32),
3401 Cmp});
3402
3403 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3404 ISD::SETNE);
3405}
3406
3407 static SDValue performSETCCCombine(SDNode *N,
3408 TargetLowering::DAGCombinerInfo &DCI,
3409 const WebAssemblySubtarget *Subtarget) {
3410 if (!DCI.isBeforeLegalize())
3411 return SDValue();
3412
3413 EVT VT = N->getValueType(0);
3414 if (!VT.isScalarInteger())
3415 return SDValue();
3416
3417 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3418 return V;
3419
3420 SDValue LHS = N->getOperand(0);
3421 if (LHS->getOpcode() != ISD::BITCAST)
3422 return SDValue();
3423
3424 EVT FromVT = LHS->getOperand(0).getValueType();
3425 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3426 return SDValue();
3427
3428 unsigned NumElts = FromVT.getVectorNumElements();
3429 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3430 return SDValue();
3431
3432 if (!cast<ConstantSDNode>(N->getOperand(1)))
3433 return SDValue();
3434
3435 auto &DAG = DCI.DAG;
3436 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3437 MVT::getIntegerVT(128 / NumElts));
3438 // setcc (iN (bitcast (vNi1 X))), 0, ne
3439 // ==> any_true (vNi1 X)
3440 if (SDValue Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3441 N, VecVT, DAG)) {
3442 return Match;
3443 }
3444 // setcc (iN (bitcast (vNi1 X))), 0, eq
3445 // ==> xor (any_true (vNi1 X)), -1
3446 if (SDValue Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3447 N, VecVT, DAG)) {
3448 return Match;
3449 }
3450 // setcc (iN (bitcast (vNi1 X))), -1, eq
3451 // ==> all_true (vNi1 X)
3452 if (SDValue Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3453 N, VecVT, DAG)) {
3454 return Match;
3455 }
3456 // setcc (iN (bitcast (vNi1 X))), -1, ne
3457 // ==> xor (all_true (vNi1 X)), -1
3458 if (SDValue Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3459 N, VecVT, DAG)) {
3460 return Match;
3461 }
3462 return SDValue();
3463}
3464
3465 static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3466 EVT VT = N->getValueType(0);
3467 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3468 return SDValue();
3469
3470 // Mul with extending inputs.
3471 SDValue LHS = N->getOperand(0);
3472 SDValue RHS = N->getOperand(1);
3473 if (LHS.getOpcode() != RHS.getOpcode())
3474 return SDValue();
3475
3476 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3477 LHS.getOpcode() != ISD::ZERO_EXTEND)
3478 return SDValue();
3479
3480 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3481 return SDValue();
3482
3483 EVT FromVT = LHS->getOperand(0).getValueType();
3484 EVT EltTy = FromVT.getVectorElementType();
3485 if (EltTy != MVT::i8)
3486 return SDValue();
3487
3488 // For an input DAG that looks like this
3489 // %a = input_type
3490 // %b = input_type
3491 // %lhs = extend %a to output_type
3492 // %rhs = extend %b to output_type
3493 // %mul = mul %lhs, %rhs
3494
3495 // input_type | output_type | instructions
3496 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3497 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3498 // | | %low_low = i32x4.ext_low_i16x8_ %low
3499 // | | %low_high = i32x4.ext_high_i16x8_ %low
3500 // | | %high_low = i32x4.ext_low_i16x8_ %high
3501 // | | %high_high = i32x4.ext_high_i16x8_ %high
3502 // | | %res = concat_vector(...)
3503 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3504 // | | %low_low = i32x4.ext_low_i16x8_ %low
3505 // | | %low_high = i32x4.ext_high_i16x8_ %low
3506 // | | %res = concat_vector(%low_low, %low_high)
3507
3508 SDLoc DL(N);
3509 unsigned NumElts = VT.getVectorNumElements();
3510 SDValue ExtendInLHS = LHS->getOperand(0);
3511 SDValue ExtendInRHS = RHS->getOperand(0);
3512 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3513 unsigned ExtendLowOpc =
3514 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3515 unsigned ExtendHighOpc =
3516 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3517
3518 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3519 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3520 };
3521 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3522 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3523 };
3524
3525 if (NumElts == 16) {
3526 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3527 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3528 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3529 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3530 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3531 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3532 SDValue SubVectors[] = {
3533 GetExtendLow(MVT::v4i32, MulLow),
3534 GetExtendHigh(MVT::v4i32, MulLow),
3535 GetExtendLow(MVT::v4i32, MulHigh),
3536 GetExtendHigh(MVT::v4i32, MulHigh),
3537 };
3538 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3539 } else {
3540 assert(NumElts == 8);
3541 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3542 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3543 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3544 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3545 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3546 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3547 }
3548 return SDValue();
3549}
3550
3551 static SDValue performMulCombine(SDNode *N,
3552 TargetLowering::DAGCombinerInfo &DCI) {
3553 assert(N->getOpcode() == ISD::MUL);
3554 EVT VT = N->getValueType(0);
3555 if (!VT.isVector())
3556 return SDValue();
3557
3558 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3559 return Res;
3560
3561 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3562 // extend them to v8i16.
3563 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3564 return SDValue();
3565
3566 SDLoc DL(N);
3567 SelectionDAG &DAG = DCI.DAG;
3568 SDValue LHS = N->getOperand(0);
3569 SDValue RHS = N->getOperand(1);
3570 EVT MulVT = MVT::v8i16;
3571
3572 if (VT == MVT::v8i8) {
3573 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3574 DAG.getUNDEF(MVT::v8i8));
3575 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3576 DAG.getUNDEF(MVT::v8i8));
3577 SDValue LowLHS =
3578 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3579 SDValue LowRHS =
3580 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3581 SDValue MulLow = DAG.getBitcast(
3582 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3583 // Take the low byte of each lane.
3584 SDValue Shuffle = DAG.getVectorShuffle(
3585 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3586 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3587 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3588 } else {
3589 assert(VT == MVT::v16i8 && "Expected v16i8");
3590 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3591 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3592 SDValue HighLHS =
3593 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3594 SDValue HighRHS =
3595 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3596
3597 SDValue MulLow =
3598 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3599 SDValue MulHigh =
3600 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3601
3602 // Take the low byte of each lane.
3603 return DAG.getVectorShuffle(
3604 VT, DL, MulLow, MulHigh,
3605 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3606 }
3607}
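// Annotation (illustrative, not part of the upstream source): extending the
// i8 lanes, multiplying as i16, and keeping only the low byte of each 16-bit
// product is correct because i8 multiplication wraps modulo 256, and the low
// eight bits of the 16-bit product are the same whether the inputs were
// zero- or sign-extended.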
3608
3609SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3610 SelectionDAG &DAG) {
3611 SDLoc DL(In);
3612 LLVMContext &Ctx = *DAG.getContext();
3613 EVT InVT = In.getValueType();
3614 unsigned NumElems = InVT.getVectorNumElements() * 2;
3615 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3616 SDValue Concat =
3617 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3618 if (NumElems < RequiredNumElems) {
3619 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3620 }
3621 return Concat;
3622}
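// Annotation (illustrative, not part of the upstream source):
// DoubleVectorWidth keeps concatenating the input with poison lanes until the
// requested lane count is reached, e.g. a v2i32 input with
// RequiredNumElems == 16 grows v2i32 -> v4i32 -> v8i32 -> v16i32, with only
// the original low lanes defined.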
3623
3624 SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3625 EVT OutVT = N->getValueType(0);
3626 if (!OutVT.isVector())
3627 return SDValue();
3628
3629 EVT OutElTy = OutVT.getVectorElementType();
3630 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3631 return SDValue();
3632
3633 unsigned NumElems = OutVT.getVectorNumElements();
3634 if (!isPowerOf2_32(NumElems))
3635 return SDValue();
3636
3637 EVT FPVT = N->getOperand(0)->getValueType(0);
3638 if (FPVT.getVectorElementType() != MVT::f32)
3639 return SDValue();
3640
3641 SDLoc DL(N);
3642
3643 // First, convert to i32.
3644 LLVMContext &Ctx = *DAG.getContext();
3645 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3646 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3647 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3648 OutVT.getScalarSizeInBits());
3649 // Mask out the top MSBs.
3650 SDValue Masked =
3651 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3652
3653 if (OutVT.getSizeInBits() < 128) {
3654 // Create a wide enough vector that we can use narrow.
3655 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3656 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3657 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3658 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3659 return DAG.getBitcast(
3660 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3661 } else {
3662 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3663 }
3664 return SDValue();
3665}
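// Annotation (illustrative, not part of the upstream source): for a
// fptoui v2f32 -> v2i8, the combine above first converts to v2i32, masks each
// lane down to its low 8 bits, widens the masked vector to v16i32 (see
// DoubleVectorWidth), narrows it to v16i8 with NARROW, and finally extracts
// the low 16 bits as the v2i8 result.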
3666
3667SDValue
3668WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3669 DAGCombinerInfo &DCI) const {
3670 switch (N->getOpcode()) {
3671 default:
3672 return SDValue();
3673 case ISD::BITCAST:
3674 return performBitcastCombine(N, DCI);
3675 case ISD::SETCC:
3676 return performSETCCCombine(N, DCI, Subtarget);
3677 case ISD::VECTOR_SHUFFLE:
3678 return performVECTOR_SHUFFLECombine(N, DCI);
3679 case ISD::SIGN_EXTEND:
3680 case ISD::ZERO_EXTEND:
3681 return performVectorExtendCombine(N, DCI);
3682 case ISD::UINT_TO_FP:
3683 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3684 return ExtCombine;
3685 return performVectorNonNegToFPCombine(N, DCI);
3686 case ISD::SINT_TO_FP:
3687 return performVectorExtendToFPCombine(N, DCI);
3688 case ISD::FP_TO_SINT_SAT:
3689 case ISD::FP_TO_UINT_SAT:
3690 case ISD::FP_ROUND:
3691 case ISD::CONCAT_VECTORS:
3692 return performVectorTruncZeroCombine(N, DCI);
3693 case ISD::FP_TO_SINT:
3694 case ISD::FP_TO_UINT:
3695 return performConvertFPCombine(N, DCI.DAG);
3696 case ISD::TRUNCATE:
3697 return performTruncateCombine(N, DCI);
3698 case ISD::INTRINSIC_WO_CHAIN:
3699 return performAnyAllCombine(N, DCI.DAG);
3700 case ISD::MUL:
3701 return performMulCombine(N, DCI);
3702 }
3703}