WebAssemblyISelLowering.cpp
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we handle it with custom lowering.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled, these instructions don't
156 // need to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
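// Illustrative consequence of the actions above: a scalar frem on f32 lowers
// to a libcall to fmodf, while frem on v4f32 is expanded by unrolling to
// scalar frems, since wasm has no floating-point remainder instruction.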
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Legal);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls, with extend inputs, to extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector-to-integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversion ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // There is no vector conditional select instruction
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // Expand integer operations supported for scalars but not SIMD
280 for (auto Op :
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
284
285 // But we do have integer min and max operations
286 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
287 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
289
290 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
291 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
292 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
293 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
294
295 // Custom lower bit counting operations for other types to scalarize them.
296 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
297 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
299
300 // Expand float operations supported for scalars but not SIMD
303 for (auto T : {MVT::v4f32, MVT::v2f64})
305
306 // Unsigned comparison operations are unavailable for i64x2 vectors.
308 setCondCodeAction(CC, MVT::v2i64, Custom);
309
310 // 64x2 conversions are not in the spec
311 for (auto Op :
313 for (auto T : {MVT::v2i64, MVT::v2f64})
315
316 // But saturating fp_to_int conversions are
318 setOperationAction(Op, MVT::v4i32, Custom);
319 if (Subtarget->hasFP16()) {
320 setOperationAction(Op, MVT::v8i16, Custom);
321 }
322 }
323
324 // Support vector extending
329 }
330
331 if (Subtarget->hasFP16()) {
332 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
333 }
334
335 if (Subtarget->hasRelaxedSIMD()) {
338 }
339
340 // Partial MLA reductions.
342 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
343 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
344 }
345 }
346
347 // As a special case, these operators use the type to mean the type to
348 // sign-extend from.
350 if (!Subtarget->hasSignExt()) {
351 // Sign extends are legal only when extending a vector extract
352 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
353 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
355 }
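// Illustratively, with simd128 but without the sign-ext feature, the Custom
// action above keeps (sext_inreg (extract_vector_elt $v, $i), i8) selectable
// as the sign-extending i8x16.extract_lane_s, while other sign_extend_inreg
// nodes expand to a shift-left/shift-right-arithmetic pair.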
358
359 // Dynamic stack allocation: use the default expansion.
363
367
368 // Expand these forms; we pattern-match the forms that we can handle in isel.
369 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
370 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
372
373 // We have custom switch handling.
375
376 // WebAssembly doesn't have:
377 // - Floating-point extending loads.
378 // - Floating-point truncating stores.
379 // - i1 extending loads.
380 // - truncating SIMD stores and most extending loads
381 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
382 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
383 for (auto T : MVT::integer_valuetypes())
384 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
385 setLoadExtAction(Ext, T, MVT::i1, Promote);
386 if (Subtarget->hasSIMD128()) {
387 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
388 MVT::v2f64}) {
389 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
390 if (MVT(T) != MemT) {
392 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
393 setLoadExtAction(Ext, T, MemT, Expand);
394 }
395 }
396 }
397 // But some vector extending loads are legal
398 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
399 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
400 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
401 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
402 }
403 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
404 }
405
406 // Don't do anything clever with build_pairs
408
409 // Trap lowers to wasm unreachable
410 setOperationAction(ISD::TRAP, MVT::Other, Legal);
412
413 // Exception handling intrinsics
417
419
420 // Always convert switches to br_tables unless there is only one case, which
421 // is equivalent to a simple branch. This reduces code size for wasm, and we
422 // defer possible jump table optimizations to the VM.
424}
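// A brief sketch of how the declarations above take effect (not part of the
// file): nodes whose action is Custom are routed to LowerOperation below,
// while Expand asks the legalizer to rewrite the node in terms of supported
// operations. For instance, after this constructor runs on a SIMD target:
//   getOperationAction(ISD::MUL, MVT::v16i8) == Expand;        // no i8x16.mul
//   getOperationAction(ISD::BUILD_VECTOR, MVT::v4i32) == Custom;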
425
434
443
445WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
446 const AtomicRMWInst *AI) const {
447 // We have wasm instructions for these
448 switch (AI->getOperation()) {
456 default:
457 break;
458 }
460}
461
462bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
463 // Implementation copied from X86TargetLowering.
464 unsigned Opc = VecOp.getOpcode();
465
466 // Assume target opcodes can't be scalarized.
467 // TODO - do we have any exceptions?
469 return false;
470
471 // If the vector op is not supported, try to convert to scalar.
472 EVT VecVT = VecOp.getValueType();
474 return true;
475
476 // If the vector op is supported, but the scalar op is not, the transform may
477 // not be worthwhile.
478 EVT ScalarVT = VecVT.getScalarType();
479 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
480}
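// e.g. DAGCombiner uses this hook when deciding whether to rewrite
// (extract_vector_elt (add $v, $w), $i) as an add of two extracted scalars:
// always worthwhile if the vector add is unsupported, and otherwise only
// when the scalar add is supported.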
481
482FastISel *WebAssemblyTargetLowering::createFastISel(
483 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
484 const LibcallLoweringInfo *LibcallLowering) const {
485 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
486}
487
488MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
489 EVT VT) const {
490 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
491 if (BitWidth > 1 && BitWidth < 8)
492 BitWidth = 8;
493
494 if (BitWidth > 64) {
495 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
496 // the count to be an i32.
497 BitWidth = 32;
499 "32-bit shift counts ought to be enough for anyone");
500 }
501
504 "Unable to represent scalar shift amount type");
505 return Result;
506}
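// Worked examples of the rounding above (NextPowerOf2 returns the next power
// of two strictly greater than its argument): i32 gives NextPowerOf2(31) ==
// 32, i8 gives NextPowerOf2(7) == 8, and an i48 shift rounds up to 64. An
// i128 shift would round to 128, which is why it is clamped back to a 32-bit
// count for the libcall path.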
507
508// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
509// undefined result on invalid/overflow, to the WebAssembly opcode, which
510// traps on invalid/overflow.
513 const TargetInstrInfo &TII,
514 bool IsUnsigned, bool Int64,
515 bool Float64, unsigned LoweredOpcode) {
517
518 Register OutReg = MI.getOperand(0).getReg();
519 Register InReg = MI.getOperand(1).getReg();
520
521 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
522 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
523 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
524 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
525 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
526 unsigned Eqz = WebAssembly::EQZ_I32;
527 unsigned And = WebAssembly::AND_I32;
528 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
529 int64_t Substitute = IsUnsigned ? 0 : Limit;
530 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
531 auto &Context = BB->getParent()->getFunction().getContext();
532 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
533
534 const BasicBlock *LLVMBB = BB->getBasicBlock();
535 MachineFunction *F = BB->getParent();
536 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
537 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
538 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
539
541 F->insert(It, FalseMBB);
542 F->insert(It, TrueMBB);
543 F->insert(It, DoneMBB);
544
545 // Transfer the remainder of BB and its successor edges to DoneMBB.
546 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
548
549 BB->addSuccessor(TrueMBB);
550 BB->addSuccessor(FalseMBB);
551 TrueMBB->addSuccessor(DoneMBB);
552 FalseMBB->addSuccessor(DoneMBB);
553
554 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
555 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
556 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
557 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
558 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
559 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
560 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
561
562 MI.eraseFromParent();
563 // For signed numbers, we can do a single comparison to determine whether
564 // fabs(x) is within range.
565 if (IsUnsigned) {
566 Tmp0 = InReg;
567 } else {
568 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
569 }
570 BuildMI(BB, DL, TII.get(FConst), Tmp1)
571 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
572 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
573
574 // For unsigned numbers, we have to do a separate comparison with zero.
575 if (IsUnsigned) {
576 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
577 Register SecondCmpReg =
578 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
579 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
580 BuildMI(BB, DL, TII.get(FConst), Tmp1)
581 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
582 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
583 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
584 CmpReg = AndReg;
585 }
586
587 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
588
589 // Create the CFG diamond to select between doing the conversion or using
590 // the substitute value.
591 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
592 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
593 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
594 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
595 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
596 .addReg(FalseReg)
597 .addMBB(FalseMBB)
598 .addReg(TrueReg)
599 .addMBB(TrueMBB);
600
601 return DoneMBB;
602}
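// A standalone C++ model of the diamond built above, for the unsigned
// f32 -> i32 case (IsUnsigned, !Int64, !Float64); a sketch for illustration,
// not part of this file, with illustrative names. CmpVal here is
// -(double)INT32_MIN * 2.0 == 4294967296.0, i.e. 2^32.
#include <cstdint>
#include <cstdio>

static uint32_t loweredFPToUI32(float X) {
  // The LT and GE comparisons from the lowering; NaN fails the LT, so NaN
  // also takes the substitute path.
  bool InRange = (X < 4294967296.0f) && (X >= 0.0f);
  if (!InRange)
    return 0; // TrueMBB: the Substitute value, 0 for unsigned conversions.
  // FalseMBB: the trapping wasm i32.trunc_f32_u is guaranteed safe here.
  return (uint32_t)X;
}

int main() {
  // Prints "3 0 0": in range, negative, and too large, respectively.
  std::printf("%u %u %u\n", loweredFPToUI32(3.5f), loweredFPToUI32(-1.0f),
              loweredFPToUI32(5e9f));
}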
603
604// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
605// instruction to handle the zero-length case.
608 const TargetInstrInfo &TII, bool Int64) {
610
611 MachineOperand DstMem = MI.getOperand(0);
612 MachineOperand SrcMem = MI.getOperand(1);
613 MachineOperand Dst = MI.getOperand(2);
614 MachineOperand Src = MI.getOperand(3);
615 MachineOperand Len = MI.getOperand(4);
616
617 // If the length is a constant, we don't actually need the check.
618 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
619 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
620 Def->getOpcode() == WebAssembly::CONST_I64) {
621 if (Def->getOperand(1).getImm() == 0) {
622 // A zero-length memcpy is a no-op.
623 MI.eraseFromParent();
624 return BB;
625 }
626 // A non-zero-length memcpy doesn't need a zero check.
627 unsigned MemoryCopy =
628 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
629 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
630 .add(DstMem)
631 .add(SrcMem)
632 .add(Dst)
633 .add(Src)
634 .add(Len);
635 MI.eraseFromParent();
636 return BB;
637 }
638 }
639
640 // We're going to add an extra use to `Len` to test if it's zero; that
641 // use shouldn't be a kill, even if the original use is.
642 MachineOperand NoKillLen = Len;
643 NoKillLen.setIsKill(false);
644
645 // Decide on which `MachineInstr` opcode we're going to use.
646 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
647 unsigned MemoryCopy =
648 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
649
650 // Create two new basic blocks; one for the new `memory.copy` that we can
651 // branch over, and one for the rest of the instructions after the original
652 // `memory.copy`.
653 const BasicBlock *LLVMBB = BB->getBasicBlock();
654 MachineFunction *F = BB->getParent();
655 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
656 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
657
659 F->insert(It, TrueMBB);
660 F->insert(It, DoneMBB);
661
662 // Transfer the remainder of BB and its successor edges to DoneMBB.
663 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
665
666 // Connect the CFG edges.
667 BB->addSuccessor(TrueMBB);
668 BB->addSuccessor(DoneMBB);
669 TrueMBB->addSuccessor(DoneMBB);
670
671 // Create a virtual register for the `Eqz` result.
672 unsigned EqzReg;
673 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
674
675 // Erase the original `memory.copy`.
676 MI.eraseFromParent();
677
678 // Test if `Len` is zero.
679 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
680
681 // Insert a new `memory.copy`.
682 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
683 .add(DstMem)
684 .add(SrcMem)
685 .add(Dst)
686 .add(Src)
687 .add(Len);
688
689 // Create the CFG triangle.
690 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
691 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
692
693 return DoneMBB;
694}
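// The net effect of the triangle above, sketched as pseudocode (illustrative,
// not part of the file):
//   if (Len != 0)
//     memory.copy(DstMem, SrcMem, Dst, Src, Len);
// The guard exists because LLVM IR permits a zero-length memcpy with dangling
// or out-of-bounds pointers, while wasm's memory.copy can still trap on an
// out-of-bounds address even when the length is zero.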
695
696// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
697// instruction to handle the zero-length case.
700 const TargetInstrInfo &TII, bool Int64) {
702
703 MachineOperand Mem = MI.getOperand(0);
704 MachineOperand Dst = MI.getOperand(1);
705 MachineOperand Val = MI.getOperand(2);
706 MachineOperand Len = MI.getOperand(3);
707
708 // If the length is a constant, we don't actually need the check.
709 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
710 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
711 Def->getOpcode() == WebAssembly::CONST_I64) {
712 if (Def->getOperand(1).getImm() == 0) {
713 // A zero-length memset is a no-op.
714 MI.eraseFromParent();
715 return BB;
716 }
717 // A non-zero-length memset doesn't need a zero check.
718 unsigned MemoryFill =
719 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
720 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
721 .add(Mem)
722 .add(Dst)
723 .add(Val)
724 .add(Len);
725 MI.eraseFromParent();
726 return BB;
727 }
728 }
729
730 // We're going to add an extra use to `Len` to test if it's zero; that
731 // use shouldn't be a kill, even if the original use is.
732 MachineOperand NoKillLen = Len;
733 NoKillLen.setIsKill(false);
734
735 // Decide on which `MachineInstr` opcode we're going to use.
736 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
737 unsigned MemoryFill =
738 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
739
740 // Create two new basic blocks; one for the new `memory.fill` that we can
741 // branch over, and one for the rest of the instructions after the original
742 // `memory.fill`.
743 const BasicBlock *LLVMBB = BB->getBasicBlock();
744 MachineFunction *F = BB->getParent();
745 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
746 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
747
749 F->insert(It, TrueMBB);
750 F->insert(It, DoneMBB);
751
752 // Transfer the remainder of BB and its successor edges to DoneMBB.
753 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
755
756 // Connect the CFG edges.
757 BB->addSuccessor(TrueMBB);
758 BB->addSuccessor(DoneMBB);
759 TrueMBB->addSuccessor(DoneMBB);
760
761 // Create a virtual register for the `Eqz` result.
762 unsigned EqzReg;
763 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
764
765 // Erase the original `memory.fill`.
766 MI.eraseFromParent();
767
768 // Test if `Len` is zero.
769 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
770
771 // Insert a new `memory.fill`.
772 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
773
774 // Create the CFG triangle.
775 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
776 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
777
778 return DoneMBB;
779}
780
781static MachineBasicBlock *
783 const WebAssemblySubtarget *Subtarget,
784 const TargetInstrInfo &TII) {
785 MachineInstr &CallParams = *CallResults.getPrevNode();
786 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
787 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
788 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
789
790 bool IsIndirect =
791 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
792 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
793
794 bool IsFuncrefCall = false;
795 if (IsIndirect && CallParams.getOperand(0).isReg()) {
796 Register Reg = CallParams.getOperand(0).getReg();
797 const MachineFunction *MF = BB->getParent();
798 const MachineRegisterInfo &MRI = MF->getRegInfo();
799 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
800 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
801 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
802 }
803
804 unsigned CallOp;
805 if (IsIndirect && IsRetCall) {
806 CallOp = WebAssembly::RET_CALL_INDIRECT;
807 } else if (IsIndirect) {
808 CallOp = WebAssembly::CALL_INDIRECT;
809 } else if (IsRetCall) {
810 CallOp = WebAssembly::RET_CALL;
811 } else {
812 CallOp = WebAssembly::CALL;
813 }
814
815 MachineFunction &MF = *BB->getParent();
816 const MCInstrDesc &MCID = TII.get(CallOp);
817 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
818
819 // Move the function pointer to the end of the arguments for indirect calls
820 if (IsIndirect) {
821 auto FnPtr = CallParams.getOperand(0);
822 CallParams.removeOperand(0);
823
824 // For funcrefs, call_indirect is done through __funcref_call_table, and
825 // the funcref is always installed in slot 0 of the table. Therefore,
826 // instead of adding the function pointer at the end of the params list,
827 // we add a zero (the slot index in
828 // __funcref_call_table).
829 if (IsFuncrefCall) {
830 Register RegZero =
831 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
832 MachineInstrBuilder MIBC0 =
833 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
834
835 BB->insert(CallResults.getIterator(), MIBC0);
836 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
837 } else
838 CallParams.addOperand(FnPtr);
839 }
840
841 for (auto Def : CallResults.defs())
842 MIB.add(Def);
843
844 if (IsIndirect) {
845 // Placeholder for the type index.
846 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
847 MIB.addImm(0);
848 // The table into which this call_indirect indexes.
849 MCSymbolWasm *Table = IsFuncrefCall
851 MF.getContext(), Subtarget)
853 MF.getContext(), Subtarget);
854 if (Subtarget->hasCallIndirectOverlong()) {
855 MIB.addSym(Table);
856 } else {
857 // For the MVP there is at most one table whose number is 0, but we can't
858 // write a table symbol or issue relocations. Instead we just ensure the
859 // table is live and write a zero.
860 Table->setNoStrip();
861 MIB.addImm(0);
862 }
863 }
864
865 for (auto Use : CallParams.uses())
866 MIB.add(Use);
867
868 BB->insert(CallResults.getIterator(), MIB);
869 CallParams.eraseFromParent();
870 CallResults.eraseFromParent();
871
872 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
873 // table slot with ref.null upon call_indirect return.
874 //
875 // This generates the following code, which comes right after a call_indirect
876 // of a funcref:
877 //
878 // i32.const 0
879 // ref.null func
880 // table.set __funcref_call_table
881 if (IsIndirect && IsFuncrefCall) {
883 MF.getContext(), Subtarget);
884 Register RegZero =
885 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
886 MachineInstr *Const0 =
887 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
888 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
889
890 Register RegFuncref =
891 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
892 MachineInstr *RefNull =
893 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
894 BB->insertAfter(Const0->getIterator(), RefNull);
895
896 MachineInstr *TableSet =
897 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
898 .addSym(Table)
899 .addReg(RegZero)
900 .addReg(RegFuncref);
901 BB->insertAfter(RefNull->getIterator(), TableSet);
902 }
903
904 return BB;
905}
906
907MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
908 MachineInstr &MI, MachineBasicBlock *BB) const {
909 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
910 DebugLoc DL = MI.getDebugLoc();
911
912 switch (MI.getOpcode()) {
913 default:
914 llvm_unreachable("Unexpected instr type to insert");
915 case WebAssembly::FP_TO_SINT_I32_F32:
916 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
917 WebAssembly::I32_TRUNC_S_F32);
918 case WebAssembly::FP_TO_UINT_I32_F32:
919 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
920 WebAssembly::I32_TRUNC_U_F32);
921 case WebAssembly::FP_TO_SINT_I64_F32:
922 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
923 WebAssembly::I64_TRUNC_S_F32);
924 case WebAssembly::FP_TO_UINT_I64_F32:
925 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
926 WebAssembly::I64_TRUNC_U_F32);
927 case WebAssembly::FP_TO_SINT_I32_F64:
928 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
929 WebAssembly::I32_TRUNC_S_F64);
930 case WebAssembly::FP_TO_UINT_I32_F64:
931 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
932 WebAssembly::I32_TRUNC_U_F64);
933 case WebAssembly::FP_TO_SINT_I64_F64:
934 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
935 WebAssembly::I64_TRUNC_S_F64);
936 case WebAssembly::FP_TO_UINT_I64_F64:
937 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
938 WebAssembly::I64_TRUNC_U_F64);
939 case WebAssembly::MEMCPY_A32:
940 return LowerMemcpy(MI, DL, BB, TII, false);
941 case WebAssembly::MEMCPY_A64:
942 return LowerMemcpy(MI, DL, BB, TII, true);
943 case WebAssembly::MEMSET_A32:
944 return LowerMemset(MI, DL, BB, TII, false);
945 case WebAssembly::MEMSET_A64:
946 return LowerMemset(MI, DL, BB, TII, true);
947 case WebAssembly::CALL_RESULTS:
948 case WebAssembly::RET_CALL_RESULTS:
949 return LowerCallResults(MI, DL, BB, Subtarget, TII);
950 }
951}
952
953std::pair<unsigned, const TargetRegisterClass *>
954WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
955 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
956 // First, see if this is a constraint that directly corresponds to a
957 // WebAssembly register class.
958 if (Constraint.size() == 1) {
959 switch (Constraint[0]) {
960 case 'r':
961 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
962 if (Subtarget->hasSIMD128() && VT.isVector()) {
963 if (VT.getSizeInBits() == 128)
964 return std::make_pair(0U, &WebAssembly::V128RegClass);
965 }
966 if (VT.isInteger() && !VT.isVector()) {
967 if (VT.getSizeInBits() <= 32)
968 return std::make_pair(0U, &WebAssembly::I32RegClass);
969 if (VT.getSizeInBits() <= 64)
970 return std::make_pair(0U, &WebAssembly::I64RegClass);
971 }
972 if (VT.isFloatingPoint() && !VT.isVector()) {
973 switch (VT.getSizeInBits()) {
974 case 32:
975 return std::make_pair(0U, &WebAssembly::F32RegClass);
976 case 64:
977 return std::make_pair(0U, &WebAssembly::F64RegClass);
978 default:
979 break;
980 }
981 }
982 break;
983 default:
984 break;
985 }
986 }
987
989}
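// e.g. an inline asm operand constrained "=r" with type i64 maps to the I64
// register class here, and a v4i32 operand maps to V128 when simd128 is
// enabled.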
990
991bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
992 // Assume ctz is a relatively cheap operation.
993 return true;
994}
995
996bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
997 // Assume clz is a relatively cheap operation.
998 return true;
999}
1000
1001bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1002 const AddrMode &AM,
1003 Type *Ty, unsigned AS,
1004 Instruction *I) const {
1005 // WebAssembly offsets are added as unsigned without wrapping. The
1006 // isLegalAddressingMode gives us no way to determine if wrapping could be
1007 // happening, so we approximate this by accepting only non-negative offsets.
1008 if (AM.BaseOffs < 0)
1009 return false;
1010
1011 // WebAssembly has no scale register operands.
1012 if (AM.Scale != 0)
1013 return false;
1014
1015 // Everything else is legal.
1016 return true;
1017}
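// e.g. base + 16 (BaseOffs == 16, Scale == 0) is accepted and can fold into a
// load/store offset immediate, while base + 4*index or a negative offset such
// as base - 8 is rejected and must be computed with explicit adds.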
1018
1019bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1020 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1021 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1022 // WebAssembly supports unaligned accesses, though it should be declared
1023 // with the p2align attribute on loads and stores which do so, and there
1024 // may be a performance impact. We tell LLVM they're "fast" because
1025 // for the kinds of things that LLVM uses this for (merging adjacent stores
1026 // of constants, etc.), WebAssembly implementations will either want the
1027 // unaligned access or they'll split anyway.
1028 if (Fast)
1029 *Fast = 1;
1030 return true;
1031}
1032
1033bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1034 AttributeList Attr) const {
1035 // The current thinking is that wasm engines will perform this optimization,
1036 // so we can save on code size.
1037 return true;
1038}
1039
1040bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1041 EVT ExtT = ExtVal.getValueType();
1042 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1043 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1044 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1045 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1046}
1047
1048bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1049 const GlobalAddressSDNode *GA) const {
1050 // Wasm doesn't support function addresses with offsets
1051 const GlobalValue *GV = GA->getGlobal();
1053}
1054
1055EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1056 LLVMContext &C,
1057 EVT VT) const {
1058 if (VT.isVector())
1060
1061 // So far, all branch instructions in Wasm take an I32 condition.
1062 // The default TargetLowering::getSetCCResultType returns the pointer size,
1063 // which would be useful to reduce instruction counts when testing
1064 // against 64-bit pointers/values if at some point Wasm supports that.
1065 return EVT::getIntegerVT(C, 32);
1066}
1067
1068bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1069 const CallBase &I,
1070 MachineFunction &MF,
1071 unsigned Intrinsic) const {
1072 switch (Intrinsic) {
1073 case Intrinsic::wasm_memory_atomic_notify:
1075 Info.memVT = MVT::i32;
1076 Info.ptrVal = I.getArgOperand(0);
1077 Info.offset = 0;
1078 Info.align = Align(4);
1079 // The atomic.notify instruction does not really load the memory specified
1080 // by this argument, but a MachineMemOperand must be either a load or a
1081 // store, so we mark this as a load.
1082 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1083 // instructions are treated as volatiles in the backend, so we should be
1084 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1086 return true;
1087 case Intrinsic::wasm_memory_atomic_wait32:
1089 Info.memVT = MVT::i32;
1090 Info.ptrVal = I.getArgOperand(0);
1091 Info.offset = 0;
1092 Info.align = Align(4);
1094 return true;
1095 case Intrinsic::wasm_memory_atomic_wait64:
1097 Info.memVT = MVT::i64;
1098 Info.ptrVal = I.getArgOperand(0);
1099 Info.offset = 0;
1100 Info.align = Align(8);
1102 return true;
1103 case Intrinsic::wasm_loadf16_f32:
1105 Info.memVT = MVT::f16;
1106 Info.ptrVal = I.getArgOperand(0);
1107 Info.offset = 0;
1108 Info.align = Align(2);
1110 return true;
1111 case Intrinsic::wasm_storef16_f32:
1113 Info.memVT = MVT::f16;
1114 Info.ptrVal = I.getArgOperand(1);
1115 Info.offset = 0;
1116 Info.align = Align(2);
1118 return true;
1119 default:
1120 return false;
1121 }
1122}
1123
1124void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1125 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1126 const SelectionDAG &DAG, unsigned Depth) const {
1127 switch (Op.getOpcode()) {
1128 default:
1129 break;
1131 unsigned IntNo = Op.getConstantOperandVal(0);
1132 switch (IntNo) {
1133 default:
1134 break;
1135 case Intrinsic::wasm_bitmask: {
1136 unsigned BitWidth = Known.getBitWidth();
1137 EVT VT = Op.getOperand(1).getSimpleValueType();
1138 unsigned PossibleBits = VT.getVectorNumElements();
1139 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1140 Known.Zero |= ZeroMask;
1141 break;
1142 }
1143 }
1144 break;
1145 }
1146 case WebAssemblyISD::EXTEND_LOW_U:
1147 case WebAssemblyISD::EXTEND_HIGH_U: {
1148 // We know the high half of each destination vector element will be zero.
1149 SDValue SrcOp = Op.getOperand(0);
1150 EVT VT = SrcOp.getSimpleValueType();
1151 unsigned BitWidth = Known.getBitWidth();
1152 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1153 assert(BitWidth >= 8 && "Unexpected width!");
1155 Known.Zero |= Mask;
1156 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1157 assert(BitWidth >= 16 && "Unexpected width!");
1159 Known.Zero |= Mask;
1160 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1161 assert(BitWidth >= 32 && "Unexpected width!");
1163 Known.Zero |= Mask;
1164 }
1165 break;
1166 }
1167 // For 128-bit addition, if the upper halves of both inputs are zero, then
1168 // all bits of the upper half of the result are guaranteed zero except the
1169 // lowest.
1170 case WebAssemblyISD::I64_ADD128:
1171 if (Op.getResNo() == 1) {
1172 SDValue LHS_HI = Op.getOperand(1);
1173 SDValue RHS_HI = Op.getOperand(3);
1174 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1175 Known.Zero.setBitsFrom(1);
1176 }
1177 break;
1178 }
1179}
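// e.g. (i32 (wasm.bitmask (v16i8 $v))) has 16 lanes, so bits 16..31 of the
// result are known zero above; for a v4i32 input only the low 4 bits can
// ever be set.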
1180
1182WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1183 if (VT.isFixedLengthVector()) {
1184 MVT EltVT = VT.getVectorElementType();
1185 // We have legal vector types with these lane types, so widening the
1186 // vector would let us use some of the lanes directly without having to
1187 // extend or truncate values.
1188 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1189 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1190 return TypeWidenVector;
1191 }
1192
1194}
1195
1196bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1197 const MachineFunction &MF, EVT VT) const {
1198 if (!Subtarget->hasFP16() || !VT.isVector())
1199 return false;
1200
1201 EVT ScalarVT = VT.getScalarType();
1202 if (!ScalarVT.isSimple())
1203 return false;
1204
1205 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1206}
1207
1208bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1209 SDValue Op, const TargetLoweringOpt &TLO) const {
1210 // The ISel process runs DAGCombiner after legalization; this step is
1211 // called the SelectionDAG optimization phase. This post-legalization
1212 // combining process runs DAGCombiner on each node, and if there was a
1213 // change to be made, re-runs legalization on the node and its users to
1214 // make sure everything is in a legalized state.
1215 //
1216 // The legalization calls lowering routines, and we do our custom lowering for
1217 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1218 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1219 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1220 // turns unused vector elements into undefs. But this routine does not work
1221 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1222 // combination can result in an infinite loop, in which undefs are converted to
1223 // zeros in legalization and back to undefs in combining.
1224 //
1225 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1226 // running for build_vectors.
1227 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1228 return false;
1229 return true;
1230}
1231
1232//===----------------------------------------------------------------------===//
1233// WebAssembly Lowering private implementation.
1234//===----------------------------------------------------------------------===//
1235
1236//===----------------------------------------------------------------------===//
1237// Lowering Code
1238//===----------------------------------------------------------------------===//
1239
1240static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1242 DAG.getContext()->diagnose(
1243 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1244}
1245
1246// Test whether the given calling convention is supported.
1248 // We currently support the language-independent target-independent
1249 // conventions. We don't yet have a way to annotate calls with properties like
1250 // "cold", and we don't have any call-clobbered registers, so these are mostly
1251 // all handled the same.
1252 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1253 CallConv == CallingConv::Cold ||
1254 CallConv == CallingConv::PreserveMost ||
1255 CallConv == CallingConv::PreserveAll ||
1256 CallConv == CallingConv::CXX_FAST_TLS ||
1258 CallConv == CallingConv::Swift;
1259}
1260
1261SDValue
1262WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1263 SmallVectorImpl<SDValue> &InVals) const {
1264 SelectionDAG &DAG = CLI.DAG;
1265 SDLoc DL = CLI.DL;
1266 SDValue Chain = CLI.Chain;
1267 SDValue Callee = CLI.Callee;
1268 MachineFunction &MF = DAG.getMachineFunction();
1269 auto Layout = MF.getDataLayout();
1270
1271 CallingConv::ID CallConv = CLI.CallConv;
1272 if (!callingConvSupported(CallConv))
1273 fail(DL, DAG,
1274 "WebAssembly doesn't support language-specific or target-specific "
1275 "calling conventions yet");
1276 if (CLI.IsPatchPoint)
1277 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1278
1279 if (CLI.IsTailCall) {
1280 auto NoTail = [&](const char *Msg) {
1281 if (CLI.CB && CLI.CB->isMustTailCall())
1282 fail(DL, DAG, Msg);
1283 CLI.IsTailCall = false;
1284 };
1285
1286 if (!Subtarget->hasTailCall())
1287 NoTail("WebAssembly 'tail-call' feature not enabled");
1288
1289 // Varargs calls cannot be tail calls because the buffer is on the stack
1290 if (CLI.IsVarArg)
1291 NoTail("WebAssembly does not support varargs tail calls");
1292
1293 // Do not tail call unless caller and callee return types match
1294 const Function &F = MF.getFunction();
1295 const TargetMachine &TM = getTargetMachine();
1296 Type *RetTy = F.getReturnType();
1297 SmallVector<MVT, 4> CallerRetTys;
1298 SmallVector<MVT, 4> CalleeRetTys;
1299 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1300 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1301 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1302 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1303 CalleeRetTys.begin());
1304 if (!TypesMatch)
1305 NoTail("WebAssembly tail call requires caller and callee return types to "
1306 "match");
1307
1308 // If pointers to local stack values are passed, we cannot tail call
1309 if (CLI.CB) {
1310 for (auto &Arg : CLI.CB->args()) {
1311 Value *Val = Arg.get();
1312 // Trace the value back through pointer operations
1313 while (true) {
1314 Value *Src = Val->stripPointerCastsAndAliases();
1315 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1316 Src = GEP->getPointerOperand();
1317 if (Val == Src)
1318 break;
1319 Val = Src;
1320 }
1321 if (isa<AllocaInst>(Val)) {
1322 NoTail(
1323 "WebAssembly does not support tail calling with stack arguments");
1324 break;
1325 }
1326 }
1327 }
1328 }
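// e.g. passing &buf[4] where buf is a local alloca: the loop above strips
// the GEP back to the alloca, so the call is demoted to a normal call (or
// reported as an error if it was musttail), because a tail call tears down
// the caller's stack frame that the pointer refers into.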
1329
1330 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1331 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1332 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1333
1334 // The generic code may have added an sret argument. If we're lowering an
1335 // invoke function, the ABI requires that the function pointer be the first
1336 // argument, so we may have to swap the arguments.
1337 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1338 Outs[0].Flags.isSRet()) {
1339 std::swap(Outs[0], Outs[1]);
1340 std::swap(OutVals[0], OutVals[1]);
1341 }
1342
1343 bool HasSwiftSelfArg = false;
1344 bool HasSwiftErrorArg = false;
1345 unsigned NumFixedArgs = 0;
1346 for (unsigned I = 0; I < Outs.size(); ++I) {
1347 const ISD::OutputArg &Out = Outs[I];
1348 SDValue &OutVal = OutVals[I];
1349 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1350 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1351 if (Out.Flags.isNest())
1352 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1353 if (Out.Flags.isInAlloca())
1354 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1355 if (Out.Flags.isInConsecutiveRegs())
1356 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1358 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1359 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1360 auto &MFI = MF.getFrameInfo();
1361 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1363 /*isSS=*/false);
1364 SDValue SizeNode =
1365 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1366 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1367 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1369 /*isVolatile*/ false, /*AlwaysInline=*/false,
1370 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1371 MachinePointerInfo());
1372 OutVal = FINode;
1373 }
1374 // Count the number of fixed args *after* legalization.
1375 NumFixedArgs += !Out.Flags.isVarArg();
1376 }
1377
1378 bool IsVarArg = CLI.IsVarArg;
1379 auto PtrVT = getPointerTy(Layout);
1380
1381 // For swiftcc, emit additional swiftself and swifterror arguments if there
1382 // aren't any. These additional arguments are also added to the callee
1383 // signature; they are necessary to match caller and callee signatures for
1384 // indirect calls.
1385 if (CallConv == CallingConv::Swift) {
1386 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1387 if (!HasSwiftSelfArg) {
1388 NumFixedArgs++;
1389 ISD::ArgFlagsTy Flags;
1390 Flags.setSwiftSelf();
1391 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1392 CLI.Outs.push_back(Arg);
1393 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1394 CLI.OutVals.push_back(ArgVal);
1395 }
1396 if (!HasSwiftErrorArg) {
1397 NumFixedArgs++;
1398 ISD::ArgFlagsTy Flags;
1399 Flags.setSwiftError();
1400 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1401 CLI.Outs.push_back(Arg);
1402 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1403 CLI.OutVals.push_back(ArgVal);
1404 }
1405 }
1406
1407 // Analyze operands of the call, assigning locations to each operand.
1409 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1410
1411 if (IsVarArg) {
1412 // Outgoing non-fixed arguments are placed in a buffer. First
1413 // compute their offsets and the total amount of buffer space needed.
1414 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1415 const ISD::OutputArg &Out = Outs[I];
1416 SDValue &Arg = OutVals[I];
1417 EVT VT = Arg.getValueType();
1418 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1419 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1420 Align Alignment =
1421 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1422 unsigned Offset =
1423 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1424 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1425 Offset, VT.getSimpleVT(),
1427 }
1428 }
1429
1430 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1431
1432 SDValue FINode;
1433 if (IsVarArg && NumBytes) {
1434 // For non-fixed arguments, next emit stores to store the argument values
1435 // to the stack buffer at the offsets computed above.
1436 MaybeAlign StackAlign = Layout.getStackAlignment();
1437 assert(StackAlign && "data layout string is missing stack alignment");
1438 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1439 /*isSS=*/false);
1440 unsigned ValNo = 0;
1442 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1443 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1444 "ArgLocs should remain in order and only hold varargs args");
1445 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1446 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1447 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1448 DAG.getConstant(Offset, DL, PtrVT));
1449 Chains.push_back(
1450 DAG.getStore(Chain, DL, Arg, Add,
1452 }
1453 if (!Chains.empty())
1454 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1455 } else if (IsVarArg) {
1456 FINode = DAG.getIntPtrConstant(0, DL);
1457 }
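// An illustrative layout, assuming wasm32: for a call like
// printf("%d %f", i, d), the two non-fixed arguments get offsets 0 (i32,
// align 4) and 8 (f64, align 8) in a 16-byte stack buffer, and a pointer to
// that buffer is passed as the final call operand.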
1458
1459 if (Callee->getOpcode() == ISD::GlobalAddress) {
1460 // If the callee is a GlobalAddress node (quite common, every direct call
1461 // is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1462 // doesn't add MO_GOT, which is not needed for direct calls.
1463 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1466 GA->getOffset());
1467 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1468 getPointerTy(DAG.getDataLayout()), Callee);
1469 }
1470
1471 // Compute the operands for the CALLn node.
1473 Ops.push_back(Chain);
1474 Ops.push_back(Callee);
1475
1476 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1477 // isn't reliable.
1478 Ops.append(OutVals.begin(),
1479 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1480 // Add a pointer to the vararg buffer.
1481 if (IsVarArg)
1482 Ops.push_back(FINode);
1483
1484 SmallVector<EVT, 8> InTys;
1485 for (const auto &In : Ins) {
1486 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1487 assert(!In.Flags.isNest() && "nest is not valid for return values");
1488 if (In.Flags.isInAlloca())
1489 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1490 if (In.Flags.isInConsecutiveRegs())
1491 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1492 if (In.Flags.isInConsecutiveRegsLast())
1493 fail(DL, DAG,
1494 "WebAssembly hasn't implemented cons regs last return values");
1495 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1496 // registers.
1497 InTys.push_back(In.VT);
1498 }
1499
1500 // Lastly, if this is a call to a funcref, we need to add a table.set
1501 // instruction to the chain and transform the call.
1503 CLI.CB->getCalledOperand()->getType())) {
1504 // In the absence of the function references proposal, where a funcref call
1505 // would be lowered to call_ref, with reference types we generate a table.set
1506 // to install the funcref in a special table used solely for this purpose,
1507 // followed by a call_indirect. Here we just generate the table.set and
1508 // return its SDValue so that LowerCall can finalize the lowering by
1509 // generating the call_indirect.
1510 SDValue Chain = Ops[0];
1511
1513 MF.getContext(), Subtarget);
1514 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1515 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1516 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1517 SDValue TableSet = DAG.getMemIntrinsicNode(
1518 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1519 MVT::funcref,
1520 // Machine Mem Operand args
1521 MachinePointerInfo(
1523 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1525
1526 Ops[0] = TableSet; // The new chain is the TableSet itself
1527 }
1528
1529 if (CLI.IsTailCall) {
1530 // ret_calls do not return values to the current frame
1531 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1532 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1533 }
1534
1535 InTys.push_back(MVT::Other);
1536 SDVTList InTyList = DAG.getVTList(InTys);
1537 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1538
1539 for (size_t I = 0; I < Ins.size(); ++I)
1540 InVals.push_back(Res.getValue(I));
1541
1542 // Return the chain
1543 return Res.getValue(Ins.size());
1544}
1545
1546bool WebAssemblyTargetLowering::CanLowerReturn(
1547 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1548 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1549 const Type *RetTy) const {
1550 // WebAssembly can only handle returning tuples with multivalue enabled
1551 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1552}
1553
1554SDValue WebAssemblyTargetLowering::LowerReturn(
1555 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1557 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1558 SelectionDAG &DAG) const {
1559 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1560 "MVP WebAssembly can only return up to one value");
1561 if (!callingConvSupported(CallConv))
1562 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1563
1564 SmallVector<SDValue, 4> RetOps(1, Chain);
1565 RetOps.append(OutVals.begin(), OutVals.end());
1566 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1567
1568 // Record the number and types of the return values.
1569 for (const ISD::OutputArg &Out : Outs) {
1570 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1571 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1572 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1573 if (Out.Flags.isInAlloca())
1574 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1575 if (Out.Flags.isInConsecutiveRegs())
1576 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1578 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1579 }
1580
1581 return Chain;
1582}
1583
1584SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1585 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1586 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1587 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1588 if (!callingConvSupported(CallConv))
1589 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1590
1591 MachineFunction &MF = DAG.getMachineFunction();
1592 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1593
1594 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1595 // of the incoming values before they're represented by virtual registers.
1596 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1597
1598 bool HasSwiftErrorArg = false;
1599 bool HasSwiftSelfArg = false;
1600 for (const ISD::InputArg &In : Ins) {
1601 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1602 HasSwiftErrorArg |= In.Flags.isSwiftError();
1603 if (In.Flags.isInAlloca())
1604 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1605 if (In.Flags.isNest())
1606 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1607 if (In.Flags.isInConsecutiveRegs())
1608 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1609 if (In.Flags.isInConsecutiveRegsLast())
1610 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1611 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1612 // registers.
1613 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1614 DAG.getTargetConstant(InVals.size(),
1615 DL, MVT::i32))
1616 : DAG.getUNDEF(In.VT));
1617
1618 // Record the number and types of arguments.
1619 MFI->addParam(In.VT);
1620 }
1621
1622 // For swiftcc, emit additional swiftself and swifterror arguments if there
1623 // aren't any. These additional arguments are also added to the callee
1624 // signature; they are necessary to match caller and callee signatures for
1625 // indirect calls.
1626 auto PtrVT = getPointerTy(MF.getDataLayout());
1627 if (CallConv == CallingConv::Swift) {
1628 if (!HasSwiftSelfArg) {
1629 MFI->addParam(PtrVT);
1630 }
1631 if (!HasSwiftErrorArg) {
1632 MFI->addParam(PtrVT);
1633 }
1634 }
1635 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1636 // the buffer is passed as an argument.
1637 if (IsVarArg) {
1638 MVT PtrVT = getPointerTy(MF.getDataLayout());
1639 Register VarargVreg =
1641 MFI->setVarargBufferVreg(VarargVreg);
1642 Chain = DAG.getCopyToReg(
1643 Chain, DL, VarargVreg,
1644 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1645 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1646 MFI->addParam(PtrVT);
1647 }
1648
1649 // Record the number and types of arguments and results.
1650 SmallVector<MVT, 4> Params;
1653 MF.getFunction(), DAG.getTarget(), Params, Results);
1654 for (MVT VT : Results)
1655 MFI->addResult(VT);
1656 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1657 // the param logic here with ComputeSignatureVTs
1658 assert(MFI->getParams().size() == Params.size() &&
1659 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1660 Params.begin()));
1661
1662 return Chain;
1663}
1664
1665void WebAssemblyTargetLowering::ReplaceNodeResults(
1667 switch (N->getOpcode()) {
1669 // Do not add any results, signifying that N should not be custom lowered
1670 // after all. This happens because simd128 turns on custom lowering for
1671 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1672 // illegal type.
1673 break;
1676 // Do not add any results, signifying that N should not be custom lowered.
1677 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1678 break;
1679 case ISD::ADD:
1680 case ISD::SUB:
1681 Results.push_back(Replace128Op(N, DAG));
1682 break;
1683 default:
1685 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1686 }
1687}
1688
1689//===----------------------------------------------------------------------===//
1690// Custom lowering hooks.
1691//===----------------------------------------------------------------------===//
1692
1693SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1694 SelectionDAG &DAG) const {
1695 SDLoc DL(Op);
1696 switch (Op.getOpcode()) {
1697 default:
1698 llvm_unreachable("unimplemented operation lowering");
1699 return SDValue();
1700 case ISD::FrameIndex:
1701 return LowerFrameIndex(Op, DAG);
1702 case ISD::GlobalAddress:
1703 return LowerGlobalAddress(Op, DAG);
1705 return LowerGlobalTLSAddress(Op, DAG);
1707 return LowerExternalSymbol(Op, DAG);
1708 case ISD::JumpTable:
1709 return LowerJumpTable(Op, DAG);
1710 case ISD::BR_JT:
1711 return LowerBR_JT(Op, DAG);
1712 case ISD::VASTART:
1713 return LowerVASTART(Op, DAG);
1714 case ISD::BlockAddress:
1715 case ISD::BRIND:
1716 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1717 return SDValue();
1718 case ISD::RETURNADDR:
1719 return LowerRETURNADDR(Op, DAG);
1720 case ISD::FRAMEADDR:
1721 return LowerFRAMEADDR(Op, DAG);
1722 case ISD::CopyToReg:
1723 return LowerCopyToReg(Op, DAG);
1724 case ISD::EXTRACT_VECTOR_ELT:
1725 case ISD::INSERT_VECTOR_ELT:
1726 return LowerAccessVectorElement(Op, DAG);
1727 case ISD::INTRINSIC_VOID:
1728 case ISD::INTRINSIC_WO_CHAIN:
1729 case ISD::INTRINSIC_W_CHAIN:
1730 return LowerIntrinsic(Op, DAG);
1731 case ISD::SIGN_EXTEND_INREG:
1732 return LowerSIGN_EXTEND_INREG(Op, DAG);
1733 case ISD::ZERO_EXTEND_VECTOR_INREG:
1734 case ISD::SIGN_EXTEND_VECTOR_INREG:
1735 case ISD::ANY_EXTEND_VECTOR_INREG:
1736 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1737 case ISD::BUILD_VECTOR:
1738 return LowerBUILD_VECTOR(Op, DAG);
1739 case ISD::VECTOR_SHUFFLE:
1740 return LowerVECTOR_SHUFFLE(Op, DAG);
1741 case ISD::SETCC:
1742 return LowerSETCC(Op, DAG);
1743 case ISD::SHL:
1744 case ISD::SRA:
1745 case ISD::SRL:
1746 return LowerShift(Op, DAG);
1747 case ISD::FP_TO_SINT_SAT:
1748 case ISD::FP_TO_UINT_SAT:
1749 return LowerFP_TO_INT_SAT(Op, DAG);
1750 case ISD::LOAD:
1751 return LowerLoad(Op, DAG);
1752 case ISD::STORE:
1753 return LowerStore(Op, DAG);
1754 case ISD::CTPOP:
1755 case ISD::CTLZ:
1756 case ISD::CTTZ:
1757 return DAG.UnrollVectorOp(Op.getNode());
1758 case ISD::CLEAR_CACHE:
1759 report_fatal_error("llvm.clear_cache is not supported on wasm");
1760 case ISD::SMUL_LOHI:
1761 case ISD::UMUL_LOHI:
1762 return LowerMUL_LOHI(Op, DAG);
1763 case ISD::UADDO:
1764 return LowerUADDO(Op, DAG);
1765 }
1766}
1767
1768static bool IsWebAssemblyGlobal(SDValue Op) {
1769 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1770 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1771
1772 return false;
1773}
1774
1775static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1776 SelectionDAG &DAG) {
1777 auto *FI = dyn_cast<FrameIndexSDNode>(Op);
1778 if (!FI)
1779 return std::nullopt;
1780
1781 auto &MF = DAG.getMachineFunction();
1782 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1783}
1784
1785SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1786 SelectionDAG &DAG) const {
1787 SDLoc DL(Op);
1788 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1789 const SDValue &Value = SN->getValue();
1790 const SDValue &Base = SN->getBasePtr();
1791 const SDValue &Offset = SN->getOffset();
1792
1793 if (IsWebAssemblyGlobal(Base)) {
1794 if (!Offset->isUndef())
1795 report_fatal_error("unexpected offset when storing to webassembly global",
1796 false);
1797
1798 SDVTList Tys = DAG.getVTList(MVT::Other);
1799 SDValue Ops[] = {SN->getChain(), Value, Base};
1800 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1801 SN->getMemoryVT(), SN->getMemOperand());
1802 }
1803
1804 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1805 if (!Offset->isUndef())
1806 report_fatal_error("unexpected offset when storing to webassembly local",
1807 false);
1808
1809 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1810 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1811 SDValue Ops[] = {SN->getChain(), Idx, Value};
1812 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1813 }
1814
1815 if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
1816 report_fatal_error(
1817 "Encountered an unlowerable store to the wasm_var address space",
1818 false);
1819
1820 return Op;
1821}
1822
1823SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1824 SelectionDAG &DAG) const {
1825 SDLoc DL(Op);
1826 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1827 const SDValue &Base = LN->getBasePtr();
1828 const SDValue &Offset = LN->getOffset();
1829
1830 if (IsWebAssemblyGlobal(Base)) {
1831 if (!Offset->isUndef())
1832 report_fatal_error(
1833 "unexpected offset when loading from webassembly global", false);
1834
1835 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1836 SDValue Ops[] = {LN->getChain(), Base};
1837 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1838 LN->getMemoryVT(), LN->getMemOperand());
1839 }
1840
1841 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1842 if (!Offset->isUndef())
1843 report_fatal_error(
1844 "unexpected offset when loading from webassembly local", false);
1845
1846 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1847 EVT LocalVT = LN->getValueType(0);
1848 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1849 {LN->getChain(), Idx});
1850 }
1851
1852 if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
1853 report_fatal_error(
1854 "Encountered an unlowerable load from the wasm_var address space",
1855 false);
1856
1857 return Op;
1858}
1859
1860SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1861 SelectionDAG &DAG) const {
1862 assert(Subtarget->hasWideArithmetic());
1863 assert(Op.getValueType() == MVT::i64);
1864 SDLoc DL(Op);
1865 unsigned Opcode;
1866 switch (Op.getOpcode()) {
1867 case ISD::UMUL_LOHI:
1868 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1869 break;
1870 case ISD::SMUL_LOHI:
1871 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1872 break;
1873 default:
1874 llvm_unreachable("unexpected opcode");
1875 }
1876 SDValue LHS = Op.getOperand(0);
1877 SDValue RHS = Op.getOperand(1);
1878 SDValue Lo =
1879 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1880 SDValue Hi(Lo.getNode(), 1);
1881 SDValue Ops[] = {Lo, Hi};
1882 return DAG.getMergeValues(Ops, DL);
1883}
1884
1885// Lowers `UADDO` nodes to a single `i64.add128` instruction when wide
1886// arithmetic is enabled.
1887//
1888// The upper halves of both operands are constant zeros, so the upper half of
1889// the result is exactly whether the overflow happened.
1890SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1891 SelectionDAG &DAG) const {
1892 assert(Subtarget->hasWideArithmetic());
1893 assert(Op.getValueType() == MVT::i64);
1894 assert(Op.getOpcode() == ISD::UADDO);
1895 SDLoc DL(Op);
1896 SDValue LHS = Op.getOperand(0);
1897 SDValue RHS = Op.getOperand(1);
1898 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1899 SDValue Result =
1900 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1901 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1902 SDValue CarryI64(Result.getNode(), 1);
1903 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1904 SDValue Ops[] = {Result, CarryI32};
1905 return DAG.getMergeValues(Ops, DL);
1906}
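// --- Editorial sketch (added in this edit; not part of the LLVM source). A
// scalar model of the lowering above, assuming <cstdint> and <utility> are
// available: with both upper halves zeroed, i64.add128's high result is
// exactly the carry out of the 64-bit addition, which is what UADDO's
// overflow flag reports. The function name is illustrative only.
static std::pair<uint64_t, uint32_t> modelUADDOViaAdd128(uint64_t LHS,
                                                         uint64_t RHS) {
  uint64_t Lo = LHS + RHS;        // low half of the 128-bit sum
  uint64_t Hi = Lo < LHS ? 1 : 0; // high half: set exactly when the add carried
  return {Lo, static_cast<uint32_t>(Hi)}; // {result, overflow bit}
}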
1907
1908SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1909 SelectionDAG &DAG) const {
1910 assert(Subtarget->hasWideArithmetic());
1911 assert(N->getValueType(0) == MVT::i128);
1912 SDLoc DL(N);
1913 unsigned Opcode;
1914 switch (N->getOpcode()) {
1915 case ISD::ADD:
1916 Opcode = WebAssemblyISD::I64_ADD128;
1917 break;
1918 case ISD::SUB:
1919 Opcode = WebAssemblyISD::I64_SUB128;
1920 break;
1921 default:
1922 llvm_unreachable("unexpected opcode");
1923 }
1924 SDValue LHS = N->getOperand(0);
1925 SDValue RHS = N->getOperand(1);
1926
1927 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1928 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1929 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1930 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1931 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1932 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1933 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1934 LHS_0, LHS_1, RHS_0, RHS_1);
1935 SDValue Result_HI(Result_LO.getNode(), 1);
1936 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1937}
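// --- Editorial sketch (added in this edit; not part of the LLVM source). The
// i128 addition that a single i64.add128 performs, written out over two i64
// halves (assumes <cstdint>); i64.sub128 is the analogous borrow-propagating
// subtraction. Names are illustrative only.
static void modelI64Add128(uint64_t LHS0, uint64_t LHS1, uint64_t RHS0,
                           uint64_t RHS1, uint64_t &Lo, uint64_t &Hi) {
  Lo = LHS0 + RHS0;                   // low 64 bits of the i128 sum
  uint64_t Carry = Lo < LHS0 ? 1 : 0; // carry out of the low half
  Hi = LHS1 + RHS1 + Carry;           // high 64 bits of the i128 sum
}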
1938
1939SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1940 SelectionDAG &DAG) const {
1941 SDValue Src = Op.getOperand(2);
1942 if (isa<FrameIndexSDNode>(Src.getNode())) {
1943 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1944 // the FI to some LEA-like instruction, but since we don't have that, we
1945 // need to insert some kind of instruction that can take an FI operand and
1946 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1947 // local.copy between Op and its FI operand.
1948 SDValue Chain = Op.getOperand(0);
1949 SDLoc DL(Op);
1950 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1951 EVT VT = Src.getValueType();
1952 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1953 : WebAssembly::COPY_I64,
1954 DL, VT, Src),
1955 0);
1956 return Op.getNode()->getNumValues() == 1
1957 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1958 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1959 Op.getNumOperands() == 4 ? Op.getOperand(3)
1960 : SDValue());
1961 }
1962 return SDValue();
1963}
1964
1965SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1966 SelectionDAG &DAG) const {
1967 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1968 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1969}
1970
1971SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1972 SelectionDAG &DAG) const {
1973 SDLoc DL(Op);
1974
1975 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1976 fail(DL, DAG,
1977 "Non-Emscripten WebAssembly hasn't implemented "
1978 "__builtin_return_address");
1979 return SDValue();
1980 }
1981
1982 unsigned Depth = Op.getConstantOperandVal(0);
1983 MakeLibCallOptions CallOptions;
1984 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1985 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1986 .first;
1987}
1988
1989SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1990 SelectionDAG &DAG) const {
1991 // Non-zero depths are not supported by WebAssembly currently. Use the
1992 // legalizer's default expansion, which is to return 0 (what this function is
1993 // documented to do).
1994 if (Op.getConstantOperandVal(0) > 0)
1995 return SDValue();
1996
1997 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1998 EVT VT = Op.getValueType();
1999 Register FP =
2000 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2001 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2002}
2003
2004SDValue
2005WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2006 SelectionDAG &DAG) const {
2007 SDLoc DL(Op);
2008 const auto *GA = cast<GlobalAddressSDNode>(Op);
2009
2010 MachineFunction &MF = DAG.getMachineFunction();
2011 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2012 report_fatal_error("cannot use thread-local storage without bulk memory",
2013 false);
2014
2015 const GlobalValue *GV = GA->getGlobal();
2016
2017 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2018 // on other targets, if we have thread-local storage, only the local-exec
2019 // model is possible.
2020 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2021 ? GV->getThreadLocalMode()
2022 : GlobalValue::LocalExecTLSModel;
2023
2024 // Unsupported TLS modes
2025 assert(model != GlobalValue::NotThreadLocal);
2026 assert(model != GlobalValue::InitialExecTLSModel);
2027
2028 if (model == GlobalValue::LocalExecTLSModel ||
2029 model == GlobalValue::LocalDynamicTLSModel ||
2030 (model == GlobalValue::GeneralDynamicTLSModel &&
2031 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2032 // For DSO-local TLS variables we use offset from __tls_base
2033
2034 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2035 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2036 : WebAssembly::GLOBAL_GET_I32;
2037 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2038
2039 SDValue BaseAddr(
2040 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2041 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2042 0);
2043
2044 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2045 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2046 SDValue SymOffset =
2047 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2048
2049 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2050 }
2051
2053
2054 EVT VT = Op.getValueType();
2055 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2056 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2057 GA->getOffset(),
2058 WebAssemblyII::MO_GOT_TLS));
2059}
2060
2061SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2062 SelectionDAG &DAG) const {
2063 SDLoc DL(Op);
2064 const auto *GA = cast<GlobalAddressSDNode>(Op);
2065 EVT VT = Op.getValueType();
2066 assert(GA->getTargetFlags() == 0 &&
2067 "Unexpected target flags on generic GlobalAddressSDNode");
2068 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
2069 fail(DL, DAG, "Invalid address space for WebAssembly target");
2070
2071 unsigned OperandFlags = 0;
2072 const GlobalValue *GV = GA->getGlobal();
2073 // Since WebAssembly tables cannot yet be shared across modules, we don't
2074 // need special treatment for tables in PIC mode.
2075 if (isPositionIndependent() &&
2076 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2077 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2078 MachineFunction &MF = DAG.getMachineFunction();
2079 MVT PtrVT = getPointerTy(MF.getDataLayout());
2080 const char *BaseName;
2081 if (GV->getValueType()->isFunctionTy()) {
2082 BaseName = MF.createExternalSymbolName("__table_base");
2083 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2084 } else {
2085 BaseName = MF.createExternalSymbolName("__memory_base");
2086 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2087 }
2088 SDValue BaseAddr =
2089 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2090 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2091
2092 SDValue SymAddr = DAG.getNode(
2093 WebAssemblyISD::WrapperREL, DL, VT,
2094 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2095 OperandFlags));
2096
2097 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2098 }
2099 OperandFlags = WebAssemblyII::MO_GOT;
2100 }
2101
2102 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2103 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2104 GA->getOffset(), OperandFlags));
2105}
2106
2107SDValue
2108WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2109 SelectionDAG &DAG) const {
2110 SDLoc DL(Op);
2111 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2112 EVT VT = Op.getValueType();
2113 assert(ES->getTargetFlags() == 0 &&
2114 "Unexpected target flags on generic ExternalSymbolSDNode");
2115 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2116 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2117}
2118
2119SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2120 SelectionDAG &DAG) const {
2121 // There's no need for a Wrapper node because we always incorporate a jump
2122 // table operand into a BR_TABLE instruction, rather than ever
2123 // materializing it in a register.
2124 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2125 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2126 JT->getTargetFlags());
2127}
2128
2129SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2130 SelectionDAG &DAG) const {
2131 SDLoc DL(Op);
2132 SDValue Chain = Op.getOperand(0);
2133 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2134 SDValue Index = Op.getOperand(2);
2135 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2136
2137 SmallVector<SDValue, 8> Ops;
2138 Ops.push_back(Chain);
2139 Ops.push_back(Index);
2140
2141 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2142 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2143
2144 // Add an operand for each case.
2145 for (auto *MBB : MBBs)
2146 Ops.push_back(DAG.getBasicBlock(MBB));
2147
2148 // Add the first MBB as a dummy default target for now. This will be replaced
2149 // with the proper default target (and the preceding range check eliminated)
2150 // if possible by WebAssemblyFixBrTableDefaults.
2151 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2152 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2153}
2154
2155SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2156 SelectionDAG &DAG) const {
2157 SDLoc DL(Op);
2158 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2159
2160 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2161 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2162
2163 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2164 MFI->getVarargBufferVreg(), PtrVT);
2165 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2166 MachinePointerInfo(SV));
2167}
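// --- Editorial sketch (added in this edit; not part of the LLVM source). On
// WebAssembly a va_list is, in effect, a pointer into the caller-allocated
// vararg buffer (see the varargs comment in LowerFormalArguments), so
// va_start reduces to the single store emitted above. Names are illustrative.
static void modelVAStart(char **VAList, char *VarargBuffer) {
  *VAList = VarargBuffer; // the one store LowerVASTART generates
}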
2168
2169SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2170 SelectionDAG &DAG) const {
2171 MachineFunction &MF = DAG.getMachineFunction();
2172 unsigned IntNo;
2173 switch (Op.getOpcode()) {
2174 case ISD::INTRINSIC_VOID:
2175 case ISD::INTRINSIC_W_CHAIN:
2176 IntNo = Op.getConstantOperandVal(1);
2177 break;
2178 case ISD::INTRINSIC_WO_CHAIN:
2179 IntNo = Op.getConstantOperandVal(0);
2180 break;
2181 default:
2182 llvm_unreachable("Invalid intrinsic");
2183 }
2184 SDLoc DL(Op);
2185
2186 switch (IntNo) {
2187 default:
2188 return SDValue(); // Don't custom lower most intrinsics.
2189
2190 case Intrinsic::wasm_lsda: {
2191 auto PtrVT = getPointerTy(MF.getDataLayout());
2192 const char *SymName = MF.createExternalSymbolName(
2193 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2194 if (isPositionIndependent()) {
2195 SDValue Node = DAG.getTargetExternalSymbol(
2196 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2197 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2198 SDValue BaseAddr =
2199 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2200 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2201 SDValue SymAddr =
2202 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2203 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2204 }
2205 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2206 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2207 }
2208
2209 case Intrinsic::wasm_shuffle: {
2210 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2211 SDValue Ops[18];
2212 size_t OpIdx = 0;
2213 Ops[OpIdx++] = Op.getOperand(1);
2214 Ops[OpIdx++] = Op.getOperand(2);
2215 while (OpIdx < 18) {
2216 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2217 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2218 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2219 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2220 } else {
2221 Ops[OpIdx++] = MaskIdx;
2222 }
2223 }
2224 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2225 }
2226
2227 case Intrinsic::thread_pointer: {
2228 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2229 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2230 : WebAssembly::GLOBAL_GET_I32;
2231 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2232 return SDValue(
2233 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2234 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2235 0);
2236 }
2237 }
2238}
2239
2240SDValue
2241WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2242 SelectionDAG &DAG) const {
2243 SDLoc DL(Op);
2244 // If sign extension operations are disabled, allow sext_inreg only if operand
2245 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2246 // extension operations, but allowing sext_inreg in this context lets us have
2247 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2248 // everywhere would be simpler in this file, but would necessitate large and
2249 // brittle patterns to undo the expansion and select extract_lane_s
2250 // instructions.
2251 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2252 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2253 return SDValue();
2254
2255 const SDValue &Extract = Op.getOperand(0);
2256 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2257 if (VecT.getVectorElementType().getSizeInBits() > 32)
2258 return SDValue();
2259 MVT ExtractedLaneT =
2260 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2261 MVT ExtractedVecT =
2262 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2263 if (ExtractedVecT == VecT)
2264 return Op;
2265
2266 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2267 const SDNode *Index = Extract.getOperand(1).getNode();
2268 if (!isa<ConstantSDNode>(Index))
2269 return SDValue();
2270 unsigned IndexVal = Index->getAsZExtVal();
2271 unsigned Scale =
2272 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2273 assert(Scale > 1);
2274 SDValue NewIndex =
2275 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2276 SDValue NewExtract = DAG.getNode(
2277 ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2278 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2279 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2280 Op.getOperand(1));
2281}
2282
2283static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2284 SelectionDAG &DAG) {
2285 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2286 return SDValue();
2287
2288 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2289 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2290 "expected extend_low");
2291 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2292
2293 ArrayRef<int> Mask = Shuffle->getMask();
2294 // Look for a shuffle which moves from the high half to the low half.
2295 size_t FirstIdx = Mask.size() / 2;
2296 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2297 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2298 return SDValue();
2299 }
2300 }
2301
2302 SDLoc DL(Op);
2303 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2304 ? WebAssemblyISD::EXTEND_HIGH_S
2305 : WebAssemblyISD::EXTEND_HIGH_U;
2306 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2307}
2308
2309SDValue
2310WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2311 SelectionDAG &DAG) const {
2312 SDLoc DL(Op);
2313 EVT VT = Op.getValueType();
2314 SDValue Src = Op.getOperand(0);
2315 EVT SrcVT = Src.getValueType();
2316
2317 if (SrcVT.getVectorElementType() == MVT::i1 ||
2318 SrcVT.getVectorElementType() == MVT::i64)
2319 return SDValue();
2320
2321 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2322 "Unexpected extension factor.");
2323 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2324
2325 if (Scale != 2 && Scale != 4 && Scale != 8)
2326 return SDValue();
2327
2328 unsigned Ext;
2329 switch (Op.getOpcode()) {
2330 default:
2331 llvm_unreachable("unexpected opcode");
2332 case ISD::ZERO_EXTEND_VECTOR_INREG:
2333 case ISD::ANY_EXTEND_VECTOR_INREG:
2334 Ext = WebAssemblyISD::EXTEND_LOW_U;
2335 break;
2336 case ISD::SIGN_EXTEND_VECTOR_INREG:
2337 Ext = WebAssemblyISD::EXTEND_LOW_S;
2338 break;
2338 break;
2339 }
2340
2341 if (Scale == 2) {
2342 // See if we can use EXTEND_HIGH.
2343 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2344 return ExtendHigh;
2345 }
2346
2347 SDValue Ret = Src;
2348 while (Scale != 1) {
2349 Ret = DAG.getNode(Ext, DL,
2350 Ret.getValueType()
2351 .widenIntegerVectorElementType(*DAG.getContext())
2352 .getHalfNumVectorElementsVT(*DAG.getContext()),
2353 Ret);
2354 Scale /= 2;
2355 }
2356 assert(Ret.getValueType() == VT);
2357 return Ret;
2358}
2359
2360static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2361 SDLoc DL(Op);
2362 if (Op.getValueType() != MVT::v2f64)
2363 return SDValue();
2364
2365 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2366 unsigned &Index) -> bool {
2367 switch (Op.getOpcode()) {
2368 case ISD::SINT_TO_FP:
2369 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2370 break;
2371 case ISD::UINT_TO_FP:
2372 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2373 break;
2374 case ISD::FP_EXTEND:
2375 Opcode = WebAssemblyISD::PROMOTE_LOW;
2376 break;
2377 default:
2378 return false;
2379 }
2380
2381 auto ExtractVector = Op.getOperand(0);
2382 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2383 return false;
2384
2385 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2386 return false;
2387
2388 SrcVec = ExtractVector.getOperand(0);
2389 Index = ExtractVector.getConstantOperandVal(1);
2390 return true;
2391 };
2392
2393 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2394 SDValue LHSSrcVec, RHSSrcVec;
2395 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2396 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2397 return SDValue();
2398
2399 if (LHSOpcode != RHSOpcode)
2400 return SDValue();
2401
2402 MVT ExpectedSrcVT;
2403 switch (LHSOpcode) {
2404 case WebAssemblyISD::CONVERT_LOW_S:
2405 case WebAssemblyISD::CONVERT_LOW_U:
2406 ExpectedSrcVT = MVT::v4i32;
2407 break;
2408 case WebAssemblyISD::PROMOTE_LOW:
2409 ExpectedSrcVT = MVT::v4f32;
2410 break;
2411 }
2412 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2413 return SDValue();
2414
2415 auto Src = LHSSrcVec;
2416 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2417 // Shuffle the source vector so that the converted lanes are the low lanes.
2418 Src = DAG.getVectorShuffle(
2419 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2420 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2421 }
2422 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2423}
2424
2425SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2426 SelectionDAG &DAG) const {
2427 MVT VT = Op.getSimpleValueType();
2428 if (VT == MVT::v8f16) {
2429 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2430 // FP16 type, so cast them to I16s.
2431 MVT IVT = VT.changeVectorElementType(MVT::i16);
2432 SmallVector<SDValue, 8> NewOps;
2433 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2434 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2435 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2436 return DAG.getBitcast(VT, Res);
2437 }
2438
2439 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2440 return ConvertLow;
2441
2442 SDLoc DL(Op);
2443 const EVT VecT = Op.getValueType();
2444 const EVT LaneT = Op.getOperand(0).getValueType();
2445 const size_t Lanes = Op.getNumOperands();
2446 bool CanSwizzle = VecT == MVT::v16i8;
2447
2448 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2449 // possible number of lanes at once followed by a sequence of replace_lane
2450 // instructions to individually initialize any remaining lanes.
2451
2452 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2453 // swizzled lanes should be given greater weight.
2454
2455 // TODO: Investigate looping rather than always extracting/replacing specific
2456 // lanes to fill gaps.
2457
2458 auto IsConstant = [](const SDValue &V) {
2459 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2460 };
2461
2462 // Returns the source vector and index vector pair if they exist. Checks for:
2463 // (extract_vector_elt
2464 // $src,
2465 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2466 // )
2467 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2468 auto Bail = std::make_pair(SDValue(), SDValue());
2469 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2470 return Bail;
2471 const SDValue &SwizzleSrc = Lane->getOperand(0);
2472 const SDValue &IndexExt = Lane->getOperand(1);
2473 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2474 return Bail;
2475 const SDValue &Index = IndexExt->getOperand(0);
2476 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2477 return Bail;
2478 const SDValue &SwizzleIndices = Index->getOperand(0);
2479 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2480 SwizzleIndices.getValueType() != MVT::v16i8 ||
2481 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2482 Index->getConstantOperandVal(1) != I)
2483 return Bail;
2484 return std::make_pair(SwizzleSrc, SwizzleIndices);
2485 };
2486
2487 // If the lane is extracted from another vector at a constant index, return
2488 // that vector. The source vector must not have more lanes than the dest
2489 // because the shufflevector indices are in terms of the destination lanes and
2490 // would not be able to address the smaller individual source lanes.
2491 auto GetShuffleSrc = [&](const SDValue &Lane) {
2492 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2493 return SDValue();
2494 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2495 return SDValue();
2496 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2497 VecT.getVectorNumElements())
2498 return SDValue();
2499 return Lane->getOperand(0);
2500 };
2501
2502 using ValueEntry = std::pair<SDValue, size_t>;
2503 SmallVector<ValueEntry, 16> SplatValueCounts;
2504
2505 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2506 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2507
2508 using ShuffleEntry = std::pair<SDValue, size_t>;
2509 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2510
2511 auto AddCount = [](auto &Counts, const auto &Val) {
2512 auto CountIt =
2513 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2514 if (CountIt == Counts.end()) {
2515 Counts.emplace_back(Val, 1);
2516 } else {
2517 CountIt->second++;
2518 }
2519 };
2520
2521 auto GetMostCommon = [](auto &Counts) {
2522 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2523 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2524 return *CommonIt;
2525 };
2526
2527 size_t NumConstantLanes = 0;
2528
2529 // Count eligible lanes for each type of vector creation op
2530 for (size_t I = 0; I < Lanes; ++I) {
2531 const SDValue &Lane = Op->getOperand(I);
2532 if (Lane.isUndef())
2533 continue;
2534
2535 AddCount(SplatValueCounts, Lane);
2536
2537 if (IsConstant(Lane))
2538 NumConstantLanes++;
2539 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2540 AddCount(ShuffleCounts, ShuffleSrc);
2541 if (CanSwizzle) {
2542 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2543 if (SwizzleSrcs.first)
2544 AddCount(SwizzleCounts, SwizzleSrcs);
2545 }
2546 }
2547
2548 SDValue SplatValue;
2549 size_t NumSplatLanes;
2550 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2551
2552 SDValue SwizzleSrc;
2553 SDValue SwizzleIndices;
2554 size_t NumSwizzleLanes = 0;
2555 if (SwizzleCounts.size())
2556 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2557 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2558
2559 // Shuffles can draw from up to two vectors, so find the two most common
2560 // sources.
2561 SDValue ShuffleSrc1, ShuffleSrc2;
2562 size_t NumShuffleLanes = 0;
2563 if (ShuffleCounts.size()) {
2564 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2565 llvm::erase_if(ShuffleCounts,
2566 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2567 }
2568 if (ShuffleCounts.size()) {
2569 size_t AdditionalShuffleLanes;
2570 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2571 GetMostCommon(ShuffleCounts);
2572 NumShuffleLanes += AdditionalShuffleLanes;
2573 }
2574
2575 // Predicate returning true if the lane is properly initialized by the
2576 // original instruction
2577 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2578 SDValue Result;
2579 // Prefer swizzles over shuffles over vector consts over splats
2580 if (NumSwizzleLanes >= NumShuffleLanes &&
2581 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2582 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2583 SwizzleIndices);
2584 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2585 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2586 return Swizzled == GetSwizzleSrcs(I, Lane);
2587 };
2588 } else if (NumShuffleLanes >= NumConstantLanes &&
2589 NumShuffleLanes >= NumSplatLanes) {
2590 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2591 size_t DestLaneCount = VecT.getVectorNumElements();
2592 size_t Scale1 = 1;
2593 size_t Scale2 = 1;
2594 SDValue Src1 = ShuffleSrc1;
2595 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2596 if (Src1.getValueType() != VecT) {
2597 size_t LaneSize =
2598 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2599 assert(LaneSize > DestLaneSize);
2600 Scale1 = LaneSize / DestLaneSize;
2601 Src1 = DAG.getBitcast(VecT, Src1);
2602 }
2603 if (Src2.getValueType() != VecT) {
2604 size_t LaneSize =
2605 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2606 assert(LaneSize > DestLaneSize);
2607 Scale2 = LaneSize / DestLaneSize;
2608 Src2 = DAG.getBitcast(VecT, Src2);
2609 }
2610
2611 int Mask[16];
2612 assert(DestLaneCount <= 16);
2613 for (size_t I = 0; I < DestLaneCount; ++I) {
2614 const SDValue &Lane = Op->getOperand(I);
2615 SDValue Src = GetShuffleSrc(Lane);
2616 if (Src == ShuffleSrc1) {
2617 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2618 } else if (Src && Src == ShuffleSrc2) {
2619 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2620 } else {
2621 Mask[I] = -1;
2622 }
2623 }
2624 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2625 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2626 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2627 auto Src = GetShuffleSrc(Lane);
2628 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2629 };
2630 } else if (NumConstantLanes >= NumSplatLanes) {
2631 SmallVector<SDValue, 16> ConstLanes;
2632 for (const SDValue &Lane : Op->op_values()) {
2633 if (IsConstant(Lane)) {
2634 // Values may need to be fixed so that they will sign extend to be
2635 // within the expected range during ISel. Check whether the value is in
2636 // bounds based on the lane bit width and if it is out of bounds, lop
2637 // off the extra bits.
2638 uint64_t LaneBits = 128 / Lanes;
2639 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2640 ConstLanes.push_back(DAG.getConstant(
2641 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2642 SDLoc(Lane), LaneT));
2643 } else {
2644 ConstLanes.push_back(Lane);
2645 }
2646 } else if (LaneT.isFloatingPoint()) {
2647 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2648 } else {
2649 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2650 }
2651 }
2652 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2653 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2654 return IsConstant(Lane);
2655 };
2656 } else {
2657 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2658 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2659 (DestLaneSize == 32 || DestLaneSize == 64)) {
2660 // Could be selected to load_zero.
2661 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2662 } else {
2663 // Use a splat (which might be selected as a load splat)
2664 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2665 }
2666 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2667 return Lane == SplatValue;
2668 };
2669 }
2670
2671 assert(Result);
2672 assert(IsLaneConstructed);
2673
2674 // Add replace_lane instructions for any unhandled values
2675 for (size_t I = 0; I < Lanes; ++I) {
2676 const SDValue &Lane = Op->getOperand(I);
2677 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2678 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2679 DAG.getConstant(I, DL, MVT::i32));
2680 }
2681
2682 return Result;
2683}
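// --- Editorial sketch (added in this edit; not part of the LLVM source). The
// "most common" selection above is applied to splat values, swizzle sources,
// and shuffle sources; whichever covers the most lanes seeds the vector, and
// the leftover lanes are patched with replace_lane. A scalar model of picking
// the best-covering lane value (assumes <cstddef>; names are illustrative):
static size_t modelMostCommonLane(const unsigned Lanes[], size_t NumLanes) {
  size_t BestIdx = 0, BestCount = 0;
  for (size_t I = 0; I < NumLanes; ++I) {
    size_t Count = 0;
    for (size_t J = 0; J < NumLanes; ++J)
      Count += Lanes[J] == Lanes[I] ? 1 : 0;
    if (Count > BestCount) {
      BestCount = Count;
      BestIdx = I; // lane whose value initializes the most lanes at once
    }
  }
  return BestIdx;
}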
2684
2685SDValue
2686WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2687 SelectionDAG &DAG) const {
2688 SDLoc DL(Op);
2689 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2690 MVT VecType = Op.getOperand(0).getSimpleValueType();
2691 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2692 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2693
2694 // Space for two vector args and sixteen mask indices
2695 SDValue Ops[18];
2696 size_t OpIdx = 0;
2697 Ops[OpIdx++] = Op.getOperand(0);
2698 Ops[OpIdx++] = Op.getOperand(1);
2699
2700 // Expand mask indices to byte indices and materialize them as operands
2701 for (int M : Mask) {
2702 for (size_t J = 0; J < LaneBytes; ++J) {
2703 // Lower undef lane indices (represented by -1 in the mask) to {0..J}, which
2704 // still reads one whole lane of vector input; this allows further reduction
2705 // at the VM, e.g. matching an 8x16 byte shuffle to a cheaper 32x4 shuffle.
2706 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2707 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2708 }
2709 }
2710
2711 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2712}
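// --- Editorial sketch (added in this edit; not part of the LLVM source). The
// mask expansion above in isolation (assumes <cstddef>): each lane-level
// shuffle index becomes LaneBytes consecutive byte indices for i8x16.shuffle,
// and undef lanes (-1) become {0..LaneBytes-1}, reading one whole input lane.
// Names are illustrative only.
static void modelExpandShuffleMask(const int LaneMask[], size_t NumLanes,
                                   size_t LaneBytes, unsigned ByteMask[16]) {
  size_t Out = 0;
  for (size_t I = 0; I < NumLanes; ++I)
    for (size_t J = 0; J < LaneBytes; ++J)
      ByteMask[Out++] =
          LaneMask[I] == -1
              ? static_cast<unsigned>(J)
              : static_cast<unsigned>(LaneMask[I] * LaneBytes + J);
}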
2713
2714SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2715 SelectionDAG &DAG) const {
2716 SDLoc DL(Op);
2717 // The legalizer does not know how to expand the unsupported comparison modes
2718 // of i64x2 vectors, so we manually unroll them here.
2719 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2720 SmallVector<SDValue, 2> LHS, RHS;
2721 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2722 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2723 const SDValue &CC = Op->getOperand(2);
2724 auto MakeLane = [&](unsigned I) {
2725 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2726 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2727 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2728 };
2729 return DAG.getBuildVector(Op->getValueType(0), DL,
2730 {MakeLane(0), MakeLane(1)});
2731}
2732
2733SDValue
2734WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2735 SelectionDAG &DAG) const {
2736 // Allow constant lane indices, expand variable lane indices
2737 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2738 if (isa<ConstantSDNode>(IdxNode)) {
2739 // Ensure the index type is i32 to match the tablegen patterns
2740 uint64_t Idx = IdxNode->getAsZExtVal();
2741 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2742 Ops[Op.getNumOperands() - 1] =
2743 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2744 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2745 }
2746 // Perform default expansion
2747 return SDValue();
2748}
2749
2750static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2751 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2752 // 32-bit and 64-bit unrolled shifts will have proper semantics
2753 if (LaneT.bitsGE(MVT::i32))
2754 return DAG.UnrollVectorOp(Op.getNode());
2755 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2756 SDLoc DL(Op);
2757 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2758 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2759 unsigned ShiftOpcode = Op.getOpcode();
2760 SmallVector<SDValue, 16> ShiftedElements;
2761 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2762 SmallVector<SDValue, 16> ShiftElements;
2763 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2764 SmallVector<SDValue, 16> UnrolledOps;
2765 for (size_t i = 0; i < NumLanes; ++i) {
2766 SDValue MaskedShiftValue =
2767 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2768 SDValue ShiftedValue = ShiftedElements[i];
2769 if (ShiftOpcode == ISD::SRA)
2770 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2771 ShiftedValue, DAG.getValueType(LaneT));
2772 UnrolledOps.push_back(
2773 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2774 }
2775 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2776}
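// --- Editorial sketch (added in this edit; not part of the LLVM source). Why
// the mask is needed (assumes <cstdint>): wasm's i32 shifts use the count
// modulo 32, but a shift of an i8 lane must use the count modulo 8, so the
// count is masked with LaneBits - 1 before each unrolled 32-bit shift. The
// function name is illustrative only.
static uint8_t modelI8LaneShl(uint8_t Value, uint32_t Shift) {
  // Shift & 7 reproduces the narrow-lane semantics within a 32-bit shift.
  return static_cast<uint8_t>(static_cast<uint32_t>(Value) << (Shift & 7));
}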
2777
2778SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2779 SelectionDAG &DAG) const {
2780 SDLoc DL(Op);
2781
2782 // Only manually lower vector shifts
2783 assert(Op.getSimpleValueType().isVector());
2784
2785 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2786 auto ShiftVal = Op.getOperand(1);
2787
2788 // Try to skip bitmask operation since it is implied inside shift instruction
2789 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2790 if (MaskOp.getOpcode() != ISD::AND)
2791 return MaskOp;
2792 SDValue LHS = MaskOp.getOperand(0);
2793 SDValue RHS = MaskOp.getOperand(1);
2794 if (MaskOp.getValueType().isVector()) {
2795 APInt MaskVal;
2796 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2797 std::swap(LHS, RHS);
2798
2799 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2800 MaskVal == MaskBits)
2801 MaskOp = LHS;
2802 } else {
2803 if (!isa<ConstantSDNode>(RHS.getNode()))
2804 std::swap(LHS, RHS);
2805
2806 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2807 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2808 MaskOp = LHS;
2809 }
2810
2811 return MaskOp;
2812 };
2813
2814 // Skip vector and operation
2815 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2816 ShiftVal = DAG.getSplatValue(ShiftVal);
2817 if (!ShiftVal)
2818 return unrollVectorShift(Op, DAG);
2819
2820 // Skip scalar and operation
2821 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2822 // Use anyext because none of the high bits can affect the shift
2823 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2824
2825 unsigned Opcode;
2826 switch (Op.getOpcode()) {
2827 case ISD::SHL:
2828 Opcode = WebAssemblyISD::VEC_SHL;
2829 break;
2830 case ISD::SRA:
2831 Opcode = WebAssemblyISD::VEC_SHR_S;
2832 break;
2833 case ISD::SRL:
2834 Opcode = WebAssemblyISD::VEC_SHR_U;
2835 break;
2836 default:
2837 llvm_unreachable("unexpected opcode");
2838 }
2839
2840 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2841}
2842
2843SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2844 SelectionDAG &DAG) const {
2845 EVT ResT = Op.getValueType();
2846 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2847
2848 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2849 (SatVT == MVT::i32 || SatVT == MVT::i64))
2850 return Op;
2851
2852 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2853 return Op;
2854
2855 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2856 return Op;
2857
2858 return SDValue();
2859}
2860
2861//===----------------------------------------------------------------------===//
2862// Custom DAG combine hooks
2863//===----------------------------------------------------------------------===//
2864static SDValue
2865performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2866 auto &DAG = DCI.DAG;
2867 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2868
2869 // Hoist vector bitcasts that don't change the number of lanes out of unary
2870 // shuffles, where they are less likely to get in the way of other combines.
2871 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2872 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2873 SDValue Bitcast = N->getOperand(0);
2874 if (Bitcast.getOpcode() != ISD::BITCAST)
2875 return SDValue();
2876 if (!N->getOperand(1).isUndef())
2877 return SDValue();
2878 SDValue CastOp = Bitcast.getOperand(0);
2879 EVT SrcType = CastOp.getValueType();
2880 EVT DstType = Bitcast.getValueType();
2881 if (!SrcType.is128BitVector() ||
2882 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2883 return SDValue();
2884 SDValue NewShuffle = DAG.getVectorShuffle(
2885 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2886 return DAG.getBitcast(DstType, NewShuffle);
2887}
2888
2889/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2890/// split up into scalar instructions during legalization, and the vector
2891/// extending instructions are selected in performVectorExtendCombine below.
2892static SDValue
2893performVectorExtendToFPCombine(SDNode *N,
2894 TargetLowering::DAGCombinerInfo &DCI) {
2895 auto &DAG = DCI.DAG;
2896 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2897 N->getOpcode() == ISD::SINT_TO_FP);
2898
2899 EVT InVT = N->getOperand(0)->getValueType(0);
2900 EVT ResVT = N->getValueType(0);
2901 MVT ExtVT;
2902 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2903 ExtVT = MVT::v4i32;
2904 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2905 ExtVT = MVT::v2i32;
2906 else
2907 return SDValue();
2908
2909 unsigned Op =
2910 N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2911 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2912 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2913}
2914
2915static SDValue
2916performVectorNonNegToFPCombine(SDNode *N,
2917 TargetLowering::DAGCombinerInfo &DCI) {
2918 auto &DAG = DCI.DAG;
2919
2920 SDNodeFlags Flags = N->getFlags();
2921 SDValue Op0 = N->getOperand(0);
2922 EVT VT = N->getValueType(0);
2923
2924 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2925 // Depending on the target (runtime) backend, this might be performance
2926 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2927 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2928 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2929 }
2930
2931 return SDValue();
2932}
2933
2934static SDValue
2935performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2936 auto &DAG = DCI.DAG;
2937 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2938 N->getOpcode() == ISD::ZERO_EXTEND);
2939
2940 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2941 // possible before the extract_subvector can be expanded.
2942 auto Extract = N->getOperand(0);
2943 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2944 return SDValue();
2945 auto Source = Extract.getOperand(0);
2946 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2947 if (IndexNode == nullptr)
2948 return SDValue();
2949 auto Index = IndexNode->getZExtValue();
2950
2951 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2952 // extracted subvector is the low or high half of its source.
2953 EVT ResVT = N->getValueType(0);
2954 if (ResVT == MVT::v8i16) {
2955 if (Extract.getValueType() != MVT::v8i8 ||
2956 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2957 return SDValue();
2958 } else if (ResVT == MVT::v4i32) {
2959 if (Extract.getValueType() != MVT::v4i16 ||
2960 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2961 return SDValue();
2962 } else if (ResVT == MVT::v2i64) {
2963 if (Extract.getValueType() != MVT::v2i32 ||
2964 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2965 return SDValue();
2966 } else {
2967 return SDValue();
2968 }
2969
2970 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2971 bool IsLow = Index == 0;
2972
2973 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2974 : WebAssemblyISD::EXTEND_HIGH_S)
2975 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2976 : WebAssemblyISD::EXTEND_HIGH_U);
2977
2978 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2979}
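// --- Editorial sketch (added in this edit; not part of the LLVM source).
// What extend_low_s does lane-wise for the v16i8 -> v8i16 case (assumes
// <cstdint>); extend_high_* reads lanes 8..15 instead, and the _u forms
// zero-extend. The function name is illustrative only.
static void modelExtendLowS(const int8_t Src[16], int16_t Dst[8]) {
  for (int I = 0; I != 8; ++I)
    Dst[I] = Src[I]; // sign-extend each of the low eight i8 lanes to i16
}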
2980
2981static SDValue
2982performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2983 auto &DAG = DCI.DAG;
2984
2985 auto GetWasmConversionOp = [](unsigned Op) {
2986 switch (Op) {
2987 case ISD::FP_TO_SINT_SAT:
2988 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2989 case ISD::FP_TO_UINT_SAT:
2990 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2991 case ISD::FP_ROUND:
2992 return WebAssemblyISD::DEMOTE_ZERO;
2993 }
2994 llvm_unreachable("unexpected op");
2995 };
2996
2997 auto IsZeroSplat = [](SDValue SplatVal) {
2998 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2999 APInt SplatValue, SplatUndef;
3000 unsigned SplatBitSize;
3001 bool HasAnyUndefs;
3002 // Endianness doesn't matter in this context because we are looking for
3003 // an all-zero value.
3004 return Splat &&
3005 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3006 HasAnyUndefs) &&
3007 SplatValue == 0;
3008 };
3009
3010 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3011 // Combine this:
3012 //
3013 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3014 //
3015 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3016 //
3017 // Or this:
3018 //
3019 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3020 //
3021 // into (f32x4.demote_zero_f64x2 $x).
3022 EVT ResVT;
3023 EVT ExpectedConversionType;
3024 auto Conversion = N->getOperand(0);
3025 auto ConversionOp = Conversion.getOpcode();
3026 switch (ConversionOp) {
3027 case ISD::FP_TO_SINT_SAT:
3028 case ISD::FP_TO_UINT_SAT:
3029 ResVT = MVT::v4i32;
3030 ExpectedConversionType = MVT::v2i32;
3031 break;
3032 case ISD::FP_ROUND:
3033 ResVT = MVT::v4f32;
3034 ExpectedConversionType = MVT::v2f32;
3035 break;
3036 default:
3037 return SDValue();
3038 }
3039
3040 if (N->getValueType(0) != ResVT)
3041 return SDValue();
3042
3043 if (Conversion.getValueType() != ExpectedConversionType)
3044 return SDValue();
3045
3046 auto Source = Conversion.getOperand(0);
3047 if (Source.getValueType() != MVT::v2f64)
3048 return SDValue();
3049
3050 if (!IsZeroSplat(N->getOperand(1)) ||
3051 N->getOperand(1).getValueType() != ExpectedConversionType)
3052 return SDValue();
3053
3054 unsigned Op = GetWasmConversionOp(ConversionOp);
3055 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3056 }
3057
3058 // Combine this:
3059 //
3060 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3061 //
3062 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3063 //
3064 // Or this:
3065 //
3066 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3067 //
3068 // into (f32x4.demote_zero_f64x2 $x).
3069 EVT ResVT;
3070 auto ConversionOp = N->getOpcode();
3071 switch (ConversionOp) {
3072 case ISD::FP_TO_SINT_SAT:
3073 case ISD::FP_TO_UINT_SAT:
3074 ResVT = MVT::v4i32;
3075 break;
3076 case ISD::FP_ROUND:
3077 ResVT = MVT::v4f32;
3078 break;
3079 default:
3080 llvm_unreachable("unexpected op");
3081 }
3082
3083 if (N->getValueType(0) != ResVT)
3084 return SDValue();
3085
3086 auto Concat = N->getOperand(0);
3087 if (Concat.getValueType() != MVT::v4f64)
3088 return SDValue();
3089
3090 auto Source = Concat.getOperand(0);
3091 if (Source.getValueType() != MVT::v2f64)
3092 return SDValue();
3093
3094 if (!IsZeroSplat(Concat.getOperand(1)) ||
3095 Concat.getOperand(1).getValueType() != MVT::v2f64)
3096 return SDValue();
3097
3098 unsigned Op = GetWasmConversionOp(ConversionOp);
3099 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3100}
3101
3102// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3103static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3104 const SDLoc &DL, unsigned VectorWidth) {
3105 EVT VT = Vec.getValueType();
3106 EVT ElVT = VT.getVectorElementType();
3107 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3108 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3109 VT.getVectorNumElements() / Factor);
3110
3111 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3112 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3113 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3114
3115 // This is the index of the first element of the VectorWidth-bit chunk
3116 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3117 IdxVal &= ~(ElemsPerChunk - 1);
3118
3119 // If the input is a buildvector just emit a smaller one.
3120 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3121 return DAG.getBuildVector(ResultVT, DL,
3122 Vec->ops().slice(IdxVal, ElemsPerChunk));
3123
3124 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3125 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3126}
3127
3128// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3129// is the expected destination value type after recursion. In is the initial
3130// input. Note that the input should have enough leading zero bits to prevent
3131// NARROW_U from saturating results.
3132static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3133 SelectionDAG &DAG) {
3134 EVT SrcVT = In.getValueType();
3135
3136 // No truncation required, we might get here due to recursive calls.
3137 if (SrcVT == DstVT)
3138 return In;
3139
3140 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3141 unsigned NumElems = SrcVT.getVectorNumElements();
3142 if (!isPowerOf2_32(NumElems))
3143 return SDValue();
3144 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3145 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3146
3147 LLVMContext &Ctx = *DAG.getContext();
3148 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3149
3150 // Narrow to the largest type possible:
3151 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3152 EVT InVT = MVT::i16, OutVT = MVT::i8;
3153 if (SrcVT.getScalarSizeInBits() > 16) {
3154 InVT = MVT::i32;
3155 OutVT = MVT::i16;
3156 }
3157 unsigned SubSizeInBits = SrcSizeInBits / 2;
3158 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3159 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3160
3161 // Split lower/upper subvectors.
3162 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3163 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3164
3165 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3166 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3167 Lo = DAG.getBitcast(InVT, Lo);
3168 Hi = DAG.getBitcast(InVT, Hi);
3169 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3170 return DAG.getBitcast(DstVT, Res);
3171 }
3172
3173 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3174 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3175 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3176 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3177
3178 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3179 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3180 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3181}
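// --- Editorial sketch (added in this edit; not part of the LLVM source).
// narrow_u's per-lane behavior for i16x8 -> i8x16 (assumes <cstdint>): it
// saturates rather than truncates, which is why callers must pre-mask the
// input so every lane already fits in the narrower type (performTruncate-
// Combine below does exactly that with an AND). Name is illustrative only.
static uint8_t modelNarrowULane(uint16_t Lane) {
  return Lane > 0xFF ? 0xFF : static_cast<uint8_t>(Lane); // unsigned saturate
}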
3182
3183static SDValue performTruncateCombine(SDNode *N,
3184 TargetLowering::DAGCombinerInfo &DCI) {
3185 auto &DAG = DCI.DAG;
3186
3187 SDValue In = N->getOperand(0);
3188 EVT InVT = In.getValueType();
3189 if (!InVT.isSimple())
3190 return SDValue();
3191
3192 EVT OutVT = N->getValueType(0);
3193 if (!OutVT.isVector())
3194 return SDValue();
3195
3196 EVT OutSVT = OutVT.getVectorElementType();
3197 EVT InSVT = InVT.getVectorElementType();
3198 // Currently only cover truncate to v16i8 or v8i16.
3199 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3200 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3201 return SDValue();
3202
3203 SDLoc DL(N);
3204 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3205 OutVT.getScalarSizeInBits());
3206 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3207 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3208}
3209
3210static SDValue performBitcastCombine(SDNode *N,
3211 TargetLowering::DAGCombinerInfo &DCI) {
3212 using namespace llvm::SDPatternMatch;
3213 auto &DAG = DCI.DAG;
3214 SDLoc DL(N);
3215 SDValue Src = N->getOperand(0);
3216 EVT VT = N->getValueType(0);
3217 EVT SrcVT = Src.getValueType();
3218
3219 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3220 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3221 return SDValue();
3222
3223 unsigned NumElts = SrcVT.getVectorNumElements();
3224 EVT Width = MVT::getIntegerVT(128 / NumElts);
3225
3226 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3227 // ==> bitmask
3228 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3229 return DAG.getZExtOrTrunc(
3230 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3231 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3232 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3233 SrcVT.changeVectorElementType(
3234 *DAG.getContext(), Width))}),
3235 DL, VT);
3236 }
3237
3238 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3239 if (NumElts == 32 || NumElts == 64) {
3240 // Strategy: compare the inputs one 128-bit chunk at a time (v16i8 ->
3241 // v16i1), bitcast each 16-lane result to an i16 bitmask, zero-extend it to
3242 // i32 or i64, and accumulate the chunks by shifting and adding.
3243 SDValue Concat, SetCCVector;
3244 ISD::CondCode SetCond;
3245
3246 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3247 m_CondCode(SetCond)))))
3248 return SDValue();
3249 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3250 return SDValue();
3251
3252 uint64_t ElementWidth =
3253 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3254
3255 SmallVector<SDValue> VectorsToShuffle;
3256 for (size_t I = 0; I < Concat->ops().size(); I++) {
3257 VectorsToShuffle.push_back(DAG.getBitcast(
3258 MVT::i16,
3259 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3260 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3261 DAG, DL, 128),
3262 SetCond)));
3263 }
3264
3265 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3266 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3267
3268 for (SDValue V : VectorsToShuffle) {
3269 ReturningInteger = DAG.getNode(
3270 ISD::SHL, DL, ReturnType,
3271 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3272
3273 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3274 ReturningInteger =
3275 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3276 }
3277
3278 return ReturningInteger;
3279 }
3280
3281 return SDValue();
3282}
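// --- Editorial sketch (added in this edit; not part of the LLVM source). The
// accumulation loop above in scalar form (assumes <cstdint>): each 128-bit
// chunk contributes a 16-bit bitmask, and the running value is shifted left
// 16 bits before the next chunk is added in. Name is illustrative only.
static uint64_t modelAccumulateBitmasks(const uint16_t Chunks[],
                                        int NumChunks) {
  uint64_t Result = 0;
  for (int I = 0; I < NumChunks; ++I)
    Result = (Result << 16) + Chunks[I]; // make room, then add the next chunk
  return Result;
}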
3283
3284static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3285 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3286 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3287 // any_true (setcc <X>, 0, ne) => (any_true X)
3288 // all_true (setcc <X>, 0, ne) => (all_true X)
3289 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3290 using namespace llvm::SDPatternMatch;
3291
3292 SDValue LHS;
3293 if (N->getNumOperands() < 2 ||
3294 !sd_match(N->getOperand(1),
3295 m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3296 return SDValue();
3297 EVT LT = LHS.getValueType();
3298 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3299 return SDValue();
3300
3301 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3302 ISD::CondCode SetType,
3303 Intrinsic::WASMIntrinsics InPost) {
3304 if (N->getConstantOperandVal(0) != InPre)
3305 return SDValue();
3306
3307 SDValue LHS;
3308 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3309 m_SpecificCondCode(SetType))))
3310 return SDValue();
3311
3312 SDLoc DL(N);
3313 SDValue Ret = DAG.getZExtOrTrunc(
3314 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3315 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3316 DL, MVT::i1);
3317 if (SetType == ISD::SETEQ)
3318 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3319 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3320 };
3321
3322 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3323 Intrinsic::wasm_alltrue))
3324 return AnyTrueEQ;
3325 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3326 Intrinsic::wasm_anytrue))
3327 return AllTrueEQ;
3328 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3329 Intrinsic::wasm_anytrue))
3330 return AnyTrueNE;
3331 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3332 Intrinsic::wasm_alltrue))
3333 return AllTrueNE;
3334
3335 return SDValue();
3336}
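// --- Editorial sketch (added in this edit; not part of the LLVM source). The
// four rewrites above are De Morgan identities over lane predicates, e.g.
//   exists lane i: X[i] == 0   <=>   !(forall lane i: X[i] != 0).
// A scalar check of that first identity (assumes <cstdint> and <cstddef>;
// the name is illustrative only):
static bool modelAnyZeroEqualsNotAllNonZero(const uint8_t Lanes[], size_t N) {
  bool AnyZero = false, AllNonZero = true;
  for (size_t I = 0; I < N; ++I) {
    AnyZero = AnyZero || Lanes[I] == 0;
    AllNonZero = AllNonZero && Lanes[I] != 0;
  }
  return AnyZero == !AllNonZero; // holds for every input
}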
3337
3338template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3339 Intrinsic::ID Intrin>
3340static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3341 SDValue LHS = N->getOperand(0);
3342 SDValue RHS = N->getOperand(1);
3343 SDValue Cond = N->getOperand(2);
3344 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3345 return SDValue();
3346
3347 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3348 return SDValue();
3349
3350 SDLoc DL(N);
3351 SDValue Ret = DAG.getZExtOrTrunc(
3352 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3353 {DAG.getConstant(Intrin, DL, MVT::i32),
3354 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3355 DL, MVT::i1);
3356 if (RequiresNegate)
3357 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3358 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3359}
3360
3361/// Try to convert an i128 comparison into a v16i8 comparison before type
3362/// legalization splits it up into chunks.
3363static SDValue
3364 combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3365 const WebAssemblySubtarget *Subtarget) {
3366
3367 SDLoc DL(N);
3368 SDValue X = N->getOperand(0);
3369 SDValue Y = N->getOperand(1);
3370 EVT VT = N->getValueType(0);
3371 EVT OpVT = X.getValueType();
3372
3373 SelectionDAG &DAG = DCI.DAG;
3374 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3375 Attribute::NoImplicitFloat))
3376 return SDValue();
3377
3378 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3379 // We're looking for an oversized integer equality comparison that we can
3380 // lower with SIMD.
3380 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3381 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3382 return SDValue();
3383
3384 // Don't perform this combine if constructing the vector will be expensive.
3385 auto IsVectorBitCastCheap = [](SDValue X) {
3386 X = peekThroughBitcasts(X);
3387 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3388 };
3389
3390 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3391 return SDValue();
3392
3393 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3394 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3395 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3396
3397 SDValue Intr =
3398 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3399 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3400 : Intrinsic::wasm_anytrue,
3401 DL, MVT::i32),
3402 Cmp});
3403
3404 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3405 ISD::SETNE);
3406}
3407
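/// Combine scalar integer setccs: first try the vector-sized equality
/// lowering above, then fold setccs of vNi1 bitcasts against 0 or -1 into
/// any_true/all_true intrinsics via TryMatchTrue.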
3408 static SDValue performSETCCCombine(SDNode *N,
3409 TargetLowering::DAGCombinerInfo &DCI,
3410 const WebAssemblySubtarget *Subtarget) {
3411 if (!DCI.isBeforeLegalize())
3412 return SDValue();
3413
3414 EVT VT = N->getValueType(0);
3415 if (!VT.isScalarInteger())
3416 return SDValue();
3417
3418 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3419 return V;
3420
3421 SDValue LHS = N->getOperand(0);
3422 if (LHS->getOpcode() != ISD::BITCAST)
3423 return SDValue();
3424
3425 EVT FromVT = LHS->getOperand(0).getValueType();
3426 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3427 return SDValue();
3428
3429 unsigned NumElts = FromVT.getVectorNumElements();
3430 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3431 return SDValue();
3432
3433 if (!isa<ConstantSDNode>(N->getOperand(1)))
3434 return SDValue();
3435
3436 auto &DAG = DCI.DAG;
3437 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3438 MVT::getIntegerVT(128 / NumElts));
3439 // setcc (iN (bitcast (vNi1 X))), 0, ne
3440 // ==> any_true (vNi1 X)
3441 if (SDValue Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3442 N, VecVT, DAG)) {
3443 return Match;
3444 }
3445 // setcc (iN (bitcast (vNi1 X))), 0, eq
3446 // ==> xor (any_true (vNi1 X)), -1
3447 if (SDValue Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3448 N, VecVT, DAG)) {
3449 return Match;
3450 }
3451 // setcc (iN (bitcast (vNi1 X))), -1, eq
3452 // ==> all_true (vNi1 X)
3453 if (SDValue Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3454 N, VecVT, DAG)) {
3455 return Match;
3456 }
3457 // setcc (iN (bitcast (vNi1 X))), -1, ne
3458 // ==> xor (all_true (vNi1 X)), -1
3459 if (SDValue Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3460 N, VecVT, DAG)) {
3461 return Match;
3462 }
3463 return SDValue();
3464}
3465
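/// Lower a multiply of sign- or zero-extended i8 vectors producing v8i32 or
/// v16i32 into extmul_low/high and extend_low/high nodes, as laid out in the
/// table below.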
3466 static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3467 EVT VT = N->getValueType(0);
3468 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3469 return SDValue();
3470
3471 // Mul with extending inputs.
3472 SDValue LHS = N->getOperand(0);
3473 SDValue RHS = N->getOperand(1);
3474 if (LHS.getOpcode() != RHS.getOpcode())
3475 return SDValue();
3476
3477 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3478 LHS.getOpcode() != ISD::ZERO_EXTEND)
3479 return SDValue();
3480
3481 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3482 return SDValue();
3483
3484 EVT FromVT = LHS->getOperand(0).getValueType();
3485 EVT EltTy = FromVT.getVectorElementType();
3486 if (EltTy != MVT::i8)
3487 return SDValue();
3488
3489 // For an input DAG that looks like this:
3490 // %a = input_type
3491 // %b = input_type
3492 // %lhs = extend %a to output_type
3493 // %rhs = extend %b to output_type
3494 // %mul = mul %lhs, %rhs
3495
3496 // input_type | output_type | instructions
3497 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_{s,u} %a, %b
3498 // | | %high = i16x8.extmul_high_i8x16_{s,u} %a, %b
3499 // | | %low_low = i32x4.ext_low_i16x8_{s,u} %low
3500 // | | %low_high = i32x4.ext_high_i16x8_{s,u} %low
3501 // | | %high_low = i32x4.ext_low_i16x8_{s,u} %high
3502 // | | %high_high = i32x4.ext_high_i16x8_{s,u} %high
3503 // | | %res = concat_vector(...)
3504 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_{s,u} %a, %b
3505 // | | %low_low = i32x4.ext_low_i16x8_{s,u} %low
3506 // | | %low_high = i32x4.ext_high_i16x8_{s,u} %low
3507 // | | %res = concat_vector(%low_low, %low_high)
3508
3509 SDLoc DL(N);
3510 unsigned NumElts = VT.getVectorNumElements();
3511 SDValue ExtendInLHS = LHS->getOperand(0);
3512 SDValue ExtendInRHS = RHS->getOperand(0);
3513 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3514 unsigned ExtendLowOpc =
3515 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3516 unsigned ExtendHighOpc =
3517 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3518
3519 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3520 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3521 };
3522 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3523 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3524 };
3525
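// For a v16i32 result, multiply the low and high extended halves as v8i16,
// then extend each 16-bit product half to v4i32 and concatenate the four
// quarters.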
3526 if (NumElts == 16) {
3527 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3528 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3529 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3530 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3531 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3532 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3533 SDValue SubVectors[] = {
3534 GetExtendLow(MVT::v4i32, MulLow),
3535 GetExtendHigh(MVT::v4i32, MulLow),
3536 GetExtendLow(MVT::v4i32, MulHigh),
3537 GetExtendHigh(MVT::v4i32, MulHigh),
3538 };
3539 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3540 } else {
3541 assert(NumElts == 8);
3542 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3543 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3544 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3545 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3546 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3547 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3548 }
3549 return SDValue();
3550}
3551
3552 static SDValue performMulCombine(SDNode *N,
3553 TargetLowering::DAGCombinerInfo &DCI) {
3554 assert(N->getOpcode() == ISD::MUL);
3555 EVT VT = N->getValueType(0);
3556 if (!VT.isVector())
3557 return SDValue();
3558
3559 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3560 return Res;
3561
3562 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3563 // extend them to v8i16.
3564 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3565 return SDValue();
3566
3567 SDLoc DL(N);
3568 SelectionDAG &DAG = DCI.DAG;
3569 SDValue LHS = N->getOperand(0);
3570 SDValue RHS = N->getOperand(1);
3571 EVT MulVT = MVT::v8i16;
3572
3573 if (VT == MVT::v8i8) {
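// Widen the v8i8 operands to v16i8 so extend_low applies, multiply as
// v8i16, then gather the low byte of each 16-bit product.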
3574 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3575 DAG.getUNDEF(MVT::v8i8));
3576 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3577 DAG.getUNDEF(MVT::v8i8));
3578 SDValue LowLHS =
3579 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3580 SDValue LowRHS =
3581 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3582 SDValue MulLow = DAG.getBitcast(
3583 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3584 // Take the low byte of each lane.
3585 SDValue Shuffle = DAG.getVectorShuffle(
3586 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3587 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3588 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3589 } else {
3590 assert(VT == MVT::v16i8 && "Expected v16i8");
3591 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3592 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3593 SDValue HighLHS =
3594 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3595 SDValue HighRHS =
3596 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3597
3598 SDValue MulLow =
3599 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3600 SDValue MulHigh =
3601 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3602
3603 // Take the low byte of each lane.
3604 return DAG.getVectorShuffle(
3605 VT, DL, MulLow, MulHigh,
3606 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3607 }
3608}
3609
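/// Double the width of In by concatenating it with poison elements,
/// recursing until the vector has RequiredNumElems elements.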
3610SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3611 SelectionDAG &DAG) {
3612 SDLoc DL(In);
3613 LLVMContext &Ctx = *DAG.getContext();
3614 EVT InVT = In.getValueType();
3615 unsigned NumElems = InVT.getVectorNumElements() * 2;
3616 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3617 SDValue Concat =
3618 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3619 if (NumElems < RequiredNumElems) {
3620 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3621 }
3622 return Concat;
3623}
3624
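/// Lower a vector FP_TO_SINT/FP_TO_UINT producing i8 or i16 lanes: convert
/// to i32 lanes first, mask off the bits that truncation would drop, then
/// narrow; the masking lets the saturating narrow act as a plain truncate.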
3625 SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3626 EVT OutVT = N->getValueType(0);
3627 if (!OutVT.isVector())
3628 return SDValue();
3629
3630 EVT OutElTy = OutVT.getVectorElementType();
3631 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3632 return SDValue();
3633
3634 unsigned NumElems = OutVT.getVectorNumElements();
3635 if (!isPowerOf2_32(NumElems))
3636 return SDValue();
3637
3638 EVT FPVT = N->getOperand(0)->getValueType(0);
3639 if (FPVT.getVectorElementType() != MVT::f32)
3640 return SDValue();
3641
3642 SDLoc DL(N);
3643
3644 // First, convert to i32.
3645 LLVMContext &Ctx = *DAG.getContext();
3646 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3647 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3648 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3649 OutVT.getScalarSizeInBits());
3650 // Mask off the high bits so the saturating narrow below behaves like a
3651 // plain truncate.
3651 SDValue Masked =
3652 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3653
3654 if (OutVT.getSizeInBits() < 128) {
3655 // Create a wide enough vector that we can use narrow.
3656 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3657 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3658 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3659 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3660 return DAG.getBitcast(
3661 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3662 } else {
3663 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3664 }
3665 return SDValue();
3666}
3667
3668SDValue
3669WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3670 DAGCombinerInfo &DCI) const {
3671 switch (N->getOpcode()) {
3672 default:
3673 return SDValue();
3674 case ISD::BITCAST:
3675 return performBitcastCombine(N, DCI);
3676 case ISD::SETCC:
3677 return performSETCCCombine(N, DCI, Subtarget);
3678 case ISD::VECTOR_SHUFFLE:
3679 return performVECTOR_SHUFFLECombine(N, DCI);
3680 case ISD::SIGN_EXTEND:
3681 case ISD::ZERO_EXTEND:
3682 return performVectorExtendCombine(N, DCI);
3683 case ISD::UINT_TO_FP:
3684 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3685 return ExtCombine;
3686 return performVectorNonNegToFPCombine(N, DCI);
3687 case ISD::SINT_TO_FP:
3688 return performVectorExtendToFPCombine(N, DCI);
3689 case ISD::FP_TO_SINT_SAT:
3690 case ISD::FP_TO_UINT_SAT:
3691 case ISD::FP_ROUND:
3692 case ISD::CONCAT_VECTORS:
3693 return performVectorTruncZeroCombine(N, DCI);
3694 case ISD::FP_TO_SINT:
3695 case ISD::FP_TO_UINT:
3696 return performConvertFPCombine(N, DCI.DAG);
3697 case ISD::TRUNCATE:
3698 return performTruncateCombine(N, DCI);
3699 case ISD::INTRINSIC_WO_CHAIN:
3700 return performAnyAllCombine(N, DCI.DAG);
3701 case ISD::MUL:
3702 return performMulCombine(N, DCI);
3703 }
3704}