LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
161 if (Subtarget->hasFP16() && T == MVT::f32) {
163 setTruncStoreAction(T, MVT::f16, Legal);
164 } else {
166 setTruncStoreAction(T, MVT::f16, Expand);
167 }
168 }
169
170 // Expand unavailable integer operations.
171 for (auto Op :
175 for (auto T : {MVT::i32, MVT::i64})
177 if (Subtarget->hasSIMD128())
178 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
180 }
181
182 if (Subtarget->hasWideArithmetic()) {
188 }
189
190 if (Subtarget->hasNontrappingFPToInt())
192 for (auto T : {MVT::i32, MVT::i64})
194
195 if (Subtarget->hasRelaxedSIMD()) {
198 {MVT::v4f32, MVT::v2f64}, Custom);
199 }
200 // SIMD-specific configuration
201 if (Subtarget->hasSIMD128()) {
202
204
205 // Combine wide-vector muls, with extend inputs, to extmul_half.
208
209 // Combine vector mask reductions into alltrue/anytrue
211
212 // Convert vector to integer bitcasts to bitmask
214
215 // Hoist bitcasts out of shuffles
217
218 // Combine extends of extract_subvectors into widening ops
220
221 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
222 // conversions ops
225
226 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
227 // into conversion ops
231
233
234 // Support saturating add/sub for i8x16 and i16x8
236 for (auto T : {MVT::v16i8, MVT::v8i16})
238
239 // Support integer abs
240 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
242
243 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
244 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
245 MVT::v2f64})
247
248 if (Subtarget->hasFP16()) {
251 }
252
253 // We have custom shuffle lowering to expose the shuffle mask
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 if (Subtarget->hasFP16())
260
261 // Support splatting
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
263 MVT::v2f64})
265
266 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
267
268 // Custom lowering since wasm shifts must have a scalar shift amount
269 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
270 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
272
273 // Custom lower lane accesses to expand out variable indices
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // There is no i8x16.mul instruction
280 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
281
282 // Expand integer operations supported for scalars but not SIMD
283 for (auto Op :
285 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
287
288 // But we do have integer min and max operations
289 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
290 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
292
293 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
294 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
295 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
296 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
297
298 // Custom lower bit counting operations for other types to scalarize them.
299 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
300 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
302
303 // Expand float operations supported for scalars but not SIMD
306 for (auto T : {MVT::v4f32, MVT::v2f64})
308
309 // Unsigned comparison operations are unavailable for i64x2 vectors.
311 setCondCodeAction(CC, MVT::v2i64, Custom);
312
313 // 64x2 conversions are not in the spec
314 for (auto Op :
316 for (auto T : {MVT::v2i64, MVT::v2f64})
318
319 // But saturating fp_to_int conversions are
321 setOperationAction(Op, MVT::v4i32, Custom);
322 if (Subtarget->hasFP16()) {
323 setOperationAction(Op, MVT::v8i16, Custom);
324 }
325 }
326
327 // Support vector extending
332 }
333
334 if (Subtarget->hasFP16()) {
335 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
336 }
337
338 if (Subtarget->hasRelaxedSIMD()) {
341 }
342
343 // Partial MLA reductions.
345 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
346 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
347 }
348 }
349
350 // As a special case, these operators use the type to mean the type to
351 // sign-extend from.
353 if (!Subtarget->hasSignExt()) {
354 // Sign extends are legal only when extending a vector extract
355 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
356 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
358 }
361
362 // Dynamic stack allocation: use the default expansion.
366
370
371 // Expand these forms; we pattern-match the forms that we can handle in isel.
372 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
373 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
375
376 if (Subtarget->hasReferenceTypes())
377 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
378 for (auto T : {MVT::externref, MVT::funcref})
380
381 // There is no vector conditional select instruction
382 for (auto T :
383 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
385
386 // We have custom switch handling.
388
389 // WebAssembly doesn't have:
390 // - Floating-point extending loads.
391 // - Floating-point truncating stores.
392 // - i1 extending loads.
393 // - truncating SIMD stores and most extending loads
394 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
395 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
396 for (auto T : MVT::integer_valuetypes())
397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
398 setLoadExtAction(Ext, T, MVT::i1, Promote);
399 if (Subtarget->hasSIMD128()) {
400 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
401 MVT::v2f64}) {
402 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
403 if (MVT(T) != MemT) {
405 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
406 setLoadExtAction(Ext, T, MemT, Expand);
407 }
408 }
409 }
410 // But some vector extending loads are legal
411 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
412 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
413 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
414 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
415 }
416 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
417 }
418
419 // Don't do anything clever with build_pairs
421
422 // Trap lowers to wasm unreachable
423 setOperationAction(ISD::TRAP, MVT::Other, Legal);
425
426 // Exception handling intrinsics
430
432
433 // Always convert switches to br_tables unless there is only one case, which
434 // is equivalent to a simple branch. This reduces code size for wasm, and we
435 // defer possible jump table optimizations to the VM.
437}
438
447
456
458WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
459 const AtomicRMWInst *AI) const {
460 // We have wasm instructions for these
461 switch (AI->getOperation()) {
469 default:
470 break;
471 }
473}
474
475bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
476 // Implementation copied from X86TargetLowering.
477 unsigned Opc = VecOp.getOpcode();
478
479 // Assume target opcodes can't be scalarized.
480 // TODO - do we have any exceptions?
482 return false;
483
484 // If the vector op is not supported, try to convert to scalar.
485 EVT VecVT = VecOp.getValueType();
487 return true;
488
489 // If the vector op is supported, but the scalar op is not, the transform may
490 // not be worthwhile.
491 EVT ScalarVT = VecVT.getScalarType();
492 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
493}
494
495FastISel *WebAssemblyTargetLowering::createFastISel(
496 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
497 const LibcallLoweringInfo *LibcallLowering) const {
498 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
499}
500
501MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
502 EVT VT) const {
503 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
504 if (BitWidth > 1 && BitWidth < 8)
505 BitWidth = 8;
506
507 if (BitWidth > 64) {
508 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
509 // the count to be an i32.
510 BitWidth = 32;
512 "32-bit shift counts ought to be enough for anyone");
513 }
514
517 "Unable to represent scalar shift amount type");
518 return Result;
519}
520
521// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
522// undefined result on invalid/overflow, to the WebAssembly opcode, which
523// traps on invalid/overflow.
526 const TargetInstrInfo &TII,
527 bool IsUnsigned, bool Int64,
528 bool Float64, unsigned LoweredOpcode) {
530
531 Register OutReg = MI.getOperand(0).getReg();
532 Register InReg = MI.getOperand(1).getReg();
533
534 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
535 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
536 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
537 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
538 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
539 unsigned Eqz = WebAssembly::EQZ_I32;
540 unsigned And = WebAssembly::AND_I32;
541 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
542 int64_t Substitute = IsUnsigned ? 0 : Limit;
543 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
544 auto &Context = BB->getParent()->getFunction().getContext();
545 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
546
547 const BasicBlock *LLVMBB = BB->getBasicBlock();
548 MachineFunction *F = BB->getParent();
549 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
550 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
551 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
552
554 F->insert(It, FalseMBB);
555 F->insert(It, TrueMBB);
556 F->insert(It, DoneMBB);
557
558 // Transfer the remainder of BB and its successor edges to DoneMBB.
559 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
561
562 BB->addSuccessor(TrueMBB);
563 BB->addSuccessor(FalseMBB);
564 TrueMBB->addSuccessor(DoneMBB);
565 FalseMBB->addSuccessor(DoneMBB);
566
567 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
568 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
569 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
570 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
571 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
572 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
573 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
574
575 MI.eraseFromParent();
576 // For signed numbers, we can do a single comparison to determine whether
577 // fabs(x) is within range.
578 if (IsUnsigned) {
579 Tmp0 = InReg;
580 } else {
581 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
582 }
583 BuildMI(BB, DL, TII.get(FConst), Tmp1)
584 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
585 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
586
587 // For unsigned numbers, we have to do a separate comparison with zero.
588 if (IsUnsigned) {
589 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
590 Register SecondCmpReg =
591 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
592 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
593 BuildMI(BB, DL, TII.get(FConst), Tmp1)
594 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
595 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
596 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
597 CmpReg = AndReg;
598 }
599
600 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
601
602 // Create the CFG diamond to select between doing the conversion or using
603 // the substitute value.
604 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
605 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
606 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
607 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
608 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
609 .addReg(FalseReg)
610 .addMBB(FalseMBB)
611 .addReg(TrueReg)
612 .addMBB(TrueMBB);
613
614 return DoneMBB;
615}
616
617// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
618// instruction to handle the zero-length case.
621 const TargetInstrInfo &TII, bool Int64) {
623
624 MachineOperand DstMem = MI.getOperand(0);
625 MachineOperand SrcMem = MI.getOperand(1);
626 MachineOperand Dst = MI.getOperand(2);
627 MachineOperand Src = MI.getOperand(3);
628 MachineOperand Len = MI.getOperand(4);
629
630 // If the length is a constant, we don't actually need the check.
631 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
632 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
633 Def->getOpcode() == WebAssembly::CONST_I64) {
634 if (Def->getOperand(1).getImm() == 0) {
635 // A zero-length memcpy is a no-op.
636 MI.eraseFromParent();
637 return BB;
638 }
639 // A non-zero-length memcpy doesn't need a zero check.
640 unsigned MemoryCopy =
641 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
642 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
643 .add(DstMem)
644 .add(SrcMem)
645 .add(Dst)
646 .add(Src)
647 .add(Len);
648 MI.eraseFromParent();
649 return BB;
650 }
651 }
652
653 // We're going to add an extra use to `Len` to test if it's zero; that
654 // use shouldn't be a kill, even if the original use is.
655 MachineOperand NoKillLen = Len;
656 NoKillLen.setIsKill(false);
657
658 // Decide on which `MachineInstr` opcode we're going to use.
659 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
660 unsigned MemoryCopy =
661 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
662
663 // Create two new basic blocks; one for the new `memory.copy` that we can
664 // branch over, and one for the rest of the instructions after the original
665 // `memory.copy`.
666 const BasicBlock *LLVMBB = BB->getBasicBlock();
667 MachineFunction *F = BB->getParent();
668 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
669 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
670
672 F->insert(It, TrueMBB);
673 F->insert(It, DoneMBB);
674
675 // Transfer the remainder of BB and its successor edges to DoneMBB.
676 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
678
679 // Connect the CFG edges.
680 BB->addSuccessor(TrueMBB);
681 BB->addSuccessor(DoneMBB);
682 TrueMBB->addSuccessor(DoneMBB);
683
684 // Create a virtual register for the `Eqz` result.
685 unsigned EqzReg;
686 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
687
688 // Erase the original `memory.copy`.
689 MI.eraseFromParent();
690
691 // Test if `Len` is zero.
692 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
693
694 // Insert a new `memory.copy`.
695 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
696 .add(DstMem)
697 .add(SrcMem)
698 .add(Dst)
699 .add(Src)
700 .add(Len);
701
702 // Create the CFG triangle.
703 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
704 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
705
706 return DoneMBB;
707}
708
709// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
710// instruction to handle the zero-length case.
713 const TargetInstrInfo &TII, bool Int64) {
715
716 MachineOperand Mem = MI.getOperand(0);
717 MachineOperand Dst = MI.getOperand(1);
718 MachineOperand Val = MI.getOperand(2);
719 MachineOperand Len = MI.getOperand(3);
720
721 // If the length is a constant, we don't actually need the check.
722 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
723 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
724 Def->getOpcode() == WebAssembly::CONST_I64) {
725 if (Def->getOperand(1).getImm() == 0) {
726 // A zero-length memset is a no-op.
727 MI.eraseFromParent();
728 return BB;
729 }
730 // A non-zero-length memset doesn't need a zero check.
731 unsigned MemoryFill =
732 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
733 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
734 .add(Mem)
735 .add(Dst)
736 .add(Val)
737 .add(Len);
738 MI.eraseFromParent();
739 return BB;
740 }
741 }
742
743 // We're going to add an extra use to `Len` to test if it's zero; that
744 // use shouldn't be a kill, even if the original use is.
745 MachineOperand NoKillLen = Len;
746 NoKillLen.setIsKill(false);
747
748 // Decide on which `MachineInstr` opcode we're going to use.
749 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
750 unsigned MemoryFill =
751 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
752
753 // Create two new basic blocks; one for the new `memory.fill` that we can
754 // branch over, and one for the rest of the instructions after the original
755 // `memory.fill`.
756 const BasicBlock *LLVMBB = BB->getBasicBlock();
757 MachineFunction *F = BB->getParent();
758 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
759 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
760
762 F->insert(It, TrueMBB);
763 F->insert(It, DoneMBB);
764
765 // Transfer the remainder of BB and its successor edges to DoneMBB.
766 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
768
769 // Connect the CFG edges.
770 BB->addSuccessor(TrueMBB);
771 BB->addSuccessor(DoneMBB);
772 TrueMBB->addSuccessor(DoneMBB);
773
774 // Create a virtual register for the `Eqz` result.
775 unsigned EqzReg;
776 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
777
778 // Erase the original `memory.fill`.
779 MI.eraseFromParent();
780
781 // Test if `Len` is zero.
782 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
783
784 // Insert a new `memory.fill`.
785 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
786
787 // Create the CFG triangle.
788 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
789 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
790
791 return DoneMBB;
792}
793
794static MachineBasicBlock *
796 const WebAssemblySubtarget *Subtarget,
797 const TargetInstrInfo &TII) {
798 MachineInstr &CallParams = *CallResults.getPrevNode();
799 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
800 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
801 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
802
803 bool IsIndirect =
804 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
805 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
806
807 bool IsFuncrefCall = false;
808 if (IsIndirect && CallParams.getOperand(0).isReg()) {
809 Register Reg = CallParams.getOperand(0).getReg();
810 const MachineFunction *MF = BB->getParent();
811 const MachineRegisterInfo &MRI = MF->getRegInfo();
812 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
813 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
814 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
815 }
816
817 unsigned CallOp;
818 if (IsIndirect && IsRetCall) {
819 CallOp = WebAssembly::RET_CALL_INDIRECT;
820 } else if (IsIndirect) {
821 CallOp = WebAssembly::CALL_INDIRECT;
822 } else if (IsRetCall) {
823 CallOp = WebAssembly::RET_CALL;
824 } else {
825 CallOp = WebAssembly::CALL;
826 }
827
828 MachineFunction &MF = *BB->getParent();
829 const MCInstrDesc &MCID = TII.get(CallOp);
830 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
831
832 // Move the function pointer to the end of the arguments for indirect calls
833 if (IsIndirect) {
834 auto FnPtr = CallParams.getOperand(0);
835 CallParams.removeOperand(0);
836
837 // For funcrefs, call_indirect is done through __funcref_call_table and the
838 // funcref is always installed in slot 0 of the table, therefore instead of
839 // having the function pointer added at the end of the params list, a zero
840 // (the index in
841 // __funcref_call_table) is added.
842 if (IsFuncrefCall) {
843 Register RegZero =
844 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
845 MachineInstrBuilder MIBC0 =
846 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
847
848 BB->insert(CallResults.getIterator(), MIBC0);
849 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
850 } else
851 CallParams.addOperand(FnPtr);
852 }
853
854 for (auto Def : CallResults.defs())
855 MIB.add(Def);
856
857 if (IsIndirect) {
858 // Placeholder for the type index.
859 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
860 MIB.addImm(0);
861 // The table into which this call_indirect indexes.
862 MCSymbolWasm *Table = IsFuncrefCall
864 MF.getContext(), Subtarget)
866 MF.getContext(), Subtarget);
867 if (Subtarget->hasCallIndirectOverlong()) {
868 MIB.addSym(Table);
869 } else {
870 // For the MVP there is at most one table whose number is 0, but we can't
871 // write a table symbol or issue relocations. Instead we just ensure the
872 // table is live and write a zero.
873 Table->setNoStrip();
874 MIB.addImm(0);
875 }
876 }
877
878 for (auto Use : CallParams.uses())
879 MIB.add(Use);
880
881 BB->insert(CallResults.getIterator(), MIB);
882 CallParams.eraseFromParent();
883 CallResults.eraseFromParent();
884
885 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
886 // table slot with ref.null upon call_indirect return.
887 //
888 // This generates the following code, which comes right after a call_indirect
889 // of a funcref:
890 //
891 // i32.const 0
892 // ref.null func
893 // table.set __funcref_call_table
894 if (IsIndirect && IsFuncrefCall) {
896 MF.getContext(), Subtarget);
897 Register RegZero =
898 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
899 MachineInstr *Const0 =
900 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
901 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
902
903 Register RegFuncref =
904 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
905 MachineInstr *RefNull =
906 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
907 BB->insertAfter(Const0->getIterator(), RefNull);
908
909 MachineInstr *TableSet =
910 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
911 .addSym(Table)
912 .addReg(RegZero)
913 .addReg(RegFuncref);
914 BB->insertAfter(RefNull->getIterator(), TableSet);
915 }
916
917 return BB;
918}
919
920MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
921 MachineInstr &MI, MachineBasicBlock *BB) const {
922 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
923 DebugLoc DL = MI.getDebugLoc();
924
925 switch (MI.getOpcode()) {
926 default:
927 llvm_unreachable("Unexpected instr type to insert");
928 case WebAssembly::FP_TO_SINT_I32_F32:
929 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
930 WebAssembly::I32_TRUNC_S_F32);
931 case WebAssembly::FP_TO_UINT_I32_F32:
932 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
933 WebAssembly::I32_TRUNC_U_F32);
934 case WebAssembly::FP_TO_SINT_I64_F32:
935 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
936 WebAssembly::I64_TRUNC_S_F32);
937 case WebAssembly::FP_TO_UINT_I64_F32:
938 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
939 WebAssembly::I64_TRUNC_U_F32);
940 case WebAssembly::FP_TO_SINT_I32_F64:
941 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
942 WebAssembly::I32_TRUNC_S_F64);
943 case WebAssembly::FP_TO_UINT_I32_F64:
944 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
945 WebAssembly::I32_TRUNC_U_F64);
946 case WebAssembly::FP_TO_SINT_I64_F64:
947 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
948 WebAssembly::I64_TRUNC_S_F64);
949 case WebAssembly::FP_TO_UINT_I64_F64:
950 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
951 WebAssembly::I64_TRUNC_U_F64);
952 case WebAssembly::MEMCPY_A32:
953 return LowerMemcpy(MI, DL, BB, TII, false);
954 case WebAssembly::MEMCPY_A64:
955 return LowerMemcpy(MI, DL, BB, TII, true);
956 case WebAssembly::MEMSET_A32:
957 return LowerMemset(MI, DL, BB, TII, false);
958 case WebAssembly::MEMSET_A64:
959 return LowerMemset(MI, DL, BB, TII, true);
960 case WebAssembly::CALL_RESULTS:
961 case WebAssembly::RET_CALL_RESULTS:
962 return LowerCallResults(MI, DL, BB, Subtarget, TII);
963 }
964}
965
966std::pair<unsigned, const TargetRegisterClass *>
967WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
968 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
969 // First, see if this is a constraint that directly corresponds to a
970 // WebAssembly register class.
971 if (Constraint.size() == 1) {
972 switch (Constraint[0]) {
973 case 'r':
974 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
975 if (Subtarget->hasSIMD128() && VT.isVector()) {
976 if (VT.getSizeInBits() == 128)
977 return std::make_pair(0U, &WebAssembly::V128RegClass);
978 }
979 if (VT.isInteger() && !VT.isVector()) {
980 if (VT.getSizeInBits() <= 32)
981 return std::make_pair(0U, &WebAssembly::I32RegClass);
982 if (VT.getSizeInBits() <= 64)
983 return std::make_pair(0U, &WebAssembly::I64RegClass);
984 }
985 if (VT.isFloatingPoint() && !VT.isVector()) {
986 switch (VT.getSizeInBits()) {
987 case 32:
988 return std::make_pair(0U, &WebAssembly::F32RegClass);
989 case 64:
990 return std::make_pair(0U, &WebAssembly::F64RegClass);
991 default:
992 break;
993 }
994 }
995 break;
996 default:
997 break;
998 }
999 }
1000
1001 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1002}
1003
1004bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1005 // Assume ctz is a relatively cheap operation.
1006 return true;
1007}
1008
1009bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1010 // Assume clz is a relatively cheap operation.
1011 return true;
1012}
1013
1014bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1015 const AddrMode &AM,
1016 Type *Ty, unsigned AS,
1017 Instruction *I) const {
1018 // WebAssembly offsets are added as unsigned without wrapping. The
1019 // isLegalAddressingMode gives us no way to determine if wrapping could be
1020 // happening, so we approximate this by accepting only non-negative offsets.
1021 if (AM.BaseOffs < 0)
1022 return false;
1023
1024 // WebAssembly has no scale register operands.
1025 if (AM.Scale != 0)
1026 return false;
1027
1028 // Everything else is legal.
1029 return true;
1030}
1031
1032bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1033 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1034 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1035 // WebAssembly supports unaligned accesses, though it should be declared
1036 // with the p2align attribute on loads and stores which do so, and there
1037 // may be a performance impact. We tell LLVM they're "fast" because
1038 // for the kinds of things that LLVM uses this for (merging adjacent stores
1039 // of constants, etc.), WebAssembly implementations will either want the
1040 // unaligned access or they'll split anyway.
1041 if (Fast)
1042 *Fast = 1;
1043 return true;
1044}
1045
1046bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1047 AttributeList Attr) const {
1048 // The current thinking is that wasm engines will perform this optimization,
1049 // so we can save on code size.
1050 return true;
1051}
1052
1053bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1054 EVT ExtT = ExtVal.getValueType();
1055 SDValue N0 = peekThroughFreeze(ExtVal->getOperand(0));
1056 auto *Load = dyn_cast<LoadSDNode>(N0);
1057 if (!Load)
1058 return false;
1059 EVT MemT = Load->getValueType(0);
1060 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1061 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1062 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1063}
1064
// Offset-folding legality query for the global address node GA. The visible
// code only fetches the GlobalValue referenced by GA.
// NOTE(review): the embedded listing numbers jump from 1068 to 1070, so the
// statement that consumes GV (presumably the return) is not visible in this
// extraction. As shown, this bool function has no return statement; restore
// the missing line from the upstream file.
1065bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1066 const GlobalAddressSDNode *GA) const {
1067 // Wasm doesn't support function addresses with offsets
1068 const GlobalValue *GV = GA->getGlobal();
1070}
1071
// Chooses the result type of a SETCC whose operands have type VT.
// Scalars always get a 32-bit integer. For vectors with f16 elements on a
// subtarget without FP16, the result is VT with its element type changed to
// i1.
1072EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1073 LLVMContext &C,
1074 EVT VT) const {
1075 if (VT.isVector()) {
1076 if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1077 return VT.changeElementType(C, MVT::i1);
1078
// NOTE(review): listing line 1079 is missing from this extraction. It is
// presumably the return for all other vector types; as shown, such vectors
// fall through to the scalar i32 return below. Restore from upstream.
1080 }
1081
1082 // So far, all branch instructions in Wasm take an I32 condition.
1083 // The default TargetLowering::getSetCCResultType returns the pointer size,
1084 // which would be useful to reduce instruction counts when testing
1085 // against 64-bit pointers/values if at some point Wasm supports that.
1086 return EVT::getIntegerVT(C, 32);
1087}
1088
// Describes the memory access performed by selected WebAssembly intrinsics.
// For each handled intrinsic the code fills Info with the memory VT, the
// pointer operand, a zero offset and an alignment, then appends it to Infos.
// Unhandled intrinsics append nothing.
// NOTE(review): this extraction omits listing lines 1090, 1092, 1095, 1106,
// 1110, 1115, 1119, 1124, 1128, 1133, 1137 and 1142. The declarations of
// `Infos`, `I` and `Info`, and one or two statements per case (presumably the
// opcode and memory-operand flags), are therefore not visible. Restore them
// from upstream before editing this function.
1089void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1091 MachineFunction &MF, unsigned Intrinsic) const {
1093 switch (Intrinsic) {
1094 case Intrinsic::wasm_memory_atomic_notify:
1096 Info.memVT = MVT::i32;
1097 Info.ptrVal = I.getArgOperand(0);
1098 Info.offset = 0;
1099 Info.align = Align(4);
1100 // atomic.notify instruction does not really load the memory specified with
1101 // this argument, but MachineMemOperand should either be load or store, so
1102 // we set this to a load.
1103 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1104 // instructions are treated as volatiles in the backend, so we should be
1105 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1107 Infos.push_back(Info);
1108 return;
1109 case Intrinsic::wasm_memory_atomic_wait32:
// 32-bit wait: i32 memory access through argument 0, 4-byte aligned.
1111 Info.memVT = MVT::i32;
1112 Info.ptrVal = I.getArgOperand(0);
1113 Info.offset = 0;
1114 Info.align = Align(4);
1116 Infos.push_back(Info);
1117 return;
1118 case Intrinsic::wasm_memory_atomic_wait64:
// 64-bit wait: i64 memory access through argument 0, 8-byte aligned.
1120 Info.memVT = MVT::i64;
1121 Info.ptrVal = I.getArgOperand(0);
1122 Info.offset = 0;
1123 Info.align = Align(8);
1125 Infos.push_back(Info);
1126 return;
1127 case Intrinsic::wasm_loadf16_f32:
// f16 memory access through argument 0, 2-byte aligned.
1129 Info.memVT = MVT::f16;
1130 Info.ptrVal = I.getArgOperand(0);
1131 Info.offset = 0;
1132 Info.align = Align(2);
1134 Infos.push_back(Info);
1135 return;
1136 case Intrinsic::wasm_storef16_f32:
// f16 memory access; here the pointer is argument 1 rather than 0.
1138 Info.memVT = MVT::f16;
1139 Info.ptrVal = I.getArgOperand(1);
1140 Info.offset = 0;
1141 Info.align = Align(2);
1143 Infos.push_back(Info);
1144 return;
1145 default:
1146 return;
1147 }
1148}
1149
// Adds known-zero bits to Known for a few target-specific nodes:
//  - the wasm_bitmask intrinsic: every bit above the source vector's element
//    count is zero;
//  - EXTEND_LOW_U / EXTEND_HIGH_U: a mask chosen by the source vector type is
//    OR-ed into Known.Zero;
//  - result 1 of I64_ADD128 when both high-half operands are constant zero:
//    all bits from bit 1 upwards are zero.
// Known is left untouched for every other opcode.
// NOTE(review): this extraction omits listing lines 1156, 1180, 1184 and
// 1188. The `case` label that opens the intrinsic block and the three `Mask`
// definitions are therefore not visible. Restore them from upstream.
1150void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1151 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1152 const SelectionDAG &DAG, unsigned Depth) const {
1153 switch (Op.getOpcode()) {
1154 default:
1155 break;
1157 unsigned IntNo = Op.getConstantOperandVal(0);
1158 switch (IntNo) {
1159 default:
1160 break;
1161 case Intrinsic::wasm_bitmask: {
// Only the low PossibleBits bits (one per vector element) can be set, so all
// higher bits are marked known-zero.
1162 unsigned BitWidth = Known.getBitWidth();
1163 EVT VT = Op.getOperand(1).getSimpleValueType();
1164 unsigned PossibleBits = VT.getVectorNumElements();
1165 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1166 Known.Zero |= ZeroMask;
1167 break;
1168 }
1169 }
1170 break;
1171 }
1172 case WebAssemblyISD::EXTEND_LOW_U:
1173 case WebAssemblyISD::EXTEND_HIGH_U: {
1174 // We know the high half, of each destination vector element, will be zero.
1175 SDValue SrcOp = Op.getOperand(0);
1176 EVT VT = SrcOp.getSimpleValueType();
1177 unsigned BitWidth = Known.getBitWidth();
1178 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1179 assert(BitWidth >= 8 && "Unexpected width!");
1181 Known.Zero |= Mask;
1182 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1183 assert(BitWidth >= 16 && "Unexpected width!");
1185 Known.Zero |= Mask;
1186 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1187 assert(BitWidth >= 32 && "Unexpected width!");
1189 Known.Zero |= Mask;
1190 }
1191 break;
1192 }
1193 // For 128-bit addition if the upper bits are all zero then it's known that
1194 // the upper bits of the result will have all bits guaranteed zero except the
1195 // first.
1196 case WebAssemblyISD::I64_ADD128:
1197 if (Op.getResNo() == 1) {
1198 SDValue LHS_HI = Op.getOperand(1);
1199 SDValue RHS_HI = Op.getOperand(3);
1200 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1201 Known.Zero.setBitsFrom(1);
1202 }
1203 break;
1204 }
1205}
1206
// Picks the legalization action for vector type VT. Fixed-length vectors
// whose element type is i8, i16, i32, i64, f32 or f64 are widened.
// NOTE(review): listing lines 1207 and 1219 are missing from this extraction.
// Line 1207 is presumably the return type of this definition and line 1219
// the fallback return for all other types. Restore both from upstream.
1208WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1209 if (VT.isFixedLengthVector()) {
1210 MVT EltVT = VT.getVectorElementType();
1211 // We have legal vector types with these lane types, so widening the
1212 // vector would let us use some of the lanes directly without having to
1213 // extend or truncate values.
1214 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1215 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1216 return TypeWidenVector;
1217 }
1218
1220}
1221
1222bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1223 const MachineFunction &MF, EVT VT) const {
1224 if (!Subtarget->hasFP16() || !VT.isVector())
1225 return false;
1226
1227 EVT ScalarVT = VT.getScalarType();
1228 if (!ScalarVT.isSimple())
1229 return false;
1230
1231 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1232}
1233
1234bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1235 SDValue Op, const TargetLoweringOpt &TLO) const {
1236 // ISel process runs DAGCombiner after legalization; this step is called
1237 // SelectionDAG optimization phase. This post-legalization combining process
1238 // runs DAGCombiner on each node, and if there was a change to be made,
1239 // re-runs legalization again on it and its user nodes to make sure
1240 // everythiing is in a legalized state.
1241 //
1242 // The legalization calls lowering routines, and we do our custom lowering for
1243 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1244 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1245 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1246 // turns unused vector elements into undefs. But this routine does not work
1247 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1248 // combination can result in a infinite loop, in which undefs are converted to
1249 // zeros in legalization and back to undefs in combining.
1250 //
1251 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1252 // running for build_vectors.
1253 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1254 return false;
1255 return true;
1256}
1257
1258//===----------------------------------------------------------------------===//
1259// WebAssembly Lowering private implementation.
1260//===----------------------------------------------------------------------===//
1261
1262//===----------------------------------------------------------------------===//
1263// Lowering Code
1264//===----------------------------------------------------------------------===//
1265
// Emits a DiagnosticInfoUnsupported carrying Msg and the debug location of DL
// through the LLVMContext owned by DAG. The function returns normally after
// reporting, so callers continue lowering.
// NOTE(review): listing line 1267 is missing from this extraction. It is
// presumably the definition of the local `MF` used below; restore it from
// upstream.
1266static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1268 DAG.getContext()->diagnose(
1269 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1270}
1271
1272// Test whether the given calling convention is supported.
// The visible part of the expression accepts C, Fast, Cold, PreserveMost,
// PreserveAll, CXX_FAST_TLS, Swift and SwiftTail.
// NOTE(review): listing lines 1273 and 1283 are missing from this extraction.
// Line 1273 is presumably the function signature (callers invoke it as
// callingConvSupported(CallConv)) and line 1283 presumably one more accepted
// convention. Restore both from upstream.
1274 // We currently support the language-independent target-independent
1275 // conventions. We don't yet have a way to annotate calls with properties like
1276 // "cold", and we don't have any call-clobbered registers, so these are mostly
1277 // all handled the same.
1278 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1279 CallConv == CallingConv::Cold ||
1280 CallConv == CallingConv::PreserveMost ||
1281 CallConv == CallingConv::PreserveAll ||
1282 CallConv == CallingConv::CXX_FAST_TLS ||
1284 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1285}
1286
// Lowers the outgoing call described by CLI into a WebAssemblyISD::CALL node,
// or a WebAssemblyISD::RET_CALL node when the call stays a tail call.
//
// Steps, in order:
//   1. Diagnose unsupported calling conventions and patch points.
//   2. Decide whether a requested tail call can be kept; NoTail() clears
//      CLI.IsTailCall and only reports an error for musttail calls.
//   3. Walk the outgoing arguments: diagnose unsupported flags, copy non-empty
//      byval arguments into fresh stack objects, and count fixed arguments.
//   4. For swiftcc/swifttailcc, append undef swiftself/swifterror (and for
//      swifttailcc swiftasync) arguments when the call lacks them.
//   5. For varargs calls, lay the non-fixed arguments out in a stack buffer
//      and store them there; the buffer pointer becomes the last operand.
//   6. Build the operand list, the result type list, and the call node.
//
// Returns the chain result of the call, or the RET_CALL node itself for tail
// calls. The call's value results are appended to InVals.
//
// NOTE(review): this extraction omits listing lines 1385, 1390, 1446, 1464,
// 1479, 1489, 1502-1503, 1510, 1540, 1550, 1560 and 1562. Several
// declarations (`ArgLocs`, `Chains`, `Ops`, `Table`) and a few conditions and
// argument lists are therefore not visible. Restore them from upstream before
// changing any logic here.
1287SDValue
1288WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1289 SmallVectorImpl<SDValue> &InVals) const {
1290 SelectionDAG &DAG = CLI.DAG;
1291 SDLoc DL = CLI.DL;
1292 SDValue Chain = CLI.Chain;
1293 SDValue Callee = CLI.Callee;
1294 MachineFunction &MF = DAG.getMachineFunction();
1295 auto Layout = MF.getDataLayout();
1296
1297 CallingConv::ID CallConv = CLI.CallConv;
1298 if (!callingConvSupported(CallConv))
1299 fail(DL, DAG,
1300 "WebAssembly doesn't support language-specific or target-specific "
1301 "calling conventions yet");
1302 if (CLI.IsPatchPoint)
1303 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1304
1305 if (CLI.IsTailCall) {
// Downgrades the call to a regular call. A diagnostic is emitted only when
// the IR call is musttail.
1306 auto NoTail = [&](const char *Msg) {
1307 if (CLI.CB && CLI.CB->isMustTailCall())
1308 fail(DL, DAG, Msg);
1309 CLI.IsTailCall = false;
1310 };
1311
1312 if (!Subtarget->hasTailCall())
1313 NoTail("WebAssembly 'tail-call' feature not enabled");
1314
1315 // Varargs calls cannot be tail calls because the buffer is on the stack
1316 if (CLI.IsVarArg)
1317 NoTail("WebAssembly does not support varargs tail calls");
1318
1319 // Do not tail call unless caller and callee return types match
1320 const Function &F = MF.getFunction();
1321 const TargetMachine &TM = getTargetMachine();
1322 Type *RetTy = F.getReturnType();
1323 SmallVector<MVT, 4> CallerRetTys;
1324 SmallVector<MVT, 4> CalleeRetTys;
1325 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1326 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1327 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1328 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1329 CalleeRetTys.begin());
1330 if (!TypesMatch)
1331 NoTail("WebAssembly tail call requires caller and callee return types to "
1332 "match");
1333
1334 // If pointers to local stack values are passed, we cannot tail call
1335 if (CLI.CB) {
1336 for (auto &Arg : CLI.CB->args()) {
1337 Value *Val = Arg.get();
1338 // Trace the value back through pointer operations
// The loop strips casts/aliases and steps through GEPs until it reaches a
// fixed point.
1339 while (true) {
1340 Value *Src = Val->stripPointerCastsAndAliases();
1341 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1342 Src = GEP->getPointerOperand();
1343 if (Val == Src)
1344 break;
1345 Val = Src;
1346 }
1347 if (isa<AllocaInst>(Val)) {
1348 NoTail(
1349 "WebAssembly does not support tail calling with stack arguments");
1350 break;
1351 }
1352 }
1353 }
1354 }
1355
1356 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1357 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1358 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1359
1360 // The generic code may have added an sret argument. If we're lowering an
1361 // invoke function, the ABI requires that the function pointer be the first
1362 // argument, so we may have to swap the arguments.
1363 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1364 Outs[0].Flags.isSRet()) {
1365 std::swap(Outs[0], Outs[1]);
1366 std::swap(OutVals[0], OutVals[1]);
1367 }
1368
1369 bool HasSwiftSelfArg = false;
1370 bool HasSwiftErrorArg = false;
1371 bool HasSwiftAsyncArg = false;
1372 unsigned NumFixedArgs = 0;
1373 for (unsigned I = 0; I < Outs.size(); ++I) {
1374 const ISD::OutputArg &Out = Outs[I];
1375 SDValue &OutVal = OutVals[I];
1376 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1377 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1378 HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
1379 if (Out.Flags.isNest())
1380 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1381 if (Out.Flags.isInAlloca())
1382 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1383 if (Out.Flags.isInConsecutiveRegs())
1384 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
// NOTE(review): listing line 1385, presumably the condition guarding the next
// diagnostic, is missing from this extraction.
1386 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1387 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
// Non-empty byval argument: copy it into a new stack object and pass the
// frame index of that copy instead of the original pointer.
1388 auto &MFI = MF.getFrameInfo();
1389 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1391 /*isSS=*/false);
1392 SDValue SizeNode =
1393 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1394 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1395 Align Alignment = Out.Flags.getNonZeroByValAlign();
1396 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode, Alignment,
1397 Alignment,
1398 /*isVolatile*/ false, /*AlwaysInline=*/false,
1399 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1400 MachinePointerInfo());
1401 OutVal = FINode;
1402 }
1403 // Count the number of fixed args *after* legalization.
1404 NumFixedArgs += !Out.Flags.isVarArg();
1405 }
1406
1407 bool IsVarArg = CLI.IsVarArg;
1408 auto PtrVT = getPointerTy(Layout);
1409
1410 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1411 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1412 // arguments are also added for callee signature. They are necessary to match
1413 // callee and caller signature for indirect call.
1414 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1415 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1416 if (!HasSwiftSelfArg) {
1417 NumFixedArgs++;
1418 ISD::ArgFlagsTy Flags;
1419 Flags.setSwiftSelf();
1420 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1421 CLI.Outs.push_back(Arg);
1422 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1423 CLI.OutVals.push_back(ArgVal);
1424 }
1425 if (!HasSwiftErrorArg) {
1426 NumFixedArgs++;
1427 ISD::ArgFlagsTy Flags;
1428 Flags.setSwiftError();
1429 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1430 CLI.Outs.push_back(Arg);
1431 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1432 CLI.OutVals.push_back(ArgVal);
1433 }
1434 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1435 NumFixedArgs++;
1436 ISD::ArgFlagsTy Flags;
1437 Flags.setSwiftAsync();
1438 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1439 CLI.Outs.push_back(Arg);
1440 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1441 CLI.OutVals.push_back(ArgVal);
1442 }
1443 }
1444
1445 // Analyze operands of the call, assigning locations to each operand.
1447 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1448
1449 if (IsVarArg) {
1450 // Outgoing non-fixed arguments are placed in a buffer. First
1451 // compute their offsets and the total amount of buffer space needed.
1452 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1453 const ISD::OutputArg &Out = Outs[I];
1454 SDValue &Arg = OutVals[I];
1455 EVT VT = Arg.getValueType();
1456 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1457 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
// Use the stricter of the argument's original alignment and the ABI alignment
// of its type.
1458 Align Alignment =
1459 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1460 unsigned Offset =
1461 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1462 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1463 Offset, VT.getSimpleVT(),
1465 }
1466 }
1467
1468 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1469
1470 SDValue FINode;
1471 if (IsVarArg && NumBytes) {
1472 // For non-fixed arguments, next emit stores to store the argument values
1473 // to the stack buffer at the offsets computed above.
1474 MaybeAlign StackAlign = Layout.getStackAlignment();
1475 assert(StackAlign && "data layout string is missing stack alignment");
1476 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1477 /*isSS=*/false);
1478 unsigned ValNo = 0;
1480 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1481 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1482 "ArgLocs should remain in order and only hold varargs args");
1483 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1484 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1485 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1486 DAG.getConstant(Offset, DL, PtrVT));
1487 Chains.push_back(
1488 DAG.getStore(Chain, DL, Arg, Add,
1490 }
1491 if (!Chains.empty())
1492 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1493 } else if (IsVarArg) {
// Varargs call with an empty buffer: pass a zero pointer-sized constant as
// the buffer operand.
1494 FINode = DAG.getIntPtrConstant(0, DL);
1495 }
1496
1497 if (Callee->getOpcode() == ISD::GlobalAddress) {
1498 // If the callee is a GlobalAddress node (quite common, every direct call
1499 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1500 // doesn't at MO_GOT which is not needed for direct calls.
1501 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1504 GA->getOffset());
1505 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1506 getPointerTy(DAG.getDataLayout()), Callee);
1507 }
1508
1509 // Compute the operands for the CALLn node.
1511 Ops.push_back(Chain);
1512 Ops.push_back(Callee);
1513
1514 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1515 // isn't reliable.
1516 Ops.append(OutVals.begin(),
1517 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1518 // Add a pointer to the vararg buffer.
1519 if (IsVarArg)
1520 Ops.push_back(FINode);
1521
1522 SmallVector<EVT, 8> InTys;
1523 for (const auto &In : Ins) {
1524 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1525 assert(!In.Flags.isNest() && "nest is not valid for return values");
1526 if (In.Flags.isInAlloca())
1527 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1528 if (In.Flags.isInConsecutiveRegs())
1529 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1530 if (In.Flags.isInConsecutiveRegsLast())
1531 fail(DL, DAG,
1532 "WebAssembly hasn't implemented cons regs last return values");
1533 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1534 // registers.
1535 InTys.push_back(In.VT);
1536 }
1537
1538 // Lastly, if this is a call to a funcref we need to add an instruction
1539 // table.set to the chain and transform the call.
1541 CLI.CB->getCalledOperand()->getType())) {
1542 // In the absence of function references proposal where a funcref call is
1543 // lowered to call_ref, using reference types we generate a table.set to set
1544 // the funcref to a special table used solely for this purpose, followed by
1545 // a call_indirect. Here we just generate the table set, and return the
1546 // SDValue of the table.set so that LowerCall can finalize the lowering by
1547 // generating the call_indirect.
1548 SDValue Chain = Ops[0];
1549
1551 MF.getContext(), Subtarget);
1552 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1553 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1554 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1555 SDValue TableSet = DAG.getMemIntrinsicNode(
1556 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1557 MVT::funcref,
1558 // Machine Mem Operand args
1559 MachinePointerInfo(
1561 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1563
1564 Ops[0] = TableSet; // The new chain is the TableSet itself
1565 }
1566
1567 if (CLI.IsTailCall) {
1568 // ret_calls do not return values to the current frame
1569 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1570 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1571 }
1572
// Regular call: the result types are the return values followed by the chain.
1573 InTys.push_back(MVT::Other);
1574 SDVTList InTyList = DAG.getVTList(InTys);
1575 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1576
1577 for (size_t I = 0; I < Ins.size(); ++I)
1578 InVals.push_back(Res.getValue(I));
1579
1580 // Return the chain
1581 return Res.getValue(Ins.size());
1582}
1583
1584bool WebAssemblyTargetLowering::CanLowerReturn(
1585 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1586 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1587 const Type *RetTy) const {
1588 // WebAssembly can only handle returning tuples with multivalue enabled
1589 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1590}
1591
// Lowers a function return into a WebAssemblyISD::RETURN node whose operands
// are the incoming chain followed by OutVals, and returns the new chain.
// Unsupported calling conventions and unsupported flags on the return values
// are reported through fail(); lowering still proceeds afterwards.
// NOTE(review): listing lines 1594 and 1615 are missing from this extraction.
// Line 1594 is presumably the declaration of the `Outs` parameter and line
// 1615 the condition guarding the final diagnostic. Restore both from
// upstream.
1592SDValue WebAssemblyTargetLowering::LowerReturn(
1593 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1595 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1596 SelectionDAG &DAG) const {
1597 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1598 "MVP WebAssembly can only return up to one value");
1599 if (!callingConvSupported(CallConv))
1600 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1601
// RETURN operands: the chain first, then every returned value in order.
1602 SmallVector<SDValue, 4> RetOps(1, Chain);
1603 RetOps.append(OutVals.begin(), OutVals.end());
1604 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1605
1606 // Record the number and types of the return values.
1607 for (const ISD::OutputArg &Out : Outs) {
1608 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1609 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1610 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1611 if (Out.Flags.isInAlloca())
1612 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1613 if (Out.Flags.isInConsecutiveRegs())
1614 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1616 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1617 }
1618
1619 return Chain;
1620}
1621
// Lowers the incoming formal arguments of the current function.
// Each used argument becomes a WebAssemblyISD::ARGUMENT node indexed by its
// position; unused arguments become undef. Parameter and result types are
// recorded in the WebAssemblyFunctionInfo. For varargs functions one extra
// pointer argument (the caller-allocated buffer) is copied into a virtual
// register that is also recorded there. Returns the possibly updated chain.
// NOTE(review): listing lines 1683, 1694 and 1695 are missing from this
// extraction. They presumably hold the initializer of `VarargVreg`, the
// declaration of `Results`, and the start of the call that fills `Params` and
// `Results`. Restore them from upstream.
1622SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1623 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1624 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1625 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1626 if (!callingConvSupported(CallConv))
1627 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1628
1629 MachineFunction &MF = DAG.getMachineFunction();
1630 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1631
1632 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1633 // of the incoming values before they're represented by virtual registers.
1634 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1635
1636 bool HasSwiftErrorArg = false;
1637 bool HasSwiftSelfArg = false;
1638 bool HasSwiftAsyncArg = false;
1639 for (const ISD::InputArg &In : Ins) {
1640 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1641 HasSwiftErrorArg |= In.Flags.isSwiftError();
1642 HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
1643 if (In.Flags.isInAlloca())
1644 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1645 if (In.Flags.isNest())
1646 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1647 if (In.Flags.isInConsecutiveRegs())
1648 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1649 if (In.Flags.isInConsecutiveRegsLast())
1650 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1651 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1652 // registers.
// InVals.size() is read before the push_back, so it equals this argument's
// index.
1653 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1654 DAG.getTargetConstant(InVals.size(),
1655 DL, MVT::i32))
1656 : DAG.getUNDEF(In.VT));
1657
1658 // Record the number and types of arguments.
1659 MFI->addParam(In.VT);
1660 }
1661
1662 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1663 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1664 // arguments are also added for callee signature. They are necessary to match
1665 // callee and caller signature for indirect call.
1666 auto PtrVT = getPointerTy(MF.getDataLayout());
1667 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1668 if (!HasSwiftSelfArg) {
1669 MFI->addParam(PtrVT);
1670 }
1671 if (!HasSwiftErrorArg) {
1672 MFI->addParam(PtrVT);
1673 }
1674 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1675 MFI->addParam(PtrVT);
1676 }
1677 }
1678 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1679 // the buffer is passed as an argument.
1680 if (IsVarArg) {
1681 MVT PtrVT = getPointerTy(MF.getDataLayout());
1682 Register VarargVreg =
1684 MFI->setVarargBufferVreg(VarargVreg);
// The buffer pointer is read as ARGUMENT number Ins.size().
1685 Chain = DAG.getCopyToReg(
1686 Chain, DL, VarargVreg,
1687 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1688 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1689 MFI->addParam(PtrVT);
1690 }
1691
1692 // Record the number and types of arguments and results.
1693 SmallVector<MVT, 4> Params;
1696 MF.getFunction(), DAG.getTarget(), Params, Results);
1697 for (MVT VT : Results)
1698 MFI->addResult(VT);
1699 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1700 // the param logic here with ComputeSignatureVTs
// Sanity check: the parameters recorded above must match the computed Params.
1701 assert(MFI->getParams().size() == Params.size() &&
1702 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1703 Params.begin()));
1704
1705 return Chain;
1706}
1707
// Supplies replacement values for node N when it is custom-lowered.
// Visible cases:
//   - two groups of opcodes that deliberately push no results;
//   - FP_ROUND from v4f32 to v4f16, replaced by a DEMOTE_ZERO node of type
//     v8f16;
//   - ADD / SUB, replaced through Replace128Op.
// NOTE(review): this extraction omits listing lines 1709, 1711, 1717-1719 and
// 1737. The remaining parameters (`Results`, `DAG`), the `case` labels of the
// two "no results" groups, and the start of the default-case statement are
// therefore not visible. Restore them from upstream.
1708void WebAssemblyTargetLowering::ReplaceNodeResults(
1710 switch (N->getOpcode()) {
1712 // Do not add any results, signifying that N should not be custom lowered
1713 // after all. This happens because simd128 turns on custom lowering for
1714 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1715 // illegal type.
1716 break;
1720 // Do not add any results, signifying that N should not be custom lowered.
1721 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1722 break;
1723 case ISD::FP_ROUND: {
1724 EVT VT = N->getValueType(0);
1725 SDValue Src = N->getOperand(0);
// Other type combinations push nothing.
1726 if (VT == MVT::v4f16 && Src.getValueType() == MVT::v4f32) {
1727 Results.push_back(
1728 DAG.getNode(WebAssemblyISD::DEMOTE_ZERO, SDLoc(N), MVT::v8f16, Src));
1729 }
1730 break;
1731 }
1732 case ISD::ADD:
1733 case ISD::SUB:
1734 Results.push_back(Replace128Op(N, DAG));
1735 break;
1736 default:
1738 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1739 }
1740}
1741
1742//===----------------------------------------------------------------------===//
1743// Custom lowering hooks.
1744//===----------------------------------------------------------------------===//
1745
// Central dispatcher for custom lowering: switches on the opcode of Op and
// forwards to the matching Lower* helper. Unknown opcodes hit
// llvm_unreachable. BlockAddress / BRIND report an "unimplemented"
// diagnostic and return an empty SDValue. CLEAR_CACHE aborts via
// report_fatal_error.
// NOTE(review): this extraction omits listing lines 1757, 1759, 1777-1778,
// 1780-1782, 1784, 1786-1788, 1792 and 1800-1801. All of them appear to be
// `case` labels, so several `return` statements below look unreachable as
// shown. Restore the labels from upstream before editing.
1746SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1747 SelectionDAG &DAG) const {
1748 SDLoc DL(Op);
1749 switch (Op.getOpcode()) {
1750 default:
1751 llvm_unreachable("unimplemented operation lowering");
1752 return SDValue();
1753 case ISD::FrameIndex:
1754 return LowerFrameIndex(Op, DAG);
1755 case ISD::GlobalAddress:
1756 return LowerGlobalAddress(Op, DAG);
1758 return LowerGlobalTLSAddress(Op, DAG);
1760 return LowerExternalSymbol(Op, DAG);
1761 case ISD::JumpTable:
1762 return LowerJumpTable(Op, DAG);
1763 case ISD::BR_JT:
1764 return LowerBR_JT(Op, DAG);
1765 case ISD::VASTART:
1766 return LowerVASTART(Op, DAG);
1767 case ISD::BlockAddress:
1768 case ISD::BRIND:
1769 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1770 return SDValue();
1771 case ISD::RETURNADDR:
1772 return LowerRETURNADDR(Op, DAG);
1773 case ISD::FRAMEADDR:
1774 return LowerFRAMEADDR(Op, DAG);
1775 case ISD::CopyToReg:
1776 return LowerCopyToReg(Op, DAG);
1779 return LowerAccessVectorElement(Op, DAG);
1783 return LowerIntrinsic(Op, DAG);
1785 return LowerSIGN_EXTEND_INREG(Op, DAG);
1789 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1790 case ISD::BUILD_VECTOR:
1791 return LowerBUILD_VECTOR(Op, DAG);
1793 return LowerVECTOR_SHUFFLE(Op, DAG);
1794 case ISD::SETCC:
1795 return LowerSETCC(Op, DAG);
1796 case ISD::SHL:
1797 case ISD::SRA:
1798 case ISD::SRL:
1799 return LowerShift(Op, DAG);
1802 return LowerFP_TO_INT_SAT(Op, DAG);
1803 case ISD::FMINNUM:
1804 case ISD::FMINIMUMNUM:
1805 return LowerFMIN(Op, DAG);
1806 case ISD::FMAXNUM:
1807 case ISD::FMAXIMUMNUM:
1808 return LowerFMAX(Op, DAG);
1809 case ISD::LOAD:
1810 return LowerLoad(Op, DAG);
1811 case ISD::STORE:
1812 return LowerStore(Op, DAG);
1813 case ISD::CTPOP:
1814 case ISD::CTLZ:
1815 case ISD::CTTZ:
// These three are lowered by unrolling the vector operation.
1816 return DAG.UnrollVectorOp(Op.getNode());
1817 case ISD::CLEAR_CACHE:
1818 report_fatal_error("llvm.clear_cache is not supported on wasm");
1819 case ISD::SMUL_LOHI:
1820 case ISD::UMUL_LOHI:
1821 return LowerMUL_LOHI(Op, DAG);
1822 case ISD::UADDO:
1823 return LowerUADDO(Op, DAG);
1824 }
1825}
1826
1830
1831 return false;
1832}
1833
// Returns an optional local index for Op, or std::nullopt when `FI` is null.
// NOTE(review): listing lines 1836 and 1841 are missing from this extraction.
// Line 1836 presumably defines `FI` (likely a frame-index cast of Op) and
// line 1841 the final return that uses `MF`. As shown, the function has no
// return on its success path; restore both lines from upstream.
1834static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1835 SelectionDAG &DAG) {
1837 if (!FI)
1838 return std::nullopt;
1839
1840 auto &MF = DAG.getMachineFunction();
1842}
1843
// Custom-lowers a store node.
//   - Stores to a WebAssembly global become a GLOBAL_SET memory-intrinsic
//     node with operands (chain, value, base).
//   - Stores whose base is recognised by IsWebAssemblyLocal become a
//     LOCAL_SET node with operands (chain, local index, value).
//   - Otherwise Op is returned unchanged.
// In both special cases a defined offset operand is a fatal error.
// NOTE(review): listing lines 1852, 1874 and 1875 are missing from this
// extraction. They presumably hold the condition that opens the global
// branch, and the condition plus call that guard the final "unlowerable
// store" error. Restore them from upstream.
1844SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1845 SelectionDAG &DAG) const {
1846 SDLoc DL(Op);
1847 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1848 const SDValue &Value = SN->getValue();
1849 const SDValue &Base = SN->getBasePtr();
1850 const SDValue &Offset = SN->getOffset();
1851
1853 if (!Offset->isUndef())
1854 report_fatal_error("unexpected offset when storing to webassembly global",
1855 false);
1856
1857 SDVTList Tys = DAG.getVTList(MVT::Other);
1858 SDValue Ops[] = {SN->getChain(), Value, Base};
1859 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1860 SN->getMemoryVT(), SN->getMemOperand());
1861 }
1862
1863 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1864 if (!Offset->isUndef())
1865 report_fatal_error("unexpected offset when storing to webassembly local",
1866 false);
1867
// The local index is passed as an i32 target constant.
1868 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1869 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1870 SDValue Ops[] = {SN->getChain(), Idx, Value};
1871 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1872 }
1873
1876 "Encountered an unlowerable store to the wasm_var address space",
1877 false);
1878
1879 return Op;
1880}
1881
// Custom-lowers a load node.
//   - Loads from a WebAssembly global become a GLOBAL_GET memory-intrinsic
//     node with operands (chain, base), producing (value, chain).
//   - Loads whose base is recognised by IsWebAssemblyLocal become a LOCAL_GET
//     node with operands (chain, local index), producing (value, chain).
//   - Otherwise Op is returned unchanged.
// In both special cases a defined offset operand is a fatal error.
// NOTE(review): listing lines 1889, 1891, 1902, 1911 and 1912 are missing
// from this extraction. They presumably hold the condition that opens the
// global branch, the two `report_fatal_error(` call heads, and the condition
// plus call that guard the final "unlowerable load" error. Restore them from
// upstream.
1882SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1883 SelectionDAG &DAG) const {
1884 SDLoc DL(Op);
1885 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1886 const SDValue &Base = LN->getBasePtr();
1887 const SDValue &Offset = LN->getOffset();
1888
1890 if (!Offset->isUndef())
1892 "unexpected offset when loading from webassembly global", false);
1893
1894 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1895 SDValue Ops[] = {LN->getChain(), Base};
1896 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1897 LN->getMemoryVT(), LN->getMemOperand());
1898 }
1899
1900 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1901 if (!Offset->isUndef())
1903 "unexpected offset when loading from webassembly local", false);
1904
// The local index is passed as an i32 target constant.
1905 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1906 EVT LocalVT = LN->getValueType(0);
1907 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1908 {LN->getChain(), Idx});
1909 }
1910
1913 "Encountered an unlowerable load from the wasm_var address space",
1914 false);
1915
1916 return Op;
1917}
1918
1919SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1920 SelectionDAG &DAG) const {
1921 assert(Subtarget->hasWideArithmetic());
1922 assert(Op.getValueType() == MVT::i64);
1923 SDLoc DL(Op);
1924 unsigned Opcode;
1925 switch (Op.getOpcode()) {
1926 case ISD::UMUL_LOHI:
1927 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1928 break;
1929 case ISD::SMUL_LOHI:
1930 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1931 break;
1932 default:
1933 llvm_unreachable("unexpected opcode");
1934 }
1935 SDValue LHS = Op.getOperand(0);
1936 SDValue RHS = Op.getOperand(1);
1937 SDValue Lo =
1938 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1939 SDValue Hi(Lo.getNode(), 1);
1940 SDValue Ops[] = {Lo, Hi};
1941 return DAG.getMergeValues(Ops, DL);
1942}
1943
1944// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1945//
1946// This enables generating a single wasm instruction for this operation where
1947// the upper half of both operands are constant zeros. The upper half of the
1948// result is then whether the overflow happened.
1949SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1950 SelectionDAG &DAG) const {
1951 assert(Subtarget->hasWideArithmetic());
1952 assert(Op.getValueType() == MVT::i64);
1953 assert(Op.getOpcode() == ISD::UADDO);
1954 SDLoc DL(Op);
1955 SDValue LHS = Op.getOperand(0);
1956 SDValue RHS = Op.getOperand(1);
1957 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1958 SDValue Result =
1959 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1960 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1961 SDValue CarryI64(Result.getNode(), 1);
1962 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1963 SDValue Ops[] = {Result, CarryI32};
1964 return DAG.getMergeValues(Ops, DL);
1965}
1966
1967SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1968 SelectionDAG &DAG) const {
1969 assert(Subtarget->hasWideArithmetic());
1970 assert(N->getValueType(0) == MVT::i128);
1971 SDLoc DL(N);
1972 unsigned Opcode;
1973 switch (N->getOpcode()) {
1974 case ISD::ADD:
1975 Opcode = WebAssemblyISD::I64_ADD128;
1976 break;
1977 case ISD::SUB:
1978 Opcode = WebAssemblyISD::I64_SUB128;
1979 break;
1980 default:
1981 llvm_unreachable("unexpected opcode");
1982 }
1983 SDValue LHS = N->getOperand(0);
1984 SDValue RHS = N->getOperand(1);
1985
1986 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1987 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1988 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1989 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1990 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1991 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1992 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1993 LHS_0, LHS_1, RHS_0, RHS_1);
1994 SDValue Result_HI(Result_LO.getNode(), 1);
1995 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1996}
1997
1998SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1999 SelectionDAG &DAG) const {
2000 SDValue Src = Op.getOperand(2);
2001 if (isa<FrameIndexSDNode>(Src.getNode())) {
2002 // CopyToReg nodes don't support FrameIndex operands. Other targets select
2003 // the FI to some LEA-like instruction, but since we don't have that, we
2004 // need to insert some kind of instruction that can take an FI operand and
2005 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
2006 // local.copy between Op and its FI operand.
2007 SDValue Chain = Op.getOperand(0);
2008 SDLoc DL(Op);
2009 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
2010 EVT VT = Src.getValueType();
2011 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
2012 : WebAssembly::COPY_I64,
2013 DL, VT, Src),
2014 0);
2015 return Op.getNode()->getNumValues() == 1
2016 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
2017 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
2018 Op.getNumOperands() == 4 ? Op.getOperand(3)
2019 : SDValue());
2020 }
2021 return SDValue();
2022}
2023
2024SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2025 SelectionDAG &DAG) const {
2026 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
2027 return DAG.getTargetFrameIndex(FI, Op.getValueType());
2028}
2029
2030SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2031 SelectionDAG &DAG) const {
2032 SDLoc DL(Op);
2033
2034 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2035 fail(DL, DAG,
2036 "Non-Emscripten WebAssembly hasn't implemented "
2037 "__builtin_return_address");
2038 return SDValue();
2039 }
2040
2041 unsigned Depth = Op.getConstantOperandVal(0);
2042 MakeLibCallOptions CallOptions;
2043 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2044 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2045 .first;
2046}
2047
// Lower FRAMEADDR for depth 0 by reading the function's frame register.
// Non-zero depths return an empty SDValue so the legalizer's default
// expansion is used instead.
// NOTE(review): this listing skips one source line between the early return
// and the `EVT VT` declaration (inner numbering jumps from 2055 to 2057) --
// confirm against the upstream file before relying on this text.
2048SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2049 SelectionDAG &DAG) const {
2050 // Non-zero depths are not supported by WebAssembly currently. Use the
2051 // legalizer's default expansion, which is to return 0 (what this function is
2052 // documented to do).
2053 if (Op.getConstantOperandVal(0) > 0)
2054 return SDValue();
2055
2057 EVT VT = Op.getValueType();
// Ask the register info which register holds the frame address for this
// function, then copy it out starting from the DAG entry node.
2058 Register FP =
2059 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2060 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2061}
2062
// Lower the address of a thread-local global.
// Aborts with a fatal error when the subtarget lacks bulk memory. For the
// DSO-local case the result is TLS base + a MO_TLS_BASE_REL symbol offset;
// the fall-through path wraps a target global address instead.
// NOTE(review): several source lines are absent from this listing (inner
// numbering skips 2081, 2084-2085, 2088-2089, 2105 and 2111), so the model
// selection, the unsupported-mode check and the final operand flags cannot be
// read here -- confirm against the upstream file.
2063SDValue
2064WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2065 SelectionDAG &DAG) const {
2066 SDLoc DL(Op);
2067 const auto *GA = cast<GlobalAddressSDNode>(Op);
2068
2069 MachineFunction &MF = DAG.getMachineFunction();
2070 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2071 report_fatal_error("cannot use thread-local storage without bulk memory",
2072 false);
2073
2074 const GlobalValue *GV = GA->getGlobal();
2075
2076 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2077 // on other targets, if we have thread-local storage, only the local-exec
2078 // model is possible.
2079 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2080 ? GV->getThreadLocalMode()
2082
2083 // Unsupported TLS modes
2086
2087 if (model == GlobalValue::LocalExecTLSModel ||
2090 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2091 // For DSO-local TLS variables we use offset from __tls_base, or
2092 // __wasm_get_tls_base() if using libcall thread context.
2093
2094 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2095 SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2096
// The symbol is emitted relative to the TLS base and wrapped in WrapperREL.
2097 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2098 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2099 SDValue SymOffset =
2100 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2101
2102 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2103 }
2104
2106
2107 EVT VT = Op.getValueType();
2108 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2109 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2110 GA->getOffset(),
2112}
2113
// Lower a generic GlobalAddress node to a Wrapper around a target global
// address. In position-independent mode a DSO-local symbol is instead
// computed as base symbol + WrapperREL(symbol), where the base symbol is
// "__table_base" for function-typed globals and "__memory_base" otherwise.
// NOTE(review): several source lines are absent from this listing (inner
// numbering skips 2121, 2129, 2136, 2139, 2141 and 2152), including the
// address-space check, the assignments to OperandFlags and the declaration of
// BaseAddr -- confirm against the upstream file.
2114SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2115 SelectionDAG &DAG) const {
2116 SDLoc DL(Op);
2117 const auto *GA = cast<GlobalAddressSDNode>(Op);
2118 EVT VT = Op.getValueType();
2119 assert(GA->getTargetFlags() == 0 &&
2120 "Unexpected target flags on generic GlobalAddressSDNode");
2122 fail(DL, DAG, "Invalid address space for WebAssembly target");
2123
2124 unsigned OperandFlags = 0;
2125 const GlobalValue *GV = GA->getGlobal();
2126 // Since WebAssembly tables cannot yet be shared across modules, we don't
2127 // need special treatment for tables in PIC mode.
2128 if (isPositionIndependent() &&
2130 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2131 MachineFunction &MF = DAG.getMachineFunction();
2132 MVT PtrVT = getPointerTy(MF.getDataLayout());
// Pick the base symbol according to whether the global is a function.
2133 const char *BaseName;
2134 if (GV->getValueType()->isFunctionTy()) {
2135 BaseName = MF.createExternalSymbolName("__table_base");
2137 } else {
2138 BaseName = MF.createExternalSymbolName("__memory_base");
2140 }
2142 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2143 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2144
2145 SDValue SymAddr = DAG.getNode(
2146 WebAssemblyISD::WrapperREL, DL, VT,
2147 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2148 OperandFlags));
2149
2150 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2151 }
2153 }
2154
2155 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2156 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2157 GA->getOffset(), OperandFlags));
2158}
2159
2160SDValue
2161WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2162 SelectionDAG &DAG) const {
2163 SDLoc DL(Op);
2164 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2165 EVT VT = Op.getValueType();
2166 assert(ES->getTargetFlags() == 0 &&
2167 "Unexpected target flags on generic ExternalSymbolSDNode");
2168 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2169 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2170}
2171
2172SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2173 SelectionDAG &DAG) const {
2174 // There's no need for a Wrapper node because we always incorporate a jump
2175 // table operand into a BR_TABLE instruction, rather than ever
2176 // materializing it in a register.
2177 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2178 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2179 JT->getTargetFlags());
2180}
2181
// Lower BR_JT to a BR_TABLE node whose operands are: the chain, the index,
// one basic-block operand per jump-table entry, and a trailing default
// target.
// NOTE(review): the declaration of `Ops` is absent from this listing (inner
// numbering jumps from 2189 to 2191) -- confirm its type against the upstream
// file.
2182SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2183 SelectionDAG &DAG) const {
2184 SDLoc DL(Op);
2185 SDValue Chain = Op.getOperand(0);
2186 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2187 SDValue Index = Op.getOperand(2);
2188 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2189
2191 Ops.push_back(Chain);
2192 Ops.push_back(Index);
2193
// Look up the destination blocks recorded for this jump table.
2194 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2195 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2196
2197 // Add an operand for each case.
2198 for (auto *MBB : MBBs)
2199 Ops.push_back(DAG.getBasicBlock(MBB));
2200
2201 // Add the first MBB as a dummy default target for now. This will be replaced
2202 // with the proper default target (and the preceding range check eliminated)
2203 // if possible by WebAssemblyFixBrTableDefaults.
2204 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2205 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2206}
2207
2208SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2209 SelectionDAG &DAG) const {
2210 SDLoc DL(Op);
2211 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2212
2213 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2214 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2215
2216 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2217 MFI->getVarargBufferVreg(), PtrVT);
2218 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2219 MachinePointerInfo(SV));
2220}
2221
// Custom-lower the few intrinsics that need it (wasm_lsda, wasm_shuffle and
// thread_pointer). Every other intrinsic returns an empty SDValue.
// NOTE(review): several source lines are absent from this listing (inner
// numbering skips 2227-2228, 2231, 2248 and 2251): the case labels of the
// first switch and the declarations of `Node` / `BaseAddr` in the PIC branch
// of wasm_lsda -- confirm against the upstream file.
2222SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2223 SelectionDAG &DAG) const {
2224 MachineFunction &MF = DAG.getMachineFunction();
2225 unsigned IntNo;
// The intrinsic ID is read from operand 1 or operand 0 depending on the node
// opcode.
2226 switch (Op.getOpcode()) {
2229 IntNo = Op.getConstantOperandVal(1);
2230 break;
2232 IntNo = Op.getConstantOperandVal(0);
2233 break;
2234 default:
2235 llvm_unreachable("Invalid intrinsic");
2236 }
2237 SDLoc DL(Op);
2238
2239 switch (IntNo) {
2240 default:
2241 return SDValue(); // Don't custom lower most intrinsics.
2242
2243 case Intrinsic::wasm_lsda: {
2244 auto PtrVT = getPointerTy(MF.getDataLayout());
// The symbol name is "GCC_except_table" followed by the function number.
2245 const char *SymName = MF.createExternalSymbolName(
2246 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2247 if (isPositionIndependent()) {
2249 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2250 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2252 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2253 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2254 SDValue SymAddr =
2255 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2256 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2257 }
2258 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2259 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2260 }
2261
2262 case Intrinsic::wasm_shuffle: {
2263 // Drop in-chain and replace undefs, but otherwise pass through unchanged
// Ops holds the two vector operands followed by sixteen mask indices.
2264 SDValue Ops[18];
2265 size_t OpIdx = 0;
2266 Ops[OpIdx++] = Op.getOperand(1);
2267 Ops[OpIdx++] = Op.getOperand(2);
2268 while (OpIdx < 18) {
// Intrinsic operands are shifted by one relative to Ops.
2269 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
// Undef or out-of-range (>= 32) mask indices are replaced by constant 0.
2270 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2271 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2272 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2273 } else {
2274 Ops[OpIdx++] = MaskIdx;
2275 }
2276 }
2277 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2278 }
2279
2280 case Intrinsic::thread_pointer: {
2281 return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2282 }
2283 }
2284}
2285
// Custom-lower SIGN_EXTEND_INREG when its operand is an EXTRACT_VECTOR_ELT.
// Returns Op unchanged when the source vector already has the lane type being
// extended from, an empty SDValue when the pattern is not handled, and
// otherwise a rebuilt sext_inreg over an extract from a bitcast vector.
// NOTE(review): one source line is absent from this listing (inner numbering
// jumps from 2322 to 2324), which carried the opcode/loc/type arguments of
// the NewExtract node -- confirm against the upstream file.
2286SDValue
2287WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2288 SelectionDAG &DAG) const {
2289 SDLoc DL(Op);
2290 // If sign extension operations are disabled, allow sext_inreg only if operand
2291 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2292 // extension operations, but allowing sext_inreg in this context lets us have
2293 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2294 // everywhere would be simpler in this file, but would necessitate large and
2295 // brittle patterns to undo the expansion and select extract_lane_s
2296 // instructions.
2297 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2298 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2299 return SDValue();
2300
2301 const SDValue &Extract = Op.getOperand(0);
2302 MVT VecT = Extract.getOperand(0).getSimpleValueType();
// Source vectors with lanes wider than 32 bits are not handled.
2303 if (VecT.getVectorElementType().getSizeInBits() > 32)
2304 return SDValue();
// Operand 1 names the type being sign-extended from; build the 128-bit vector
// type that has lanes of that type.
2305 MVT ExtractedLaneT =
2306 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2307 MVT ExtractedVecT =
2308 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2309 if (ExtractedVecT == VecT)
2310 return Op;
2311
2312 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2313 const SDNode *Index = Extract.getOperand(1).getNode();
2314 if (!isa<ConstantSDNode>(Index))
2315 return SDValue();
2316 unsigned IndexVal = Index->getAsZExtVal();
// Each original lane covers Scale lanes of the narrower vector type, so the
// lane index is multiplied by Scale.
2317 unsigned Scale =
2318 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2319 assert(Scale > 1);
2320 SDValue NewIndex =
2321 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2322 SDValue NewExtract = DAG.getNode(
2324 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2325 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2326 Op.getOperand(1));
2327}
2328
2329static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2330 SelectionDAG &DAG) {
2331 SDValue Source = peekThroughBitcasts(Op);
2332 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2333 return SDValue();
2334
2335 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2336 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2337 "expected extend_low");
2338 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2339
2340 ArrayRef<int> Mask = Shuffle->getMask();
2341 // Look for a shuffle which moves from the high half to the low half.
2342 size_t FirstIdx = Mask.size() / 2;
2343 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2344 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2345 return SDValue();
2346 }
2347 }
2348
2349 SDLoc DL(Op);
2350 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2351 ? WebAssemblyISD::EXTEND_HIGH_S
2352 : WebAssemblyISD::EXTEND_HIGH_U;
2353 SDValue ShuffleSrc = Shuffle->getOperand(0);
2354 if (Op.getOpcode() == ISD::BITCAST)
2355 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2356
2357 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2358}
2359
// Lower an in-register vector extension to a chain of EXTEND_LOW_{U,S} nodes
// (one per doubling of the lane width), or to a single EXTEND_HIGH when
// GetExtendHigh matches. i1 and i64 source lanes, and extension factors other
// than 2, 4 or 8, return an empty SDValue.
// NOTE(review): several source lines are absent from this listing (inner
// numbering skips 2383-2384, 2387 and 2402-2403): the case labels of the
// opcode switch and the rest of the result-type expression inside the loop --
// confirm against the upstream file.
2360SDValue
2361WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2362 SelectionDAG &DAG) const {
2363 SDLoc DL(Op);
2364 EVT VT = Op.getValueType();
2365 SDValue Src = Op.getOperand(0);
2366 EVT SrcVT = Src.getValueType();
2367
2368 if (SrcVT.getVectorElementType() == MVT::i1 ||
2369 SrcVT.getVectorElementType() == MVT::i64)
2370 return SDValue();
2371
2372 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2373 "Unexpected extension factor.");
// Scale is the ratio between destination and source lane widths.
2374 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2375
2376 if (Scale != 2 && Scale != 4 && Scale != 8)
2377 return SDValue();
2378
2379 unsigned Ext;
2380 switch (Op.getOpcode()) {
2381 default:
2382 llvm_unreachable("unexpected opcode");
2385 Ext = WebAssemblyISD::EXTEND_LOW_U;
2386 break;
2388 Ext = WebAssemblyISD::EXTEND_LOW_S;
2389 break;
2390 }
2391
2392 if (Scale == 2) {
2393 // See if we can use EXTEND_HIGH.
2394 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2395 return ExtendHigh;
2396 }
2397
// Apply the extend once per factor of two until the requested width is
// reached.
2398 SDValue Ret = Src;
2399 while (Scale != 1) {
2400 Ret = DAG.getNode(Ext, DL,
2401 Ret.getValueType()
2404 Ret);
2405 Scale /= 2;
2406 }
2407 assert(Ret.getValueType() == VT);
2408 return Ret;
2409}
2410
2412 SDLoc DL(Op);
2413 if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2414 return SDValue();
2415
2416 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2417 unsigned &Index) -> bool {
2418 switch (Op.getOpcode()) {
2419 case ISD::SINT_TO_FP:
2420 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2421 break;
2422 case ISD::UINT_TO_FP:
2423 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2424 break;
2425 case ISD::FP_EXTEND:
2426 case ISD::FP16_TO_FP:
2427 Opcode = WebAssemblyISD::PROMOTE_LOW;
2428 break;
2429 default:
2430 return false;
2431 }
2432
2433 auto ExtractVector = Op.getOperand(0);
2434 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2435 return false;
2436
2437 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2438 return false;
2439
2440 SrcVec = ExtractVector.getOperand(0);
2441 Index = ExtractVector.getConstantOperandVal(1);
2442 return true;
2443 };
2444
2445 unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
2446 unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
2447 unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
2448 SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2449
2450 if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec,
2451 FirstIndex) ||
2452 !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec,
2453 SecondIndex))
2454 return SDValue();
2455
2456 // If we're converting to v4f32, check the third and fourth lanes, too.
2457 if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode,
2458 ThirdSrcVec, ThirdIndex) ||
2459 !GetConvertedLane(Op.getOperand(3), FourthOpcode,
2460 FourthSrcVec, FourthIndex)))
2461 return SDValue();
2462
2463 if (FirstOpcode != SecondOpcode)
2464 return SDValue();
2465
2466 // TODO Add an optimization similar to the v2f64 below for shuffling the
2467 // vectors when the lanes are in the wrong order or come from different src
2468 // vectors.
2469 if (NumLanes == 4 &&
2470 (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
2471 FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
2472 FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
2473 ThirdIndex != 2 || FourthIndex != 3))
2474 return SDValue();
2475
2476 MVT ExpectedSrcVT;
2477 switch (FirstOpcode) {
2478 case WebAssemblyISD::CONVERT_LOW_S:
2479 case WebAssemblyISD::CONVERT_LOW_U:
2480 ExpectedSrcVT = MVT::v4i32;
2481 break;
2482 case WebAssemblyISD::PROMOTE_LOW:
2483 ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
2484 break;
2485 }
2486 if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2487 return SDValue();
2488
2489 auto Src = FirstSrcVec;
2490 if (NumLanes == 2 &&
2491 (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
2492 // Shuffle the source vector so that the converted lanes are the low lanes.
2493 Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
2494 {static_cast<int>(FirstIndex),
2495 static_cast<int>(SecondIndex) + 4, -1, -1});
2496 }
2497 return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
2498 Src);
2499}
2500
// Lower BUILD_VECTOR.
// v8f16 vectors are rebuilt as an i16 BUILD_VECTOR and bitcast back. After
// that, LowerConvertLow is tried. Otherwise the lanes are classified, the
// construction that covers the most lanes is chosen among swizzle, shuffle,
// constant vector and splat, and every remaining non-undef lane is patched in
// with INSERT_VECTOR_ELT.
// NOTE(review): four source lines are absent from this listing (inner
// numbering skips 2508, 2654, 2674 and 2681): the declarations of `NewOps`
// and `Result`, and the right-hand side of both `LaneSize` initializers --
// confirm against the upstream file.
2501SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2502 SelectionDAG &DAG) const {
2503 MVT VT = Op.getSimpleValueType();
2504 if (VT == MVT::v8f16) {
2505 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2506 // FP16 type, so cast them to I16s.
2507 MVT IVT = VT.changeVectorElementType(MVT::i16);
2509 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2510 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
// NOTE(review): this node is created with a default-constructed SDLoc rather
// than SDLoc(Op) -- confirm this is intended.
2511 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2512 return DAG.getBitcast(VT, Res);
2513 }
2514
2515 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2516 return ConvertLow;
2517
2518 SDLoc DL(Op);
2519 const EVT VecT = Op.getValueType();
2520 const EVT LaneT = Op.getOperand(0).getValueType();
2521 const size_t Lanes = Op.getNumOperands();
// Swizzles are only considered for v16i8 results.
2522 bool CanSwizzle = VecT == MVT::v16i8;
2523
2524 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2525 // possible number of lanes at once followed by a sequence of replace_lane
2526 // instructions to individually initialize any remaining lanes.
2527
2528 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2529 // swizzled lanes should be given greater weight.
2530
2531 // TODO: Investigate looping rather than always extracting/replacing specific
2532 // lanes to fill gaps.
2533
2534 auto IsConstant = [](const SDValue &V) {
2535 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2536 };
2537
2538 // Returns the source vector and index vector pair if they exist. Checks for:
2539 // (extract_vector_elt
2540 // $src,
2541 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2542 // )
2543 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2544 auto Bail = std::make_pair(SDValue(), SDValue());
2545 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2546 return Bail;
2547 const SDValue &SwizzleSrc = Lane->getOperand(0);
2548 const SDValue &IndexExt = Lane->getOperand(1);
2549 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2550 return Bail;
2551 const SDValue &Index = IndexExt->getOperand(0);
2552 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2553 return Bail;
2554 const SDValue &SwizzleIndices = Index->getOperand(0);
2555 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2556 SwizzleIndices.getValueType() != MVT::v16i8 ||
2557 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2558 Index->getConstantOperandVal(1) != I)
2559 return Bail;
2560 return std::make_pair(SwizzleSrc, SwizzleIndices);
2561 };
2562
2563 // If the lane is extracted from another vector at a constant index, return
2564 // that vector. The source vector must not have more lanes than the dest
2565 // because the shufflevector indices are in terms of the destination lanes and
2566 // would not be able to address the smaller individual source lanes.
2567 auto GetShuffleSrc = [&](const SDValue &Lane) {
2568 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2569 return SDValue();
2570 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2571 return SDValue();
2572 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2573 VecT.getVectorNumElements())
2574 return SDValue();
2575 return Lane->getOperand(0);
2576 };
2577
// Each list below pairs a candidate with the number of lanes it would cover.
2578 using ValueEntry = std::pair<SDValue, size_t>;
2579 SmallVector<ValueEntry, 16> SplatValueCounts;
2580
2581 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2582 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2583
2584 using ShuffleEntry = std::pair<SDValue, size_t>;
2585 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2586
// Bump the count of Val in Counts, inserting it with count 1 when it is new.
2587 auto AddCount = [](auto &Counts, const auto &Val) {
2588 auto CountIt =
2589 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2590 if (CountIt == Counts.end()) {
2591 Counts.emplace_back(Val, 1);
2592 } else {
2593 CountIt->second++;
2594 }
2595 };
2596
// Return the entry with the largest count; Counts must not be empty.
2597 auto GetMostCommon = [](auto &Counts) {
2598 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2599 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2600 return *CommonIt;
2601 };
2602
2603 size_t NumConstantLanes = 0;
2604
2605 // Count eligible lanes for each type of vector creation op
2606 for (size_t I = 0; I < Lanes; ++I) {
2607 const SDValue &Lane = Op->getOperand(I);
2608 if (Lane.isUndef())
2609 continue;
2610
2611 AddCount(SplatValueCounts, Lane);
2612
2613 if (IsConstant(Lane))
2614 NumConstantLanes++;
2615 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2616 AddCount(ShuffleCounts, ShuffleSrc);
2617 if (CanSwizzle) {
2618 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2619 if (SwizzleSrcs.first)
2620 AddCount(SwizzleCounts, SwizzleSrcs);
2621 }
2622 }
2623
2624 SDValue SplatValue;
2625 size_t NumSplatLanes;
2626 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2627
2628 SDValue SwizzleSrc;
2629 SDValue SwizzleIndices;
2630 size_t NumSwizzleLanes = 0;
2631 if (SwizzleCounts.size())
2632 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2633 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2634
2635 // Shuffles can draw from up to two vectors, so find the two most common
2636 // sources.
2637 SDValue ShuffleSrc1, ShuffleSrc2;
2638 size_t NumShuffleLanes = 0;
2639 if (ShuffleCounts.size()) {
2640 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
// Remove the winner so the next query yields the second most common source.
2641 llvm::erase_if(ShuffleCounts,
2642 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2643 }
2644 if (ShuffleCounts.size()) {
2645 size_t AdditionalShuffleLanes;
2646 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2647 GetMostCommon(ShuffleCounts);
2648 NumShuffleLanes += AdditionalShuffleLanes;
2649 }
2650
2651 // Predicate returning true if the lane is properly initialized by the
2652 // original instruction
2653 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2655 // Prefer swizzles over shuffles over vector consts over splats
2656 if (NumSwizzleLanes >= NumShuffleLanes &&
2657 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2658 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2659 SwizzleIndices);
2660 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2661 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2662 return Swizzled == GetSwizzleSrcs(I, Lane);
2663 };
2664 } else if (NumShuffleLanes >= NumConstantLanes &&
2665 NumShuffleLanes >= NumSplatLanes) {
2666 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2667 size_t DestLaneCount = VecT.getVectorNumElements();
// Scale1/Scale2 convert a source lane index into a destination lane index
// when a source is bitcast from a type with wider lanes.
2668 size_t Scale1 = 1;
2669 size_t Scale2 = 1;
2670 SDValue Src1 = ShuffleSrc1;
2671 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2672 if (Src1.getValueType() != VecT) {
2673 size_t LaneSize =
2675 assert(LaneSize > DestLaneSize);
2676 Scale1 = LaneSize / DestLaneSize;
2677 Src1 = DAG.getBitcast(VecT, Src1);
2678 }
2679 if (Src2.getValueType() != VecT) {
2680 size_t LaneSize =
2682 assert(LaneSize > DestLaneSize);
2683 Scale2 = LaneSize / DestLaneSize;
2684 Src2 = DAG.getBitcast(VecT, Src2);
2685 }
2686
2687 int Mask[16];
2688 assert(DestLaneCount <= 16);
2689 for (size_t I = 0; I < DestLaneCount; ++I) {
2690 const SDValue &Lane = Op->getOperand(I);
2691 SDValue Src = GetShuffleSrc(Lane);
// Lanes from the second source are offset by DestLaneCount; lanes from
// neither source get -1.
2692 if (Src == ShuffleSrc1) {
2693 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2694 } else if (Src && Src == ShuffleSrc2) {
2695 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2696 } else {
2697 Mask[I] = -1;
2698 }
2699 }
2700 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2701 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2702 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2703 auto Src = GetShuffleSrc(Lane);
2704 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2705 };
2706 } else if (NumConstantLanes >= NumSplatLanes) {
// Build a constant vector; non-constant lanes get a zero placeholder here.
2707 SmallVector<SDValue, 16> ConstLanes;
2708 for (const SDValue &Lane : Op->op_values()) {
2709 if (IsConstant(Lane)) {
2710 // Values may need to be fixed so that they will sign extend to be
2711 // within the expected range during ISel. Check whether the value is in
2712 // bounds based on the lane bit width and if it is out of bounds, lop
2713 // off the extra bits.
2714 uint64_t LaneBits = 128 / Lanes;
2715 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2716 ConstLanes.push_back(DAG.getConstant(
2717 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2718 SDLoc(Lane), LaneT));
2719 } else {
2720 ConstLanes.push_back(Lane);
2721 }
2722 } else if (LaneT.isFloatingPoint()) {
2723 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2724 } else {
2725 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2726 }
2727 }
2728 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2729 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2730 return IsConstant(Lane);
2731 };
2732 } else {
2733 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2734 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2735 (DestLaneSize == 32 || DestLaneSize == 64)) {
2736 // Could be selected to load_zero.
2737 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2738 } else {
2739 // Use a splat (which might be selected as a load splat)
2740 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2741 }
2742 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2743 return Lane == SplatValue;
2744 };
2745 }
2746
2747 assert(Result);
2748 assert(IsLaneConstructed);
2749
2750 // Add replace_lane instructions for any unhandled values
2751 for (size_t I = 0; I < Lanes; ++I) {
2752 const SDValue &Lane = Op->getOperand(I);
2753 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2754 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2755 DAG.getConstant(I, DL, MVT::i32));
2756 }
2757
2758 return Result;
2759}
2760
2761SDValue
2762WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2763 SelectionDAG &DAG) const {
2764 SDLoc DL(Op);
2765 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2766 MVT VecType = Op.getOperand(0).getSimpleValueType();
2767 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2768 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2769
2770 // Space for two vector args and sixteen mask indices
2771 SDValue Ops[18];
2772 size_t OpIdx = 0;
2773 Ops[OpIdx++] = Op.getOperand(0);
2774 Ops[OpIdx++] = Op.getOperand(1);
2775
2776 // Expand mask indices to byte indices and materialize them as operands
2777 for (int M : Mask) {
2778 for (size_t J = 0; J < LaneBytes; ++J) {
2779 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2780 // whole lane of vector input, to allow further reduction at VM. E.g.
2781 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2782 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2783 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2784 }
2785 }
2786
2787 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2788}
2789
// Lower a v2i64 SETCC by unrolling it: each lane is compared with a scalar
// SELECT_CC that yields all-ones (-1) or zero, and the two results are
// rebuilt into a vector.
// NOTE(review): the declaration of `LHS` and `RHS` is absent from this
// listing (inner numbering jumps from 2795 to 2797) -- confirm against the
// upstream file.
2790SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2791 SelectionDAG &DAG) const {
2792 SDLoc DL(Op);
2793 // The legalizer does not know how to expand the unsupported comparison modes
2794 // of i64x2 vectors, so we manually unroll them here.
2795 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2797 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2798 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
// Operand 2 is the condition code, forwarded unchanged to SELECT_CC.
2799 const SDValue &CC = Op->getOperand(2);
2800 auto MakeLane = [&](unsigned I) {
2801 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2802 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2803 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2804 };
2805 return DAG.getBuildVector(Op->getValueType(0), DL,
2806 {MakeLane(0), MakeLane(1)});
2807}
2808
2809SDValue
2810WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2811 SelectionDAG &DAG) const {
2812 // Allow constant lane indices, expand variable lane indices
2813 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2814 if (isa<ConstantSDNode>(IdxNode)) {
2815 // Ensure the index type is i32 to match the tablegen patterns
2816 uint64_t Idx = IdxNode->getAsZExtVal();
2817 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2818 Ops[Op.getNumOperands() - 1] =
2819 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2820 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2821 }
2822 // Perform default expansion
2823 return SDValue();
2824}
2825
2827 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2828 // 32-bit and 64-bit unrolled shifts will have proper semantics
2829 if (LaneT.bitsGE(MVT::i32))
2830 return DAG.UnrollVectorOp(Op.getNode());
2831 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2832 SDLoc DL(Op);
2833 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2834 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2835 unsigned ShiftOpcode = Op.getOpcode();
2836 SmallVector<SDValue, 16> ShiftedElements;
2837 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2838 SmallVector<SDValue, 16> ShiftElements;
2839 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2840 SmallVector<SDValue, 16> UnrolledOps;
2841 for (size_t i = 0; i < NumLanes; ++i) {
2842 SDValue MaskedShiftValue =
2843 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2844 SDValue ShiftedValue = ShiftedElements[i];
2845 if (ShiftOpcode == ISD::SRA)
2846 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2847 ShiftedValue, DAG.getValueType(LaneT));
2848 UnrolledOps.push_back(
2849 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2850 }
2851 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2852}
2853
2854SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2855 SelectionDAG &DAG) const {
2856 SDLoc DL(Op);
2857 // Only manually lower vector shifts
2858 assert(Op.getSimpleValueType().isVector());
2859
2860 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2861 auto ShiftVal = Op.getOperand(1);
2862
2863 // Try to skip bitmask operation since it is implied inside shift instruction
2864 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2865 if (MaskOp.getOpcode() != ISD::AND)
2866 return MaskOp;
2867 SDValue LHS = MaskOp.getOperand(0);
2868 SDValue RHS = MaskOp.getOperand(1);
2869 if (MaskOp.getValueType().isVector()) {
2870 APInt MaskVal;
2871 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2872 std::swap(LHS, RHS);
2873
2874 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2875 MaskVal == MaskBits)
2876 MaskOp = LHS;
2877 } else {
2878 if (!isa<ConstantSDNode>(RHS.getNode()))
2879 std::swap(LHS, RHS);
2880
2881 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2882 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2883 MaskOp = LHS;
2884 }
2885
2886 return MaskOp;
2887 };
2888
2889 // Skip vector and operation
2890 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2891 ShiftVal = DAG.getSplatValue(ShiftVal);
2892 if (!ShiftVal)
2893 return unrollVectorShift(Op, DAG);
2894
2895 // Skip scalar and operation
2896 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2897 // Use anyext because none of the high bits can affect the shift
2898 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2899
2900 unsigned Opcode;
2901 switch (Op.getOpcode()) {
2902 case ISD::SHL:
2903 Opcode = WebAssemblyISD::VEC_SHL;
2904 break;
2905 case ISD::SRA:
2906 Opcode = WebAssemblyISD::VEC_SHR_S;
2907 break;
2908 case ISD::SRL:
2909 Opcode = WebAssemblyISD::VEC_SHR_U;
2910 break;
2911 default:
2912 llvm_unreachable("unexpected opcode");
2913 }
2914
2915 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2916}
2917
2918SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2919 SelectionDAG &DAG) const {
2920 EVT ResT = Op.getValueType();
2921 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2922
2923 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2924 (SatVT == MVT::i32 || SatVT == MVT::i64))
2925 return Op;
2926
2927 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2928 return Op;
2929
2930 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2931 return Op;
2932
2933 return SDValue();
2934}
2935
2937 return (Op->getFlags().hasNoNaNs() ||
2938 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2939 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2940 (Op->getFlags().hasNoSignedZeros() ||
2941 DAG.isKnownNeverLogicalZero(Op->getOperand(0)) ||
2942 DAG.isKnownNeverLogicalZero(Op->getOperand(1)));
2943}
2944
2945SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2946 SelectionDAG &DAG) const {
2947 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2948 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2949 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2950 }
2951 return SDValue();
2952}
2953
2954SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2955 SelectionDAG &DAG) const {
2956 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2957 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2958 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2959 }
2960 return SDValue();
2961}
2962
2963//===----------------------------------------------------------------------===//
2964// Custom DAG combine hooks
2965//===----------------------------------------------------------------------===//
2966static SDValue
2968 auto &DAG = DCI.DAG;
2969 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2970
2971 // Hoist vector bitcasts that don't change the number of lanes out of unary
2972 // shuffles, where they are less likely to get in the way of other combines.
2973 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2974 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2975 SDValue Bitcast = N->getOperand(0);
2976 if (Bitcast.getOpcode() != ISD::BITCAST)
2977 return SDValue();
2978 if (!N->getOperand(1).isUndef())
2979 return SDValue();
2980 SDValue CastOp = Bitcast.getOperand(0);
2981 EVT SrcType = CastOp.getValueType();
2982 EVT DstType = Bitcast.getValueType();
2983 if (!SrcType.is128BitVector() ||
2984 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2985 return SDValue();
2986 SDValue NewShuffle = DAG.getVectorShuffle(
2987 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2988 return DAG.getBitcast(DstType, NewShuffle);
2989}
2990
2991/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2992/// split up into scalar instructions during legalization, and the vector
2993/// extending instructions are selected in performVectorExtendCombine below.
2994static SDValue
2997 auto &DAG = DCI.DAG;
2998 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2999 N->getOpcode() == ISD::SINT_TO_FP);
3000
3001 EVT InVT = N->getOperand(0)->getValueType(0);
3002 EVT ResVT = N->getValueType(0);
3003 MVT ExtVT;
3004 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
3005 ExtVT = MVT::v4i32;
3006 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3007 ExtVT = MVT::v2i32;
3008 else
3009 return SDValue();
3010
3011 unsigned Op =
3013 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
3014 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
3015}
3016
3017static SDValue
3020 auto &DAG = DCI.DAG;
3021
3022 SDNodeFlags Flags = N->getFlags();
3023 SDValue Op0 = N->getOperand(0);
3024 EVT VT = N->getValueType(0);
3025
3026 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3027 // Depending on the target (runtime) backend, this might be performance
3028 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3029 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
3030 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
3031 }
3032
3033 return SDValue();
3034}
3035
3036static SDValue
3038 auto &DAG = DCI.DAG;
3039 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3040 N->getOpcode() == ISD::ZERO_EXTEND);
3041
3042 EVT ResVT = N->getValueType(0);
3043 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3044 SDLoc DL(N);
3045
3046 if (ResVT == MVT::v16i32 && N->getOperand(0)->getValueType(0) == MVT::v16i8) {
3047 // Use a tree of extend low/high to split and extend the input in two
3048 // layers to avoid doing several shuffles and even more extends.
3049 unsigned LowOp =
3050 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3051 unsigned HighOp =
3052 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3053 SDValue Input = N->getOperand(0);
3054 SDValue LowHalf = DAG.getNode(LowOp, DL, MVT::v8i16, Input);
3055 SDValue HighHalf = DAG.getNode(HighOp, DL, MVT::v8i16, Input);
3056 SDValue Subvectors[] = {
3057 DAG.getNode(LowOp, DL, MVT::v4i32, LowHalf),
3058 DAG.getNode(HighOp, DL, MVT::v4i32, LowHalf),
3059 DAG.getNode(LowOp, DL, MVT::v4i32, HighHalf),
3060 DAG.getNode(HighOp, DL, MVT::v4i32, HighHalf),
3061 };
3062 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Subvectors);
3063 }
3064
3065 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3066 // possible before the extract_subvector can be expanded.
3067 auto Extract = N->getOperand(0);
3068 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3069 return SDValue();
3070 auto Source = Extract.getOperand(0);
3071 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3072 if (IndexNode == nullptr)
3073 return SDValue();
3074 auto Index = IndexNode->getZExtValue();
3075
3076 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3077 // extracted subvector is the low or high half of its source.
3078 if (ResVT == MVT::v8i16) {
3079 if (Extract.getValueType() != MVT::v8i8 ||
3080 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3081 return SDValue();
3082 } else if (ResVT == MVT::v4i32) {
3083 if (Extract.getValueType() != MVT::v4i16 ||
3084 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3085 return SDValue();
3086 } else if (ResVT == MVT::v2i64) {
3087 if (Extract.getValueType() != MVT::v2i32 ||
3088 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3089 return SDValue();
3090 } else {
3091 return SDValue();
3092 }
3093
3094 bool IsLow = Index == 0;
3095
3096 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3097 : WebAssemblyISD::EXTEND_HIGH_S)
3098 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3099 : WebAssemblyISD::EXTEND_HIGH_U);
3100
3101 return DAG.getNode(Op, DL, ResVT, Source);
3102}
3103
3104static SDValue
3106 auto &DAG = DCI.DAG;
3107
3108 auto GetWasmConversionOp = [](unsigned Op) {
3109 switch (Op) {
3111 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3113 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3114 case ISD::FP_ROUND:
3115 return WebAssemblyISD::DEMOTE_ZERO;
3116 }
3117 llvm_unreachable("unexpected op");
3118 };
3119
3120 auto IsZeroSplat = [](SDValue SplatVal) {
3121 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3122 APInt SplatValue, SplatUndef;
3123 unsigned SplatBitSize;
3124 bool HasAnyUndefs;
3125 // Endianness doesn't matter in this context because we are looking for
3126 // an all-zero value.
3127 return Splat &&
3128 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3129 HasAnyUndefs) &&
3130 SplatValue == 0;
3131 };
3132
3133 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3134 // Combine this:
3135 //
3136 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3137 //
3138 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3139 //
3140 // Or this:
3141 //
3142 // (concat_vectors ({v2f32, v4f16} (fp_round ({v2f64, v4f32} $x))),
3143 // ({v2f32, v4f16} (splat 0)))
3144 //
3145 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3146 EVT ResVT;
3147 EVT ExpectedConversionType;
3148 auto Conversion = N->getOperand(0);
3149 auto ConversionOp = Conversion.getOpcode();
3150 switch (ConversionOp) {
3153 ResVT = MVT::v4i32;
3154 ExpectedConversionType = MVT::v2i32;
3155 break;
3156 case ISD::FP_ROUND:
3157 if (Conversion.getValueType() == MVT::v2f32) {
3158 ResVT = MVT::v4f32;
3159 ExpectedConversionType = MVT::v2f32;
3160 } else if (Conversion.getValueType() == MVT::v4f16) {
3161 ResVT = MVT::v8f16;
3162 ExpectedConversionType = MVT::v4f16;
3163 } else {
3164 return SDValue();
3165 }
3166 break;
3167 default:
3168 return SDValue();
3169 }
3170
3171 if (N->getValueType(0) != ResVT)
3172 return SDValue();
3173
3174 if (Conversion.getValueType() != ExpectedConversionType)
3175 return SDValue();
3176
3177 auto Source = Conversion.getOperand(0);
3178 if (!((Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4f32) ||
3179 (Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4i32) ||
3180 (Source.getValueType() == MVT::v4f32 && ResVT == MVT::v8f16)))
3181 return SDValue();
3182
3183 if (!IsZeroSplat(N->getOperand(1)) ||
3184 N->getOperand(1).getValueType() != ExpectedConversionType)
3185 return SDValue();
3186
3187 unsigned Op = GetWasmConversionOp(ConversionOp);
3188 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3189 }
3190
3191 // Combine this:
3192 //
3193 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3194 //
3195 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3196 //
3197 // Or this:
3198 //
3199 // ({v4f32, v8f16} (fp_round (concat_vectors $x,
3200 // ({v2f64, v4f32} (splat 0)))))
3201 //
3202 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3203 EVT ResVT;
3204 auto ConversionOp = N->getOpcode();
3205 switch (ConversionOp) {
3208 ResVT = MVT::v4i32;
3209 break;
3210 case ISD::FP_ROUND:
3211 ResVT = N->getValueType(0);
3212 break;
3213 default:
3214 llvm_unreachable("unexpected op");
3215 }
3216
3217 if (N->getValueType(0) != ResVT)
3218 return SDValue();
3219
3220 auto Concat = N->getOperand(0);
3221 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3222 return SDValue();
3223 EVT ConcatVT = Concat.getValueType();
3224 EVT SourceVT = Concat.getOperand(0).getValueType();
3225
3226 if (!IsZeroSplat(Concat.getOperand(1)))
3227 return SDValue();
3228
3229 if (ConversionOp == ISD::FP_ROUND) {
3230 bool IsF64ToF32 =
3231 ConcatVT == MVT::v4f64 && SourceVT == MVT::v2f64 && ResVT == MVT::v4f32;
3232 bool IsF32ToF16 =
3233 ConcatVT == MVT::v8f32 && SourceVT == MVT::v4f32 && ResVT == MVT::v8f16;
3234 if (!(IsF64ToF32 || IsF32ToF16))
3235 return SDValue();
3236 } else {
3237 if (ConcatVT != MVT::v4f64 || SourceVT != MVT::v2f64 || ResVT != MVT::v4i32)
3238 return SDValue();
3239 }
3240
3241 unsigned Op = GetWasmConversionOp(ConversionOp);
3242 return DAG.getNode(Op, SDLoc(N), ResVT, Concat.getOperand(0));
3243}
3244
3245// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3246static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3247 const SDLoc &DL, unsigned VectorWidth) {
3248 EVT VT = Vec.getValueType();
3249 EVT ElVT = VT.getVectorElementType();
3250 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3251 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3252 VT.getVectorNumElements() / Factor);
3253
3254 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3255 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3256 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3257
3258 // This is the index of the first element of the VectorWidth-bit chunk
3259 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3260 IdxVal &= ~(ElemsPerChunk - 1);
3261
3262 // If the input is a buildvector just emit a smaller one.
3263 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3264 return DAG.getBuildVector(ResultVT, DL,
3265 Vec->ops().slice(IdxVal, ElemsPerChunk));
3266
3267 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3268 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3269}
3270
3271// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3272// is the expected destination value type after recursion. In is the initial
3273// input. Note that the input should have enough leading zero bits to prevent
3274// NARROW_U from saturating results.
3276 SelectionDAG &DAG) {
3277 EVT SrcVT = In.getValueType();
3278
3279 // No truncation required, we might get here due to recursive calls.
3280 if (SrcVT == DstVT)
3281 return In;
3282
3283 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3284 unsigned NumElems = SrcVT.getVectorNumElements();
3285 if (!isPowerOf2_32(NumElems))
3286 return SDValue();
3287 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3288 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3289
3290 LLVMContext &Ctx = *DAG.getContext();
3291 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3292
3293 // Narrow to the largest type possible:
3294 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3295 EVT InVT = MVT::i16, OutVT = MVT::i8;
3296 if (SrcVT.getScalarSizeInBits() > 16) {
3297 InVT = MVT::i32;
3298 OutVT = MVT::i16;
3299 }
3300 unsigned SubSizeInBits = SrcSizeInBits / 2;
3301 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3302 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3303
3304 // Split lower/upper subvectors.
3305 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3306 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3307
3308 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3309 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3310 Lo = DAG.getBitcast(InVT, Lo);
3311 Hi = DAG.getBitcast(InVT, Hi);
3312 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3313 return DAG.getBitcast(DstVT, Res);
3314 }
3315
3316 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3317 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3318 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3319 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3320
3321 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3322 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3323 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3324}
3325
3328 auto &DAG = DCI.DAG;
3329
3330 SDValue In = N->getOperand(0);
3331 EVT InVT = In.getValueType();
3332 if (!InVT.isSimple())
3333 return SDValue();
3334
3335 EVT OutVT = N->getValueType(0);
3336 if (!OutVT.isVector())
3337 return SDValue();
3338
3339 EVT OutSVT = OutVT.getVectorElementType();
3340 EVT InSVT = InVT.getVectorElementType();
3341 // Currently only cover truncate to v16i8 or v8i16.
3342 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3343 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3344 return SDValue();
3345
3346 SDLoc DL(N);
3348 OutVT.getScalarSizeInBits());
3349 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3350 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3351}
3352
// NOTE(review): the signature of this combine is not visible in this listing;
// from the body it receives the node N and a DAGCombinerInfo DCI.
//
// Combines a bitcast from a fixed-length vector of i1 to a scalar integer,
// before legalization only:
//  - 2/4/8/16 lanes: sign-extend the mask to a 128-bit vector and use the
//    wasm_bitmask intrinsic.
//  - 32/64 lanes: only when the source is a setcc with a CONCAT_VECTORS
//    operand; each concat operand is compared and bitcast separately and the
//    partial bitmasks are merged into one integer.
3355 using namespace llvm::SDPatternMatch;
3356 auto &DAG = DCI.DAG;
3357 SDLoc DL(N);
3358 SDValue Src = N->getOperand(0);
3359 EVT VT = N->getValueType(0);
3360 EVT SrcVT = Src.getValueType();
3361
3362 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3363 SrcVT.isFixedLengthVectorOf(MVT::i1)))
3364 return SDValue();
3365
3366 unsigned NumElts = SrcVT.getVectorNumElements();
// Lane width that makes NumElts lanes fill 128 bits; only used by the
// 2/4/8/16-lane path below.
3367 EVT Width = MVT::getIntegerVT(128 / NumElts);
3368
3369 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3370 // ==> bitmask
3371 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3372 return DAG.getZExtOrTrunc(
3373 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3374 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3375 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3376 SrcVT.changeVectorElementType(
3377 *DAG.getContext(), Width))}),
3378 DL, VT);
3379 }
3380
3381 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3382 if (NumElts == 32 || NumElts == 64) {
3383 SDValue Concat, SetCCVector;
3384 ISD::CondCode SetCond;
3385
// m_c_SetCC is the commutative matcher, so Concat may bind to either setcc
// operand; the CONCAT_VECTORS opcode is checked only after the match.
3386 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3387 m_CondCode(SetCond)))))
3388 return SDValue();
3389 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3390 return SDValue();
3391
3392 // Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3393 // Derive the per-chunk mask/integer types from the actual operand type
3394 // instead of hardcoding v16i1 / i16 for every chunk.
3395 EVT ConcatOperandVT = Concat.getOperand(0).getValueType();
3396 unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3397
3398 EVT ConcatOperandMaskVT =
3399 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3400 ElementCount::getFixed(ConcatOperandNumElts));
3401 EVT ConcatOperandBitmaskVT =
3402 EVT::getIntegerVT(*DAG.getContext(), ConcatOperandNumElts);
3403 EVT ReturnVT = N->getValueType(0);
3404 SDValue ReconstructedBitmask = DAG.getConstant(0, DL, ReturnVT);
3405 // Example:
3406 // v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3407 // -> v8i1 + v8i1 + v8i1 + v8i1
3408 // -> i8 + i8 + i8 + i8
3409 // -> reconstructed i32 bitmask
3410 for (size_t I = 0; I < Concat->ops().size(); ++I) {
3411 SDValue ConcatOperand = Concat.getOperand(I);
3412 assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3413 "concat_vectors operands must have the same type");
3414
// Matching slice of the other setcc operand; the extraction width is fixed at
// 128 bits, and the type check below rejects anything that does not line up
// with the concat operand type.
3415 SDValue SetCCVectorOperand =
3416 extractSubVector(SetCCVector, I * ConcatOperandNumElts, DAG, DL, 128);
3417 if (!SetCCVectorOperand ||
3418 SetCCVectorOperand.getValueType() != ConcatOperandVT)
3419 return SDValue();
3420
3421 // Build the per-chunk mask using the correct chunk type:
3422 // v16i8 -> v16i1 -> i16
3423 // v8i16 -> v8i1 -> i8
3424 // v4i32 -> v4i1 -> i4
3425 // v2i64 -> v2i1 -> i2
3426 SDValue ConcatOperandMask = DAG.getSetCC(
3427 DL, ConcatOperandMaskVT, ConcatOperand, SetCCVectorOperand, SetCond);
3428 SDValue ConcatOperandBitmask =
3429 DAG.getBitcast(ConcatOperandBitmaskVT, ConcatOperandMask);
3430 SDValue ExtendedConcatOperandBitmask =
3431 DAG.getZExtOrTrunc(ConcatOperandBitmask, DL, ReturnVT);
3432
// NOTE(review): shifting the accumulator left before OR-ing in chunk I leaves
// concat operand 0 in the most significant bits of the result. The small-mask
// path above presumably maps lane 0 to bit 0 (via wasm_bitmask) - confirm that
// this chunk order matches the lane-to-bit order expected of the bitcast.
3433 // Shift the previously reconstructed bits to make room for this chunk.
3434 if (I != 0) {
3435 ReconstructedBitmask = DAG.getNode(
3436 ISD::SHL, DL, ReturnVT, ReconstructedBitmask,
3437 DAG.getShiftAmountConstant(ConcatOperandNumElts, ReturnVT, DL));
3438 }
3439
3440 // Merge disjoint partial bitmasks with OR.
3441 ReconstructedBitmask =
3442 DAG.getNode(ISD::OR, DL, ReturnVT, ReconstructedBitmask,
3443 ExtendedConcatOperandBitmask);
3444 }
3445
3446 return ReconstructedBitmask;
3447 }
3448
// Any other lane count is left alone.
3449 return SDValue();
3450}
3451
3453 // bitmask (setcc <X>, 0, setlt) => bitmask X
3454 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3455 using namespace llvm::SDPatternMatch;
3456
3457 if (N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask)
3458 return SDValue();
3459
3460 SDValue LHS;
3461 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3463 return SDValue();
3464
3465 SDLoc DL(N);
3466 return DAG.getNode(
3467 ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
3468 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
3469}
3470
3472 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3473 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3474 // any_true (setcc <X>, 0, ne) => (any_true X)
3475 // all_true (setcc <X>, 0, ne) => (all_true X)
3476 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3477 using namespace llvm::SDPatternMatch;
3478
3479 SDValue LHS;
3480 if (N->getNumOperands() < 2 ||
3481 !sd_match(N->getOperand(1),
3483 return SDValue();
3484 EVT LT = LHS.getValueType();
3485 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3486 return SDValue();
3487
3488 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3489 ISD::CondCode SetType,
3490 Intrinsic::WASMIntrinsics InPost) {
3491 if (N->getConstantOperandVal(0) != InPre)
3492 return SDValue();
3493
3494 SDValue LHS;
3495 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3496 m_SpecificCondCode(SetType))))
3497 return SDValue();
3498
3499 SDLoc DL(N);
3500 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3501 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3502 if (SetType == ISD::SETEQ)
3503 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3504 DAG.getConstant(1, DL, MVT::i32));
3505 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3506 };
3507
3508 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3509 Intrinsic::wasm_alltrue))
3510 return AnyTrueEQ;
3511 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3512 Intrinsic::wasm_anytrue))
3513 return AllTrueEQ;
3514 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3515 Intrinsic::wasm_anytrue))
3516 return AnyTrueNE;
3517 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3518 Intrinsic::wasm_alltrue))
3519 return AllTrueNE;
3520
3521 return SDValue();
3522}
3523
3529
3531 unsigned NumElts,
3532 const MaskReduceInfo &Info,
3533 SelectionDAG &DAG) {
3534 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3535 MVT::getIntegerVT(128 / NumElts));
3536 assert(VecVT.getSizeInBits() == 128 &&
3537 "mask reduction should be widened to a 128-bit vector");
3538
3539 SDLoc DL(N);
3540 SDValue Mask = N->getOperand(0)->getOperand(0);
3541 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3542 {DAG.getConstant(Info.IID, DL, MVT::i32),
3543 DAG.getSExtOrTrunc(Mask, DL, VecVT)});
3544 if (Info.Invert)
3545 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3546 DAG.getConstant(1, DL, MVT::i32));
3547 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3548}
3549
3551 unsigned NumElts,
3552 const MaskReduceInfo &Info,
3553 SelectionDAG &DAG) {
3554 assert((NumElts == 32 || NumElts == 64) &&
3555 "combineWideMaskReduction is only for wide masks");
3556 assert(MaskVT.isFixedLengthVector() &&
3557 MaskVT.getVectorElementType() == MVT::i1);
3558 SDLoc DL(N);
3559 unsigned ChunkElts = 16;
3560 EVT ChunkMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3561 ElementCount::getFixed(ChunkElts));
3562 EVT LegalVecVT = ChunkMaskVT.changeVectorElementType(
3563 *DAG.getContext(), MVT::getIntegerVT(128 / ChunkElts));
3564
3565 SmallVector<SDValue, 4> ChunkResults;
3566 // Split the wide mask into v16i1 chunks and reduce each chunk separately.
3567 // For example:
3568 // v32i1: [0..15] [16..31]
3569 // | |
3570 // v v
3571 // chunk0 chunk1
3572 //
3573 // v64i1: [0..15] [16..31] [32..47] [48..63]
3574 // | | | |
3575 // v v v v
3576 // chunk0 chunk1 chunk2 chunk3
3577 //
3578 // each chunk:
3579 // v16i1 -> v16i8 -> wasm_anytrue/alltrue -> i32 0/1
3580 for (unsigned I = 0; I < NumElts; I += ChunkElts) {
3581 SDValue ChunkMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ChunkMaskVT,
3582 Mask, DAG.getVectorIdxConstant(I, DL));
3583 SDValue LegalMask = DAG.getSExtOrTrunc(ChunkMask, DL, LegalVecVT);
3584 SDValue Reduced =
3585 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3586 DAG.getConstant(Info.IID, DL, MVT::i32), LegalMask);
3587 ChunkResults.push_back(Reduced);
3588 }
3589
3590 SDValue Acc = ChunkResults[0];
3591 for (unsigned I = 1; I < ChunkResults.size(); ++I)
3592 Acc =
3593 DAG.getNode(Info.WideCombineOpcode, DL, MVT::i32, Acc, ChunkResults[I]);
3594
3595 if (Info.Invert)
3596 Acc = DAG.getNode(ISD::XOR, DL, MVT::i32, Acc,
3597 DAG.getConstant(1, DL, MVT::i32));
3598
3599 return DAG.getZExtOrTrunc(Acc, DL, N->getValueType(0));
3600}
3601
3602static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
3603 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3604 if (!C)
3605 return std::nullopt;
3606
3607 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3608
3609 // setcc (bitcast mask), 0, ne -> any_true(mask)
3610 if (C->isZero() && CC == ISD::SETNE)
3611 return MaskReduceInfo{Intrinsic::wasm_anytrue, ISD::OR, false};
3612
3613 // setcc (bitcast mask), 0, eq -> !any_true(mask)
3614 if (C->isZero() && CC == ISD::SETEQ)
3615 return MaskReduceInfo{Intrinsic::wasm_anytrue, ISD::OR, true};
3616
3617 // setcc (bitcast mask), -1, eq -> all_true(mask)
3618 if (C->isAllOnes() && CC == ISD::SETEQ)
3619 return MaskReduceInfo{Intrinsic::wasm_alltrue, ISD::AND, false};
3620
3621 // setcc (bitcast mask), -1, ne -> !all_true(mask)
3622 if (C->isAllOnes() && CC == ISD::SETNE)
3623 return MaskReduceInfo{Intrinsic::wasm_alltrue, ISD::AND, true};
3624
3625 return std::nullopt;
3626}
3627
3628/// Try to convert an i128 comparison to a v16i8 comparison before type
3629/// legalization splits it up into chunks.
3630static SDValue
3632 const WebAssemblySubtarget *Subtarget) {
3633
3634 SDLoc DL(N);
3635 SDValue X = N->getOperand(0);
3636 SDValue Y = N->getOperand(1);
3637 EVT VT = N->getValueType(0);
3638 EVT OpVT = X.getValueType();
3639
3640 SelectionDAG &DAG = DCI.DAG;
3642 Attribute::NoImplicitFloat))
3643 return SDValue();
3644
3645 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3646 // We're looking for an oversized integer equality comparison with SIMD
3647 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3648 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3649 return SDValue();
3650
3651 // Don't perform this combine if constructing the vector will be expensive.
3652 auto IsVectorBitCastCheap = [](SDValue X) {
3654 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3655 };
3656
3657 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3658 return SDValue();
3659
3660 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3661 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3662 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3663
3664 SDValue Intr =
3665 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3666 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3667 : Intrinsic::wasm_anytrue,
3668 DL, MVT::i32),
3669 Cmp});
3670
3671 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3672 ISD::SETNE);
3673}
3674
3677 const WebAssemblySubtarget *Subtarget) {
3678 if (!DCI.isBeforeLegalize())
3679 return SDValue();
3680
3681 EVT VT = N->getValueType(0);
3682 if (!VT.isScalarInteger())
3683 return SDValue();
3684
3685 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3686 return V;
3687
3688 SDValue LHS = N->getOperand(0);
3689 if (LHS->getOpcode() != ISD::BITCAST)
3690 return SDValue();
3691
3692 EVT FromVT = LHS->getOperand(0).getValueType();
3693 if (!FromVT.isFixedLengthVectorOf(MVT::i1))
3694 return SDValue();
3695
3696 unsigned NumElts = FromVT.getVectorNumElements();
3697 auto Info = classifyMaskReduction(N);
3698 if (!Info)
3699 return SDValue();
3700
3701 auto &DAG = DCI.DAG;
3702 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
3703 return combineSmallMaskReduction(N, FromVT, NumElts, *Info, DAG);
3704
3705 if (NumElts == 32 || NumElts == 64)
3706 return combineWideMaskReduction(N, LHS.getOperand(0), FromVT, NumElts,
3707 *Info, DAG);
3708
3709 return SDValue();
3710}
3711
3713 EVT VT = N->getValueType(0);
3714 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3715 return SDValue();
3716
3717 // Mul with extending inputs.
3718 SDValue LHS = N->getOperand(0);
3719 SDValue RHS = N->getOperand(1);
3720 if (LHS.getOpcode() != RHS.getOpcode())
3721 return SDValue();
3722
3723 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3724 LHS.getOpcode() != ISD::ZERO_EXTEND)
3725 return SDValue();
3726
3727 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3728 return SDValue();
3729
3730 EVT FromVT = LHS->getOperand(0).getValueType();
3731 EVT EltTy = FromVT.getVectorElementType();
3732 if (EltTy != MVT::i8)
3733 return SDValue();
3734
3735 // For an input DAG that looks like this
3736 // %a = input_type
3737 // %b = input_type
3738 // %lhs = extend %a to output_type
3739 // %rhs = extend %b to output_type
3740 // %mul = mul %lhs, %rhs
3741
3742 // input_type | output_type | instructions
3743 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3744 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3745 // | | %low_low = i32x4.ext_low_i16x8_ %low
3746 // | | %low_high = i32x4.ext_high_i16x8_ %low
3747 // | | %high_low = i32x4.ext_low_i16x8_ %high
3748 // | | %high_high = i32x4.ext_high_i16x8_ %high
3749 // | | %res = concat_vector(...)
3750 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3751 // | | %low_low = i32x4.ext_low_i16x8_ %low
3752 // | | %low_high = i32x4.ext_high_i16x8_ %low
3753 // | | %res = concat_vector(%low_low, %low_high)
3754
3755 SDLoc DL(N);
3756 unsigned NumElts = VT.getVectorNumElements();
3757 SDValue ExtendInLHS = LHS->getOperand(0);
3758 SDValue ExtendInRHS = RHS->getOperand(0);
3759 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3760 unsigned ExtendLowOpc =
3761 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3762 unsigned ExtendHighOpc =
3763 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3764
3765 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3766 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3767 };
3768 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3769 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3770 };
3771
3772 if (NumElts == 16) {
3773 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3774 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3775 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3776 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3777 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3778 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3779 SDValue SubVectors[] = {
3780 GetExtendLow(MVT::v4i32, MulLow),
3781 GetExtendHigh(MVT::v4i32, MulLow),
3782 GetExtendLow(MVT::v4i32, MulHigh),
3783 GetExtendHigh(MVT::v4i32, MulHigh),
3784 };
3785 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3786 } else {
3787 assert(NumElts == 8);
3788 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3789 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3790 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3791 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3792 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3793 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3794 }
3795 return SDValue();
3796}
3797
3800 assert(N->getOpcode() == ISD::MUL);
3801 EVT VT = N->getValueType(0);
3802 if (!VT.isVector())
3803 return SDValue();
3804
3805 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3806 return Res;
3807
3808 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3809 // extend them to v8i16.
3810 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3811 return SDValue();
3812
3813 SDLoc DL(N);
3814 SelectionDAG &DAG = DCI.DAG;
3815 SDValue LHS = N->getOperand(0);
3816 SDValue RHS = N->getOperand(1);
3817 EVT MulVT = MVT::v8i16;
3818
3819 if (VT == MVT::v8i8) {
3820 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3821 DAG.getUNDEF(MVT::v8i8));
3822 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3823 DAG.getUNDEF(MVT::v8i8));
3824 SDValue LowLHS =
3825 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3826 SDValue LowRHS =
3827 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3828 SDValue MulLow = DAG.getBitcast(
3829 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3830 // Take the low byte of each lane.
3831 SDValue Shuffle = DAG.getVectorShuffle(
3832 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3833 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3834 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3835 } else {
3836 assert(VT == MVT::v16i8 && "Expected v16i8");
3837 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3838 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3839 SDValue HighLHS =
3840 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3841 SDValue HighRHS =
3842 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3843
3844 SDValue MulLow =
3845 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3846 SDValue MulHigh =
3847 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3848
3849 // Take the low byte of each lane.
3850 return DAG.getVectorShuffle(
3851 VT, DL, MulLow, MulHigh,
3852 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3853 }
3854}
3855
3856SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3857 SelectionDAG &DAG) {
3858 SDLoc DL(In);
3859 LLVMContext &Ctx = *DAG.getContext();
3860 EVT InVT = In.getValueType();
3861 unsigned NumElems = InVT.getVectorNumElements() * 2;
3862 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3863 SDValue Concat =
3864 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3865 if (NumElems < RequiredNumElems) {
3866 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3867 }
3868 return Concat;
3869}
3870
3872 EVT OutVT = N->getValueType(0);
3873 if (!OutVT.isVector())
3874 return SDValue();
3875
3876 EVT OutElTy = OutVT.getVectorElementType();
3877 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3878 return SDValue();
3879
3880 unsigned NumElems = OutVT.getVectorNumElements();
3881 if (!isPowerOf2_32(NumElems))
3882 return SDValue();
3883
3884 EVT FPVT = N->getOperand(0)->getValueType(0);
3885 if (FPVT.getVectorElementType() != MVT::f32)
3886 return SDValue();
3887
3888 SDLoc DL(N);
3889
3890 // First, convert to i32.
3891 LLVMContext &Ctx = *DAG.getContext();
3892 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3893 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3895 OutVT.getScalarSizeInBits());
3896 // Mask out the top MSBs.
3897 SDValue Masked =
3898 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3899
3900 if (OutVT.getSizeInBits() < 128) {
3901 // Create a wide enough vector that we can use narrow.
3902 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3903 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3904 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3905 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3906 return DAG.getBitcast(
3907 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3908 } else {
3909 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3910 }
3911 return SDValue();
3912}
3913
3914// Wide vector shift operations such as v8i32 with sign-extended
3915// operands cause Type Legalizer crashes because the target-specific
3916// extension nodes cannot be directly mapped to the 256-bit size.
3917//
3918// To resolve the crash and optimize performance, we intercept the
3919// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3920// into multipliers and manually split the vector into two v4i32 halves.
3921//
3922// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3923// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3924// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3925// t4: v8i32 = concat_vectors t2, t3
3928 SelectionDAG &DAG = DCI.DAG;
3929 assert(N->getOpcode() == ISD::SHL);
3930 EVT VT = N->getValueType(0);
3931 if (VT != MVT::v8i32)
3932 return SDValue();
3933
3934 SDValue LHS = N->getOperand(0);
3935 SDValue RHS = N->getOperand(1);
3936 unsigned ExtOpc = LHS.getOpcode();
3937 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3938 return SDValue();
3939
3940 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3941 return SDValue();
3942
3943 SDLoc DL(N);
3944 SDValue ExtendIn = LHS.getOperand(0);
3945 EVT FromVT = ExtendIn.getValueType();
3946 if (FromVT != MVT::v8i16)
3947 return SDValue();
3948
3949 unsigned NumElts = VT.getVectorNumElements();
3950 unsigned BitWidth = FromVT.getScalarSizeInBits();
3951 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
3952 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3953 SmallVector<SDValue, 16> MulConsts;
3954 for (unsigned I = 0; I < NumElts; ++I) {
3955 auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(I));
3956 if (!C)
3957 return SDValue();
3958
3959 const APInt &ShiftAmt = C->getAPIntValue();
3960 if (ShiftAmt.uge(MaxValidShift))
3961 return SDValue();
3962
3963 APInt MulAmt = APInt::getOneBitSet(BitWidth, ShiftAmt.getZExtValue());
3964 MulConsts.push_back(DAG.getConstant(MulAmt, DL, FromVT.getScalarType(),
3965 /*isTarget=*/false, /*isOpaque=*/true));
3966 }
3967
3968 SDValue NarrowConst = DAG.getBuildVector(FromVT, DL, MulConsts);
3969 unsigned ExtLowOpc =
3970 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3971 unsigned ExtHighOpc =
3972 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3973
3974 EVT HalfVT = MVT::v4i32;
3975 SDValue LHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, ExtendIn);
3976 SDValue LHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, ExtendIn);
3977 SDValue RHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, NarrowConst);
3978 SDValue RHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, NarrowConst);
3979 SDValue MulLo = DAG.getNode(ISD::MUL, DL, HalfVT, LHSLo, RHSLo);
3980 SDValue MulHi = DAG.getNode(ISD::MUL, DL, HalfVT, LHSHi, RHSHi);
3981 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MulLo, MulHi);
3982}
3983
3984SDValue
3985WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3986 DAGCombinerInfo &DCI) const {
3987 switch (N->getOpcode()) {
3988 default:
3989 return SDValue();
3990 case ISD::BITCAST:
3991 return performBitcastCombine(N, DCI);
3992 case ISD::SETCC:
3993 return performSETCCCombine(N, DCI, Subtarget);
3995 return performVECTOR_SHUFFLECombine(N, DCI);
3996 case ISD::SIGN_EXTEND:
3997 case ISD::ZERO_EXTEND:
3998 return performVectorExtendCombine(N, DCI);
3999 case ISD::UINT_TO_FP:
4000 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
4001 return ExtCombine;
4002 return performVectorNonNegToFPCombine(N, DCI);
4003 case ISD::SINT_TO_FP:
4004 return performVectorExtendToFPCombine(N, DCI);
4007 case ISD::FP_ROUND:
4009 return performVectorTruncZeroCombine(N, DCI);
4010 case ISD::FP_TO_SINT:
4011 case ISD::FP_TO_UINT:
4012 return performConvertFPCombine(N, DCI.DAG);
4013 case ISD::TRUNCATE:
4014 return performTruncateCombine(N, DCI);
4016 if (SDValue V = performBitmaskCombine(N, DCI.DAG))
4017 return V;
4018 return performAnyAllCombine(N, DCI.DAG);
4019 }
4020 case ISD::MUL:
4021 return performMulCombine(N, DCI);
4022 case ISD::SHL:
4023 return performShiftCombine(N, DCI);
4024 }
4025}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool callingConvSupported(CallingConv::ID CallConv)
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static std::optional< MaskReduceInfo > classifyMaskReduction(SDNode *N)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT, unsigned NumElts, const MaskReduceInfo &Info, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue performShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineWideMaskReduction(SDNode *N, SDValue Mask, EVT MaskVT, unsigned NumElts, const MaskReduceInfo &Info, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:354
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:273
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:716
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ CLEAR_CACHE
llvm.clear_cache intrinsic. Operands: Input Chain, Start Address, End Address. Outputs: Output Chain.
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:949
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
auto m_Value()
Match an arbitrary value and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
MachineSDNode * getTLSBase(SelectionDAG &DAG, const SDLoc &DL, const WebAssemblySubtarget *Subtarget, const SDValue Chain=SDValue())
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
SDValue peekThroughFreeze(SDValue V)
Return the non-frozen source operand of V if it exists.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2087
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:266
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:475
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isFixedLengthVectorOf(EVT EltVT) const
Return true if this is a fixed length vector with matching element type.
Definition ValueTypes.h:205
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:315
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.