LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Custom);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls, with extend inputs, to extmul_half.
203
204 // Combine vector mask reductions into alltrue/anytrue
206
207 // Convert vector to integer bitcasts to bitmask
209
210 // Hoist bitcasts out of shuffles
212
213 // Combine extends of extract_subvectors into widening ops
215
216 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
217 // conversions ops
220
221 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
222 // into conversion ops
226
228
229 // Support saturating add/sub for i8x16 and i16x8
231 for (auto T : {MVT::v16i8, MVT::v8i16})
233
234 // Support integer abs
235 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
237
238 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
239 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
240 MVT::v2f64})
242
243 if (Subtarget->hasFP16())
245
246 // We have custom shuffle lowering to expose the shuffle mask
247 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
248 MVT::v2f64})
250
251 if (Subtarget->hasFP16())
253
254 // Support splatting
255 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
256 MVT::v2f64})
258
259 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
260
261 // Custom lowering since wasm shifts must have a scalar shift amount
262 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
263 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
265
266 // Custom lower lane accesses to expand out variable indices
268 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
269 MVT::v2f64})
271
272 // There is no i8x16.mul instruction
273 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
274
275 // Expand integer operations supported for scalars but not SIMD
276 for (auto Op :
278 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
280
281 // But we do have integer min and max operations
282 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
283 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
285
286 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
287 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
288 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
289 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
290
291 // Custom lower bit counting operations for other types to scalarize them.
292 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
293 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
295
296 // Expand float operations supported for scalars but not SIMD
299 for (auto T : {MVT::v4f32, MVT::v2f64})
301
302 // Unsigned comparison operations are unavailable for i64x2 vectors.
304 setCondCodeAction(CC, MVT::v2i64, Custom);
305
306 // 64x2 conversions are not in the spec
307 for (auto Op :
309 for (auto T : {MVT::v2i64, MVT::v2f64})
311
312 // But saturating fp_to_int conversions are
314 setOperationAction(Op, MVT::v4i32, Custom);
315 if (Subtarget->hasFP16()) {
316 setOperationAction(Op, MVT::v8i16, Custom);
317 }
318 }
319
320 // Support vector extending
325 }
326
327 if (Subtarget->hasFP16()) {
328 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
329 }
330
331 if (Subtarget->hasRelaxedSIMD()) {
334 }
335
336 // Partial MLA reductions.
338 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
339 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
340 }
341 }
342
343 // As a special case, these operators use the type to mean the type to
344 // sign-extend from.
346 if (!Subtarget->hasSignExt()) {
347 // Sign extends are legal only when extending a vector extract
348 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
349 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
351 }
354
355 // Dynamic stack allocation: use the default expansion.
359
363
364 // Expand these forms; we pattern-match the forms that we can handle in isel.
365 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
366 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
368
369 if (Subtarget->hasReferenceTypes())
370 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
371 for (auto T : {MVT::externref, MVT::funcref})
373
374 // There is no vector conditional select instruction
375 for (auto T :
376 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
378
379 // We have custom switch handling.
381
382 // WebAssembly doesn't have:
383 // - Floating-point extending loads.
384 // - Floating-point truncating stores.
385 // - i1 extending loads.
386 // - truncating SIMD stores and most extending loads
387 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
388 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
389 for (auto T : MVT::integer_valuetypes())
390 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
391 setLoadExtAction(Ext, T, MVT::i1, Promote);
392 if (Subtarget->hasSIMD128()) {
393 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
394 MVT::v2f64}) {
395 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
396 if (MVT(T) != MemT) {
398 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
399 setLoadExtAction(Ext, T, MemT, Expand);
400 }
401 }
402 }
403 // But some vector extending loads are legal
404 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
405 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
406 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
407 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
408 }
409 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
410 }
411
412 // Don't do anything clever with build_pairs
414
415 // Trap lowers to wasm unreachable
416 setOperationAction(ISD::TRAP, MVT::Other, Legal);
418
419 // Exception handling intrinsics
423
425
426 // Always convert switches to br_tables unless there is only one case, which
427 // is equivalent to a simple branch. This reduces code size for wasm, and we
428 // defer possible jump table optimizations to the VM.
430}
431
440
449
451WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
452 const AtomicRMWInst *AI) const {
453 // We have wasm instructions for these
454 switch (AI->getOperation()) {
462 default:
463 break;
464 }
466}
467
468bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
469 // Implementation copied from X86TargetLowering.
470 unsigned Opc = VecOp.getOpcode();
471
472 // Assume target opcodes can't be scalarized.
473 // TODO - do we have any exceptions?
475 return false;
476
477 // If the vector op is not supported, try to convert to scalar.
478 EVT VecVT = VecOp.getValueType();
480 return true;
481
482 // If the vector op is supported, but the scalar op is not, the transform may
483 // not be worthwhile.
484 EVT ScalarVT = VecVT.getScalarType();
485 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
486}
487
488FastISel *WebAssemblyTargetLowering::createFastISel(
489 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
490 const LibcallLoweringInfo *LibcallLowering) const {
491 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
492}
493
494MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
495 EVT VT) const {
496 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
497 if (BitWidth > 1 && BitWidth < 8)
498 BitWidth = 8;
499
500 if (BitWidth > 64) {
501 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
502 // the count to be an i32.
503 BitWidth = 32;
505 "32-bit shift counts ought to be enough for anyone");
506 }
507
510 "Unable to represent scalar shift amount type");
511 return Result;
512}
513
514// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
515// undefined result on invalid/overflow, to the WebAssembly opcode, which
516// traps on invalid/overflow.
519 const TargetInstrInfo &TII,
520 bool IsUnsigned, bool Int64,
521 bool Float64, unsigned LoweredOpcode) {
523
524 Register OutReg = MI.getOperand(0).getReg();
525 Register InReg = MI.getOperand(1).getReg();
526
527 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
528 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
529 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
530 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
531 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
532 unsigned Eqz = WebAssembly::EQZ_I32;
533 unsigned And = WebAssembly::AND_I32;
534 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
535 int64_t Substitute = IsUnsigned ? 0 : Limit;
536 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
537 auto &Context = BB->getParent()->getFunction().getContext();
538 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
539
540 const BasicBlock *LLVMBB = BB->getBasicBlock();
541 MachineFunction *F = BB->getParent();
542 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
543 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
544 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
545
547 F->insert(It, FalseMBB);
548 F->insert(It, TrueMBB);
549 F->insert(It, DoneMBB);
550
551 // Transfer the remainder of BB and its successor edges to DoneMBB.
552 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
554
555 BB->addSuccessor(TrueMBB);
556 BB->addSuccessor(FalseMBB);
557 TrueMBB->addSuccessor(DoneMBB);
558 FalseMBB->addSuccessor(DoneMBB);
559
560 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
561 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
562 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
563 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
564 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
565 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
566 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
567
568 MI.eraseFromParent();
569 // For signed numbers, we can do a single comparison to determine whether
570 // fabs(x) is within range.
571 if (IsUnsigned) {
572 Tmp0 = InReg;
573 } else {
574 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
575 }
576 BuildMI(BB, DL, TII.get(FConst), Tmp1)
577 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
578 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
579
580 // For unsigned numbers, we have to do a separate comparison with zero.
581 if (IsUnsigned) {
582 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
583 Register SecondCmpReg =
584 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
585 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
586 BuildMI(BB, DL, TII.get(FConst), Tmp1)
587 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
588 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
589 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
590 CmpReg = AndReg;
591 }
592
593 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
594
595 // Create the CFG diamond to select between doing the conversion or using
596 // the substitute value.
597 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
598 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
599 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
600 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
601 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
602 .addReg(FalseReg)
603 .addMBB(FalseMBB)
604 .addReg(TrueReg)
605 .addMBB(TrueMBB);
606
607 return DoneMBB;
608}
609
610// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
611// instruction to handle the zero-length case.
614 const TargetInstrInfo &TII, bool Int64) {
616
617 MachineOperand DstMem = MI.getOperand(0);
618 MachineOperand SrcMem = MI.getOperand(1);
619 MachineOperand Dst = MI.getOperand(2);
620 MachineOperand Src = MI.getOperand(3);
621 MachineOperand Len = MI.getOperand(4);
622
623 // If the length is a constant, we don't actually need the check.
624 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
625 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
626 Def->getOpcode() == WebAssembly::CONST_I64) {
627 if (Def->getOperand(1).getImm() == 0) {
628 // A zero-length memcpy is a no-op.
629 MI.eraseFromParent();
630 return BB;
631 }
632 // A non-zero-length memcpy doesn't need a zero check.
633 unsigned MemoryCopy =
634 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
635 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
636 .add(DstMem)
637 .add(SrcMem)
638 .add(Dst)
639 .add(Src)
640 .add(Len);
641 MI.eraseFromParent();
642 return BB;
643 }
644 }
645
646 // We're going to add an extra use to `Len` to test if it's zero; that
647 // use shouldn't be a kill, even if the original use is.
648 MachineOperand NoKillLen = Len;
649 NoKillLen.setIsKill(false);
650
651 // Decide on which `MachineInstr` opcode we're going to use.
652 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
653 unsigned MemoryCopy =
654 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
655
656 // Create two new basic blocks; one for the new `memory.copy` that we can
657 // branch over, and one for the rest of the instructions after the original
658 // `memory.copy`.
659 const BasicBlock *LLVMBB = BB->getBasicBlock();
660 MachineFunction *F = BB->getParent();
661 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
662 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
663
665 F->insert(It, TrueMBB);
666 F->insert(It, DoneMBB);
667
668 // Transfer the remainder of BB and its successor edges to DoneMBB.
669 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
671
672 // Connect the CFG edges.
673 BB->addSuccessor(TrueMBB);
674 BB->addSuccessor(DoneMBB);
675 TrueMBB->addSuccessor(DoneMBB);
676
677 // Create a virtual register for the `Eqz` result.
678 unsigned EqzReg;
679 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
680
681 // Erase the original `memory.copy`.
682 MI.eraseFromParent();
683
684 // Test if `Len` is zero.
685 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
686
687 // Insert a new `memory.copy`.
688 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
689 .add(DstMem)
690 .add(SrcMem)
691 .add(Dst)
692 .add(Src)
693 .add(Len);
694
695 // Create the CFG triangle.
696 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
697 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
698
699 return DoneMBB;
700}
701
702// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
703// instruction to handle the zero-length case.
706 const TargetInstrInfo &TII, bool Int64) {
708
709 MachineOperand Mem = MI.getOperand(0);
710 MachineOperand Dst = MI.getOperand(1);
711 MachineOperand Val = MI.getOperand(2);
712 MachineOperand Len = MI.getOperand(3);
713
714 // If the length is a constant, we don't actually need the check.
715 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
716 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
717 Def->getOpcode() == WebAssembly::CONST_I64) {
718 if (Def->getOperand(1).getImm() == 0) {
719 // A zero-length memset is a no-op.
720 MI.eraseFromParent();
721 return BB;
722 }
723 // A non-zero-length memset doesn't need a zero check.
724 unsigned MemoryFill =
725 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
726 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
727 .add(Mem)
728 .add(Dst)
729 .add(Val)
730 .add(Len);
731 MI.eraseFromParent();
732 return BB;
733 }
734 }
735
736 // We're going to add an extra use to `Len` to test if it's zero; that
737 // use shouldn't be a kill, even if the original use is.
738 MachineOperand NoKillLen = Len;
739 NoKillLen.setIsKill(false);
740
741 // Decide on which `MachineInstr` opcode we're going to use.
742 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
743 unsigned MemoryFill =
744 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
745
746 // Create two new basic blocks; one for the new `memory.fill` that we can
747 // branch over, and one for the rest of the instructions after the original
748 // `memory.fill`.
749 const BasicBlock *LLVMBB = BB->getBasicBlock();
750 MachineFunction *F = BB->getParent();
751 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
752 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
753
755 F->insert(It, TrueMBB);
756 F->insert(It, DoneMBB);
757
758 // Transfer the remainder of BB and its successor edges to DoneMBB.
759 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
761
762 // Connect the CFG edges.
763 BB->addSuccessor(TrueMBB);
764 BB->addSuccessor(DoneMBB);
765 TrueMBB->addSuccessor(DoneMBB);
766
767 // Create a virtual register for the `Eqz` result.
768 unsigned EqzReg;
769 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
770
771 // Erase the original `memory.fill`.
772 MI.eraseFromParent();
773
774 // Test if `Len` is zero.
775 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
776
777 // Insert a new `memory.fill`.
778 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
779
780 // Create the CFG triangle.
781 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
782 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
783
784 return DoneMBB;
785}
786
787static MachineBasicBlock *
789 const WebAssemblySubtarget *Subtarget,
790 const TargetInstrInfo &TII) {
791 MachineInstr &CallParams = *CallResults.getPrevNode();
792 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
793 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
794 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
795
796 bool IsIndirect =
797 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
798 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
799
800 bool IsFuncrefCall = false;
801 if (IsIndirect && CallParams.getOperand(0).isReg()) {
802 Register Reg = CallParams.getOperand(0).getReg();
803 const MachineFunction *MF = BB->getParent();
804 const MachineRegisterInfo &MRI = MF->getRegInfo();
805 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
806 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
807 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
808 }
809
810 unsigned CallOp;
811 if (IsIndirect && IsRetCall) {
812 CallOp = WebAssembly::RET_CALL_INDIRECT;
813 } else if (IsIndirect) {
814 CallOp = WebAssembly::CALL_INDIRECT;
815 } else if (IsRetCall) {
816 CallOp = WebAssembly::RET_CALL;
817 } else {
818 CallOp = WebAssembly::CALL;
819 }
820
821 MachineFunction &MF = *BB->getParent();
822 const MCInstrDesc &MCID = TII.get(CallOp);
823 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
824
825 // Move the function pointer to the end of the arguments for indirect calls
826 if (IsIndirect) {
827 auto FnPtr = CallParams.getOperand(0);
828 CallParams.removeOperand(0);
829
830 // For funcrefs, call_indirect is done through __funcref_call_table and the
831 // funcref is always installed in slot 0 of the table, therefore instead of
832 // having the function pointer added at the end of the params list, a zero
833 // (the index in __funcref_call_table)
834 // is added.
835 if (IsFuncrefCall) {
836 Register RegZero =
837 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
838 MachineInstrBuilder MIBC0 =
839 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
840
841 BB->insert(CallResults.getIterator(), MIBC0);
842 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
843 } else
844 CallParams.addOperand(FnPtr);
845 }
846
847 for (auto Def : CallResults.defs())
848 MIB.add(Def);
849
850 if (IsIndirect) {
851 // Placeholder for the type index.
852 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
853 MIB.addImm(0);
854 // The table into which this call_indirect indexes.
855 MCSymbolWasm *Table = IsFuncrefCall
857 MF.getContext(), Subtarget)
859 MF.getContext(), Subtarget);
860 if (Subtarget->hasCallIndirectOverlong()) {
861 MIB.addSym(Table);
862 } else {
863 // For the MVP there is at most one table whose number is 0, but we can't
864 // write a table symbol or issue relocations. Instead we just ensure the
865 // table is live and write a zero.
866 Table->setNoStrip();
867 MIB.addImm(0);
868 }
869 }
870
871 for (auto Use : CallParams.uses())
872 MIB.add(Use);
873
874 BB->insert(CallResults.getIterator(), MIB);
875 CallParams.eraseFromParent();
876 CallResults.eraseFromParent();
877
878 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
879 // table slot with ref.null upon call_indirect return.
880 //
881 // This generates the following code, which comes right after a call_indirect
882 // of a funcref:
883 //
884 // i32.const 0
885 // ref.null func
886 // table.set __funcref_call_table
887 if (IsIndirect && IsFuncrefCall) {
889 MF.getContext(), Subtarget);
890 Register RegZero =
891 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
892 MachineInstr *Const0 =
893 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
894 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
895
896 Register RegFuncref =
897 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
898 MachineInstr *RefNull =
899 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
900 BB->insertAfter(Const0->getIterator(), RefNull);
901
902 MachineInstr *TableSet =
903 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
904 .addSym(Table)
905 .addReg(RegZero)
906 .addReg(RegFuncref);
907 BB->insertAfter(RefNull->getIterator(), TableSet);
908 }
909
910 return BB;
911}
912
913MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
914 MachineInstr &MI, MachineBasicBlock *BB) const {
915 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
916 DebugLoc DL = MI.getDebugLoc();
917
918 switch (MI.getOpcode()) {
919 default:
920 llvm_unreachable("Unexpected instr type to insert");
921 case WebAssembly::FP_TO_SINT_I32_F32:
922 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
923 WebAssembly::I32_TRUNC_S_F32);
924 case WebAssembly::FP_TO_UINT_I32_F32:
925 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
926 WebAssembly::I32_TRUNC_U_F32);
927 case WebAssembly::FP_TO_SINT_I64_F32:
928 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
929 WebAssembly::I64_TRUNC_S_F32);
930 case WebAssembly::FP_TO_UINT_I64_F32:
931 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
932 WebAssembly::I64_TRUNC_U_F32);
933 case WebAssembly::FP_TO_SINT_I32_F64:
934 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
935 WebAssembly::I32_TRUNC_S_F64);
936 case WebAssembly::FP_TO_UINT_I32_F64:
937 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
938 WebAssembly::I32_TRUNC_U_F64);
939 case WebAssembly::FP_TO_SINT_I64_F64:
940 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
941 WebAssembly::I64_TRUNC_S_F64);
942 case WebAssembly::FP_TO_UINT_I64_F64:
943 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
944 WebAssembly::I64_TRUNC_U_F64);
945 case WebAssembly::MEMCPY_A32:
946 return LowerMemcpy(MI, DL, BB, TII, false);
947 case WebAssembly::MEMCPY_A64:
948 return LowerMemcpy(MI, DL, BB, TII, true);
949 case WebAssembly::MEMSET_A32:
950 return LowerMemset(MI, DL, BB, TII, false);
951 case WebAssembly::MEMSET_A64:
952 return LowerMemset(MI, DL, BB, TII, true);
953 case WebAssembly::CALL_RESULTS:
954 case WebAssembly::RET_CALL_RESULTS:
955 return LowerCallResults(MI, DL, BB, Subtarget, TII);
956 }
957}
958
959std::pair<unsigned, const TargetRegisterClass *>
960WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
961 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
962 // First, see if this is a constraint that directly corresponds to a
963 // WebAssembly register class.
964 if (Constraint.size() == 1) {
965 switch (Constraint[0]) {
966 case 'r':
967 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
968 if (Subtarget->hasSIMD128() && VT.isVector()) {
969 if (VT.getSizeInBits() == 128)
970 return std::make_pair(0U, &WebAssembly::V128RegClass);
971 }
972 if (VT.isInteger() && !VT.isVector()) {
973 if (VT.getSizeInBits() <= 32)
974 return std::make_pair(0U, &WebAssembly::I32RegClass);
975 if (VT.getSizeInBits() <= 64)
976 return std::make_pair(0U, &WebAssembly::I64RegClass);
977 }
978 if (VT.isFloatingPoint() && !VT.isVector()) {
979 switch (VT.getSizeInBits()) {
980 case 32:
981 return std::make_pair(0U, &WebAssembly::F32RegClass);
982 case 64:
983 return std::make_pair(0U, &WebAssembly::F64RegClass);
984 default:
985 break;
986 }
987 }
988 break;
989 default:
990 break;
991 }
992 }
993
995}
996
997bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
998 // Assume ctz is a relatively cheap operation.
999 return true;
1000}
1001
1002bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1003 // Assume clz is a relatively cheap operation.
1004 return true;
1005}
1006
1007bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1008 const AddrMode &AM,
1009 Type *Ty, unsigned AS,
1010 Instruction *I) const {
1011 // WebAssembly offsets are added as unsigned without wrapping. The
1012 // isLegalAddressingMode gives us no way to determine if wrapping could be
1013 // happening, so we approximate this by accepting only non-negative offsets.
1014 if (AM.BaseOffs < 0)
1015 return false;
1016
1017 // WebAssembly has no scale register operands.
1018 if (AM.Scale != 0)
1019 return false;
1020
1021 // Everything else is legal.
1022 return true;
1023}
1024
1025bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1026 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1027 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1028 // WebAssembly supports unaligned accesses, though it should be declared
1029 // with the p2align attribute on loads and stores which do so, and there
1030 // may be a performance impact. We tell LLVM they're "fast" because
1031 // for the kinds of things that LLVM uses this for (merging adjacent stores
1032 // of constants, etc.), WebAssembly implementations will either want the
1033 // unaligned access or they'll split anyway.
1034 if (Fast)
1035 *Fast = 1;
1036 return true;
1037}
1038
1039bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1040 AttributeList Attr) const {
1041 // The current thinking is that wasm engines will perform this optimization,
1042 // so we can save on code size.
1043 return true;
1044}
1045
1046bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1047 EVT ExtT = ExtVal.getValueType();
1048 SDValue N0 = ExtVal->getOperand(0);
1049 if (N0.getOpcode() == ISD::FREEZE)
1050 N0 = N0.getOperand(0);
1051 auto *Load = dyn_cast<LoadSDNode>(N0);
1052 if (!Load)
1053 return false;
1054 EVT MemT = Load->getValueType(0);
1055 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1056 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1057 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1058}
1059
1060bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1061 const GlobalAddressSDNode *GA) const {
1062 // Wasm doesn't support function addresses with offsets
1063 const GlobalValue *GV = GA->getGlobal();
1065}
1066
1067EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1068 LLVMContext &C,
1069 EVT VT) const {
1070 if (VT.isVector())
1072
1073 // So far, all branch instructions in Wasm take an I32 condition.
1074 // The default TargetLowering::getSetCCResultType returns the pointer size,
1075 // which would be useful to reduce instruction counts when testing
1076 // against 64-bit pointers/values if at some point Wasm supports that.
1077 return EVT::getIntegerVT(C, 32);
1078}
1079
// Describes the memory access performed by the WebAssembly intrinsics handled
// below. For each recognized intrinsic it fills in Info's memory type, pointer
// operand, offset and alignment and appends Info to Infos; for any other
// intrinsic it appends nothing.
//
// NOTE(review): the embedded line numbers skip several values in this function
// (1081, 1083, 1086, 1097, 1101, 1106, 1110, 1115, 1119, 1124, 1128, 1133).
// `Infos`, `I` and `Info` are used but never declared in the visible text, so
// part of the parameter list, the declaration of Info and some per-case
// assignments appear to be missing from this listing — confirm against the
// original file before editing.
1080void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1082 MachineFunction &MF, unsigned Intrinsic) const {
1084 switch (Intrinsic) {
1085 case Intrinsic::wasm_memory_atomic_notify:
// 32-bit access through the first call argument, 4-byte aligned.
1087 Info.memVT = MVT::i32;
1088 Info.ptrVal = I.getArgOperand(0);
1089 Info.offset = 0;
1090 Info.align = Align(4);
1091 // atomic.notify instruction does not really load the memory specified with
1092 // this argument, but MachineMemOperand should either be load or store, so
1093 // we set this to a load.
1094 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1095 // instructions are treated as volatiles in the backend, so we should be
1096 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1098 Infos.push_back(Info);
1099 return;
1100 case Intrinsic::wasm_memory_atomic_wait32:
// 32-bit access through the first call argument, 4-byte aligned.
1102 Info.memVT = MVT::i32;
1103 Info.ptrVal = I.getArgOperand(0);
1104 Info.offset = 0;
1105 Info.align = Align(4);
1107 Infos.push_back(Info);
1108 return;
1109 case Intrinsic::wasm_memory_atomic_wait64:
// 64-bit access through the first call argument, 8-byte aligned.
1111 Info.memVT = MVT::i64;
1112 Info.ptrVal = I.getArgOperand(0);
1113 Info.offset = 0;
1114 Info.align = Align(8);
1116 Infos.push_back(Info);
1117 return;
1118 case Intrinsic::wasm_loadf16_f32:
// f16 access through the first call argument, 2-byte aligned.
1120 Info.memVT = MVT::f16;
1121 Info.ptrVal = I.getArgOperand(0);
1122 Info.offset = 0;
1123 Info.align = Align(2);
1125 Infos.push_back(Info);
1126 return;
1127 case Intrinsic::wasm_storef16_f32:
// f16 access; here the pointer is the second call argument.
1129 Info.memVT = MVT::f16;
1130 Info.ptrVal = I.getArgOperand(1);
1131 Info.offset = 0;
1132 Info.align = Align(2);
1134 Infos.push_back(Info);
1135 return;
1136 default:
1137 return;
1138 }
1139}
1140
1141void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1142 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1143 const SelectionDAG &DAG, unsigned Depth) const {
1144 switch (Op.getOpcode()) {
1145 default:
1146 break;
1148 unsigned IntNo = Op.getConstantOperandVal(0);
1149 switch (IntNo) {
1150 default:
1151 break;
1152 case Intrinsic::wasm_bitmask: {
1153 unsigned BitWidth = Known.getBitWidth();
1154 EVT VT = Op.getOperand(1).getSimpleValueType();
1155 unsigned PossibleBits = VT.getVectorNumElements();
1156 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1157 Known.Zero |= ZeroMask;
1158 break;
1159 }
1160 }
1161 break;
1162 }
1163 case WebAssemblyISD::EXTEND_LOW_U:
1164 case WebAssemblyISD::EXTEND_HIGH_U: {
1165 // We know the high half, of each destination vector element, will be zero.
1166 SDValue SrcOp = Op.getOperand(0);
1167 EVT VT = SrcOp.getSimpleValueType();
1168 unsigned BitWidth = Known.getBitWidth();
1169 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1170 assert(BitWidth >= 8 && "Unexpected width!");
1172 Known.Zero |= Mask;
1173 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1174 assert(BitWidth >= 16 && "Unexpected width!");
1176 Known.Zero |= Mask;
1177 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1178 assert(BitWidth >= 32 && "Unexpected width!");
1180 Known.Zero |= Mask;
1181 }
1182 break;
1183 }
1184 // For 128-bit addition if the upper bits are all zero then it's known that
1185 // the upper bits of the result will have all bits guaranteed zero except the
1186 // first.
1187 case WebAssemblyISD::I64_ADD128:
1188 if (Op.getResNo() == 1) {
1189 SDValue LHS_HI = Op.getOperand(1);
1190 SDValue RHS_HI = Op.getOperand(3);
1191 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1192 Known.Zero.setBitsFrom(1);
1193 }
1194 break;
1195 }
1196}
1197
1199WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1200 if (VT.isFixedLengthVector()) {
1201 MVT EltVT = VT.getVectorElementType();
1202 // We have legal vector types with these lane types, so widening the
1203 // vector would let us use some of the lanes directly without having to
1204 // extend or truncate values.
1205 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1206 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1207 return TypeWidenVector;
1208 }
1209
1211}
1212
1213bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1214 const MachineFunction &MF, EVT VT) const {
1215 if (!Subtarget->hasFP16() || !VT.isVector())
1216 return false;
1217
1218 EVT ScalarVT = VT.getScalarType();
1219 if (!ScalarVT.isSimple())
1220 return false;
1221
1222 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1223}
1224
1225bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1226 SDValue Op, const TargetLoweringOpt &TLO) const {
1227 // ISel process runs DAGCombiner after legalization; this step is called
1228 // SelectionDAG optimization phase. This post-legalization combining process
1229 // runs DAGCombiner on each node, and if there was a change to be made,
1230 // re-runs legalization again on it and its user nodes to make sure
1231 // everythiing is in a legalized state.
1232 //
1233 // The legalization calls lowering routines, and we do our custom lowering for
1234 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1235 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1236 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1237 // turns unused vector elements into undefs. But this routine does not work
1238 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1239 // combination can result in a infinite loop, in which undefs are converted to
1240 // zeros in legalization and back to undefs in combining.
1241 //
1242 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1243 // running for build_vectors.
1244 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1245 return false;
1246 return true;
1247}
1248
1249//===----------------------------------------------------------------------===//
1250// WebAssembly Lowering private implementation.
1251//===----------------------------------------------------------------------===//
1252
1253//===----------------------------------------------------------------------===//
1254// Lowering Code
1255//===----------------------------------------------------------------------===//
1256
1257static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1259 DAG.getContext()->diagnose(
1260 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1261}
1262
1263// Test whether the given calling convention is supported.
1265 // We currently support the language-independent target-independent
1266 // conventions. We don't yet have a way to annotate calls with properties like
1267 // "cold", and we don't have any call-clobbered registers, so these are mostly
1268 // all handled the same.
1269 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1270 CallConv == CallingConv::Cold ||
1271 CallConv == CallingConv::PreserveMost ||
1272 CallConv == CallingConv::PreserveAll ||
1273 CallConv == CallingConv::CXX_FAST_TLS ||
1275 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1276}
1277
// Lowers an outgoing call described by CLI into a WebAssemblyISD::CALL node,
// or a WebAssemblyISD::RET_CALL node when the call remains a tail call.
//
// Visible steps, in order:
//  1. Diagnose unsupported calling conventions and patch points.
//  2. Demote tail calls that cannot be honoured (diagnosing only musttail).
//  3. Validate argument flags and copy non-empty byval arguments into fresh
//     stack objects.
//  4. For swiftcc/swifttailcc, append undef swiftself/swifterror (and, for
//     swifttailcc, swiftasync) arguments when the call does not have them.
//  5. For varargs calls, store the non-fixed arguments into a stack buffer and
//     pass a pointer to that buffer as a trailing operand.
//  6. Build the operand list, emit the call node, and push its results onto
//     InVals. Returns the call's chain, or the RET_CALL node for tail calls.
//
// NOTE(review): the embedded line numbers skip values inside this function
// (1376, 1381, 1387, 1436, 1454, 1469, 1479, 1492-1493, 1500, 1530, 1540,
// 1550, 1552), so individual lines appear to be missing from this listing;
// the gaps are marked below. Confirm against the original file before editing.
1278SDValue
1279WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1280 SmallVectorImpl<SDValue> &InVals) const {
1281 SelectionDAG &DAG = CLI.DAG;
1282 SDLoc DL = CLI.DL;
1283 SDValue Chain = CLI.Chain;
1284 SDValue Callee = CLI.Callee;
1285 MachineFunction &MF = DAG.getMachineFunction();
// NOTE(review): plain `auto` deduces a value type, so if getDataLayout()
// returns a reference this makes a copy of the DataLayout — TODO confirm.
1286 auto Layout = MF.getDataLayout();
1287
1288 CallingConv::ID CallConv = CLI.CallConv;
1289 if (!callingConvSupported(CallConv))
1290 fail(DL, DAG,
1291 "WebAssembly doesn't support language-specific or target-specific "
1292 "calling conventions yet");
1293 if (CLI.IsPatchPoint)
1294 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1295
1296 if (CLI.IsTailCall) {
// Clears the tail-call request; a diagnostic is emitted only when the call
// site is marked musttail.
1297 auto NoTail = [&](const char *Msg) {
1298 if (CLI.CB && CLI.CB->isMustTailCall())
1299 fail(DL, DAG, Msg);
1300 CLI.IsTailCall = false;
1301 };
1302
1303 if (!Subtarget->hasTailCall())
1304 NoTail("WebAssembly 'tail-call' feature not enabled");
1305
1306 // Varargs calls cannot be tail calls because the buffer is on the stack
1307 if (CLI.IsVarArg)
1308 NoTail("WebAssembly does not support varargs tail calls");
1309
1310 // Do not tail call unless caller and callee return types match
1311 const Function &F = MF.getFunction();
1312 const TargetMachine &TM = getTargetMachine();
1313 Type *RetTy = F.getReturnType();
1314 SmallVector<MVT, 4> CallerRetTys;
1315 SmallVector<MVT, 4> CalleeRetTys;
1316 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1317 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1318 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1319 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1320 CalleeRetTys.begin());
1321 if (!TypesMatch)
1322 NoTail("WebAssembly tail call requires caller and callee return types to "
1323 "match");
1324
1325 // If pointers to local stack values are passed, we cannot tail call
1326 if (CLI.CB) {
1327 for (auto &Arg : CLI.CB->args()) {
1328 Value *Val = Arg.get();
1329 // Trace the value back through pointer operations
// Repeats until stripping casts/aliases and stepping through a GEP no longer
// changes the value.
1330 while (true) {
1331 Value *Src = Val->stripPointerCastsAndAliases();
1332 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1333 Src = GEP->getPointerOperand();
1334 if (Val == Src)
1335 break;
1336 Val = Src;
1337 }
1338 if (isa<AllocaInst>(Val)) {
1339 NoTail(
1340 "WebAssembly does not support tail calling with stack arguments");
1341 break;
1342 }
1343 }
1344 }
1345 }
1346
1347 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1348 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1349 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1350
1351 // The generic code may have added an sret argument. If we're lowering an
1352 // invoke function, the ABI requires that the function pointer be the first
1353 // argument, so we may have to swap the arguments.
1354 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1355 Outs[0].Flags.isSRet()) {
1356 std::swap(Outs[0], Outs[1]);
1357 std::swap(OutVals[0], OutVals[1]);
1358 }
1359
1360 bool HasSwiftSelfArg = false;
1361 bool HasSwiftErrorArg = false;
1362 bool HasSwiftAsyncArg = false;
1363 unsigned NumFixedArgs = 0;
1364 for (unsigned I = 0; I < Outs.size(); ++I) {
1365 const ISD::OutputArg &Out = Outs[I];
1366 SDValue &OutVal = OutVals[I];
1367 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1368 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1369 HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
1370 if (Out.Flags.isNest())
1371 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1372 if (Out.Flags.isInAlloca())
1373 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1374 if (Out.Flags.isInConsecutiveRegs())
1375 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
// NOTE(review): line 1376 is missing here; as shown, the next fail() would
// run unconditionally. Presumably a "cons regs last" flag check was dropped.
1377 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// Non-empty byval arguments are copied into a new stack object and the
// argument value is replaced by that object's frame index.
1378 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1379 auto &MFI = MF.getFrameInfo();
// NOTE(review): line 1381 (an argument of CreateStackObject) is missing.
1380 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1382 /*isSS=*/false);
1383 SDValue SizeNode =
1384 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1385 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
// NOTE(review): line 1387 (an argument of getMemcpy) is missing.
1386 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1388 /*isVolatile*/ false, /*AlwaysInline=*/false,
1389 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1390 MachinePointerInfo());
1391 OutVal = FINode;
1392 }
1393 // Count the number of fixed args *after* legalization.
1394 NumFixedArgs += !Out.Flags.isVarArg();
1395 }
1396
1397 bool IsVarArg = CLI.IsVarArg;
1398 auto PtrVT = getPointerTy(Layout);
1399
1400 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1401 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1402 // arguments are also added for callee signature. They are necessary to match
1403 // callee and caller signature for indirect call.
1404 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1405 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1406 if (!HasSwiftSelfArg) {
1407 NumFixedArgs++;
1408 ISD::ArgFlagsTy Flags;
1409 Flags.setSwiftSelf();
1410 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1411 CLI.Outs.push_back(Arg);
1412 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1413 CLI.OutVals.push_back(ArgVal);
1414 }
1415 if (!HasSwiftErrorArg) {
1416 NumFixedArgs++;
1417 ISD::ArgFlagsTy Flags;
1418 Flags.setSwiftError();
1419 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1420 CLI.Outs.push_back(Arg);
1421 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1422 CLI.OutVals.push_back(ArgVal);
1423 }
1424 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1425 NumFixedArgs++;
1426 ISD::ArgFlagsTy Flags;
1427 Flags.setSwiftAsync();
1428 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1429 CLI.Outs.push_back(Arg);
1430 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1431 CLI.OutVals.push_back(ArgVal);
1432 }
1433 }
1434
1435 // Analyze operands of the call, assigning locations to each operand.
// NOTE(review): line 1436 is missing; `ArgLocs` is used below without a
// visible declaration.
1437 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1438
1439 if (IsVarArg) {
1440 // Outgoing non-fixed arguments are placed in a buffer. First
1441 // compute their offsets and the total amount of buffer space needed.
1442 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1443 const ISD::OutputArg &Out = Outs[I];
1444 SDValue &Arg = OutVals[I];
1445 EVT VT = Arg.getValueType();
1446 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1447 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1448 Align Alignment =
1449 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1450 unsigned Offset =
1451 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
// NOTE(review): line 1454 (the end of this call) is missing.
1452 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1453 Offset, VT.getSimpleVT(),
1455 }
1456 }
1457
1458 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1459
1460 SDValue FINode;
1461 if (IsVarArg && NumBytes) {
1462 // For non-fixed arguments, next emit stores to store the argument values
1463 // to the stack buffer at the offsets computed above.
1464 MaybeAlign StackAlign = Layout.getStackAlignment();
1465 assert(StackAlign && "data layout string is missing stack alignment");
1466 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1467 /*isSS=*/false);
1468 unsigned ValNo = 0;
// NOTE(review): line 1469 is missing; `Chains` is used below without a
// visible declaration.
1470 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1471 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1472 "ArgLocs should remain in order and only hold varargs args");
1473 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1474 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1475 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1476 DAG.getConstant(Offset, DL, PtrVT));
// NOTE(review): line 1479 (the remaining arguments of getStore) is missing.
1477 Chains.push_back(
1478 DAG.getStore(Chain, DL, Arg, Add,
1480 }
1481 if (!Chains.empty())
1482 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1483 } else if (IsVarArg) {
// A varargs call with an empty buffer passes a zero pointer-sized constant.
1484 FINode = DAG.getIntPtrConstant(0, DL);
1485 }
1486
1487 if (Callee->getOpcode() == ISD::GlobalAddress) {
1488 // If the callee is a GlobalAddress node (quite common, every direct call
1489 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1490 // doesn't add MO_GOT which is not needed for direct calls.
1491 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
// NOTE(review): lines 1492-1493 (the start of the statement that ends with
// `GA->getOffset());`) are missing.
1494 GA->getOffset());
1495 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1496 getPointerTy(DAG.getDataLayout()), Callee);
1497 }
1498
1499 // Compute the operands for the CALLn node.
// NOTE(review): line 1500 is missing; `Ops` is used below without a visible
// declaration.
1501 Ops.push_back(Chain);
1502 Ops.push_back(Callee);
1503
1504 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1505 // isn't reliable.
1506 Ops.append(OutVals.begin(),
1507 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1508 // Add a pointer to the vararg buffer.
1509 if (IsVarArg)
1510 Ops.push_back(FINode);
1511
// Collect the call's result types while rejecting unsupported return flags.
1512 SmallVector<EVT, 8> InTys;
1513 for (const auto &In : Ins) {
1514 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1515 assert(!In.Flags.isNest() && "nest is not valid for return values");
1516 if (In.Flags.isInAlloca())
1517 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1518 if (In.Flags.isInConsecutiveRegs())
1519 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1520 if (In.Flags.isInConsecutiveRegsLast())
1521 fail(DL, DAG,
1522 "WebAssembly hasn't implemented cons regs last return values");
1523 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1524 // registers.
1525 InTys.push_back(In.VT);
1526 }
1527
1528 // Lastly, if this is a call to a funcref we need to add an instruction
1529 // table.set to the chain and transform the call.
// NOTE(review): line 1530 (the start of this `if` condition) is missing.
1531 CLI.CB->getCalledOperand()->getType())) {
1532 // In the absence of function references proposal where a funcref call is
1533 // lowered to call_ref, using reference types we generate a table.set to set
1534 // the funcref to a special table used solely for this purpose, followed by
1535 // a call_indirect. Here we just generate the table set, and return the
1536 // SDValue of the table.set so that LowerCall can finalize the lowering by
1537 // generating the call_indirect.
1538 SDValue Chain = Ops[0];
1539
// NOTE(review): line 1540 (the start of the statement defining `Table`) is
// missing.
1541 MF.getContext(), Subtarget);
1542 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1543 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1544 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1545 SDValue TableSet = DAG.getMemIntrinsicNode(
1546 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1547 MVT::funcref,
1548 // Machine Mem Operand args
// NOTE(review): lines 1550 and 1552 (MachinePointerInfo's argument and the
// end of this call) are missing.
1549 MachinePointerInfo(
1551 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1553
1554 Ops[0] = TableSet; // The new chain is the TableSet itself
1555 }
1556
1557 if (CLI.IsTailCall) {
1558 // ret_calls do not return values to the current frame
1559 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1560 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1561 }
1562
// The chain is the last result of the CALL node, after all value results.
1563 InTys.push_back(MVT::Other);
1564 SDVTList InTyList = DAG.getVTList(InTys);
1565 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1566
1567 for (size_t I = 0; I < Ins.size(); ++I)
1568 InVals.push_back(Res.getValue(I));
1569
1570 // Return the chain
1571 return Res.getValue(Ins.size());
1572}
1573
1574bool WebAssemblyTargetLowering::CanLowerReturn(
1575 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1576 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1577 const Type *RetTy) const {
1578 // WebAssembly can only handle returning tuples with multivalue enabled
1579 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1580}
1581
1582SDValue WebAssemblyTargetLowering::LowerReturn(
1583 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1585 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1586 SelectionDAG &DAG) const {
1587 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1588 "MVP WebAssembly can only return up to one value");
1589 if (!callingConvSupported(CallConv))
1590 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1591
1592 SmallVector<SDValue, 4> RetOps(1, Chain);
1593 RetOps.append(OutVals.begin(), OutVals.end());
1594 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1595
1596 // Record the number and types of the return values.
1597 for (const ISD::OutputArg &Out : Outs) {
1598 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1599 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1600 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1601 if (Out.Flags.isInAlloca())
1602 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1603 if (Out.Flags.isInConsecutiveRegs())
1604 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1606 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1607 }
1608
1609 return Chain;
1610}
1611
1612SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1613 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1614 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1615 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1616 if (!callingConvSupported(CallConv))
1617 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1618
1619 MachineFunction &MF = DAG.getMachineFunction();
1620 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1621
1622 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1623 // of the incoming values before they're represented by virtual registers.
1624 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1625
1626 bool HasSwiftErrorArg = false;
1627 bool HasSwiftSelfArg = false;
1628 bool HasSwiftAsyncArg = false;
1629 for (const ISD::InputArg &In : Ins) {
1630 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1631 HasSwiftErrorArg |= In.Flags.isSwiftError();
1632 HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
1633 if (In.Flags.isInAlloca())
1634 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1635 if (In.Flags.isNest())
1636 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1637 if (In.Flags.isInConsecutiveRegs())
1638 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1639 if (In.Flags.isInConsecutiveRegsLast())
1640 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1641 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1642 // registers.
1643 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1644 DAG.getTargetConstant(InVals.size(),
1645 DL, MVT::i32))
1646 : DAG.getUNDEF(In.VT));
1647
1648 // Record the number and types of arguments.
1649 MFI->addParam(In.VT);
1650 }
1651
1652 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1653 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1654 // arguments are also added for callee signature. They are necessary to match
1655 // callee and caller signature for indirect call.
1656 auto PtrVT = getPointerTy(MF.getDataLayout());
1657 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1658 if (!HasSwiftSelfArg) {
1659 MFI->addParam(PtrVT);
1660 }
1661 if (!HasSwiftErrorArg) {
1662 MFI->addParam(PtrVT);
1663 }
1664 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1665 MFI->addParam(PtrVT);
1666 }
1667 }
1668 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1669 // the buffer is passed as an argument.
1670 if (IsVarArg) {
1671 MVT PtrVT = getPointerTy(MF.getDataLayout());
1672 Register VarargVreg =
1674 MFI->setVarargBufferVreg(VarargVreg);
1675 Chain = DAG.getCopyToReg(
1676 Chain, DL, VarargVreg,
1677 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1678 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1679 MFI->addParam(PtrVT);
1680 }
1681
1682 // Record the number and types of arguments and results.
1683 SmallVector<MVT, 4> Params;
1686 MF.getFunction(), DAG.getTarget(), Params, Results);
1687 for (MVT VT : Results)
1688 MFI->addResult(VT);
1689 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1690 // the param logic here with ComputeSignatureVTs
1691 assert(MFI->getParams().size() == Params.size() &&
1692 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1693 Params.begin()));
1694
1695 return Chain;
1696}
1697
// Produces replacement values for a node N whose result type is illegal.
// Results is left empty to signal "do not custom lower after all"; for
// ISD::ADD and ISD::SUB the value returned by Replace128Op is pushed.
//
// NOTE(review): the embedded line numbers skip 1699, 1701, 1707-1709 and 1718,
// so the parameter list, the case labels in front of the two bare `break`s and
// the start of the default-case statement appear to be missing from this
// listing — confirm against the original file before editing.
1698void WebAssemblyTargetLowering::ReplaceNodeResults(
1700 switch (N->getOpcode()) {
1702 // Do not add any results, signifying that N should not be custom lowered
1703 // after all. This happens because simd128 turns on custom lowering for
1704 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1705 // illegal type.
1706 break;
1710 // Do not add any results, signifying that N should not be custom lowered.
1711 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1712 break;
1713 case ISD::ADD:
1714 case ISD::SUB:
1715 Results.push_back(Replace128Op(N, DAG));
1716 break;
1717 default:
1719 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1720 }
1721}
1722
1723//===----------------------------------------------------------------------===//
1724// Custom lowering hooks.
1725//===----------------------------------------------------------------------===//
1726
// Central dispatcher for custom lowering: switches on Op's opcode and forwards
// to the matching Lower* helper. Opcodes without a case hit llvm_unreachable.
//
// NOTE(review): the embedded line numbers skip values in front of several
// `return` statements (1738, 1740, 1758-1759, 1761-1763, 1765, 1767-1769,
// 1773, 1781-1782), so the corresponding `case` labels appear to be missing
// from this listing. As shown, those returns would be unreachable — confirm
// against the original file before editing.
1727SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1728 SelectionDAG &DAG) const {
1729 SDLoc DL(Op);
1730 switch (Op.getOpcode()) {
1731 default:
1732 llvm_unreachable("unimplemented operation lowering");
1733 return SDValue();
1734 case ISD::FrameIndex:
1735 return LowerFrameIndex(Op, DAG);
1736 case ISD::GlobalAddress:
1737 return LowerGlobalAddress(Op, DAG);
1739 return LowerGlobalTLSAddress(Op, DAG);
1741 return LowerExternalSymbol(Op, DAG);
1742 case ISD::JumpTable:
1743 return LowerJumpTable(Op, DAG);
1744 case ISD::BR_JT:
1745 return LowerBR_JT(Op, DAG);
1746 case ISD::VASTART:
1747 return LowerVASTART(Op, DAG);
// Computed gotos are diagnosed and lowered to an empty SDValue.
1748 case ISD::BlockAddress:
1749 case ISD::BRIND:
1750 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1751 return SDValue();
1752 case ISD::RETURNADDR:
1753 return LowerRETURNADDR(Op, DAG);
1754 case ISD::FRAMEADDR:
1755 return LowerFRAMEADDR(Op, DAG);
1756 case ISD::CopyToReg:
1757 return LowerCopyToReg(Op, DAG);
1760 return LowerAccessVectorElement(Op, DAG);
1764 return LowerIntrinsic(Op, DAG);
1766 return LowerSIGN_EXTEND_INREG(Op, DAG);
1770 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1771 case ISD::BUILD_VECTOR:
1772 return LowerBUILD_VECTOR(Op, DAG);
1774 return LowerVECTOR_SHUFFLE(Op, DAG);
1775 case ISD::SETCC:
1776 return LowerSETCC(Op, DAG);
1777 case ISD::SHL:
1778 case ISD::SRA:
1779 case ISD::SRL:
1780 return LowerShift(Op, DAG);
1783 return LowerFP_TO_INT_SAT(Op, DAG);
1784 case ISD::FMINNUM:
1785 case ISD::FMINIMUMNUM:
1786 return LowerFMIN(Op, DAG);
1787 case ISD::FMAXNUM:
1788 case ISD::FMAXIMUMNUM:
1789 return LowerFMAX(Op, DAG);
1790 case ISD::LOAD:
1791 return LowerLoad(Op, DAG);
1792 case ISD::STORE:
1793 return LowerStore(Op, DAG);
// Bit-counting operations that reach here are unrolled via UnrollVectorOp.
1794 case ISD::CTPOP:
1795 case ISD::CTLZ:
1796 case ISD::CTTZ:
1797 return DAG.UnrollVectorOp(Op.getNode());
1798 case ISD::CLEAR_CACHE:
1799 report_fatal_error("llvm.clear_cache is not supported on wasm");
1800 case ISD::SMUL_LOHI:
1801 case ISD::UMUL_LOHI:
1802 return LowerMUL_LOHI(Op, DAG);
1803 case ISD::UADDO:
1804 return LowerUADDO(Op, DAG);
1805 }
1806}
1807
1811
// NOTE(review): this is the tail of a function whose signature and leading
// statements (embedded line numbers 1808-1810) are missing from this listing.
// Only its fall-through `return false` survives; recover the missing head from
// the original file before editing.
1812 return false;
1813}
1814
// Returns std::nullopt when `FI` is null; the result on the other path is not
// visible here.
//
// NOTE(review): the embedded line numbers skip 1817 and 1822, so the
// definition of `FI` and the final return statement appear to be missing from
// this listing. Judging by the name and by its callers, the function
// presumably maps a frame-index operand to a WebAssembly local number —
// confirm against the original file.
1815static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1816 SelectionDAG &DAG) {
1818 if (!FI)
1819 return std::nullopt;
1820
1821 auto &MF = DAG.getMachineFunction();
1823}
1824
// Custom-lowers a store. Visible paths: a store that becomes a GLOBAL_SET
// memory-intrinsic node, a store to a WebAssembly local that becomes
// LOCAL_SET, a fatal error for an unlowerable wasm_var store, and otherwise
// the original Op returned unchanged.
//
// NOTE(review): the embedded line numbers skip 1833 and 1855-1856, so the
// condition guarding the GLOBAL_SET path and the condition/call in front of
// the final error message appear to be missing from this listing — confirm
// against the original file before editing.
1825SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1826 SelectionDAG &DAG) const {
1827 SDLoc DL(Op);
1828 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1829 const SDValue &Value = SN->getValue();
1830 const SDValue &Base = SN->getBasePtr();
1831 const SDValue &Offset = SN->getOffset();
1832
// A defined offset operand is rejected on this path.
1834 if (!Offset->isUndef())
1835 report_fatal_error("unexpected offset when storing to webassembly global",
1836 false);
1837
1838 SDVTList Tys = DAG.getVTList(MVT::Other);
1839 SDValue Ops[] = {SN->getChain(), Value, Base};
1840 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1841 SN->getMemoryVT(), SN->getMemOperand());
1842 }
1843
1844 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1845 if (!Offset->isUndef())
1846 report_fatal_error("unexpected offset when storing to webassembly local",
1847 false);
1848
// The local's index is passed as an i32 target constant.
1849 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1850 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1851 SDValue Ops[] = {SN->getChain(), Idx, Value};
1852 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1853 }
1854
1857 "Encountered an unlowerable store to the wasm_var address space",
1858 false);
1859
1860 return Op;
1861}
1862
// Custom-lowers a load. Visible paths: a load that becomes a GLOBAL_GET
// memory-intrinsic node, a load from a WebAssembly local that becomes
// LOCAL_GET, a fatal error for an unlowerable wasm_var load, and otherwise the
// original Op returned unchanged.
//
// NOTE(review): the embedded line numbers skip 1870, 1872, 1883 and 1892-1893,
// so the condition guarding the GLOBAL_GET path and the calls that own the
// error-message strings appear to be missing from this listing — confirm
// against the original file before editing.
1863SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1864 SelectionDAG &DAG) const {
1865 SDLoc DL(Op);
1866 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1867 const SDValue &Base = LN->getBasePtr();
1868 const SDValue &Offset = LN->getOffset();
1869
// A defined offset operand is rejected on this path.
1871 if (!Offset->isUndef())
1873 "unexpected offset when loading from webassembly global", false);
1874
// The result list is the loaded value followed by the chain.
1875 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1876 SDValue Ops[] = {LN->getChain(), Base};
1877 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1878 LN->getMemoryVT(), LN->getMemOperand());
1879 }
1880
1881 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1882 if (!Offset->isUndef())
1884 "unexpected offset when loading from webassembly local", false);
1885
// The local's index is passed as an i32 target constant.
1886 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1887 EVT LocalVT = LN->getValueType(0);
1888 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1889 {LN->getChain(), Idx});
1890 }
1891
1894 "Encountered an unlowerable load from the wasm_var address space",
1895 false);
1896
1897 return Op;
1898}
1899
1900SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1901 SelectionDAG &DAG) const {
1902 assert(Subtarget->hasWideArithmetic());
1903 assert(Op.getValueType() == MVT::i64);
1904 SDLoc DL(Op);
1905 unsigned Opcode;
1906 switch (Op.getOpcode()) {
1907 case ISD::UMUL_LOHI:
1908 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1909 break;
1910 case ISD::SMUL_LOHI:
1911 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1912 break;
1913 default:
1914 llvm_unreachable("unexpected opcode");
1915 }
1916 SDValue LHS = Op.getOperand(0);
1917 SDValue RHS = Op.getOperand(1);
1918 SDValue Lo =
1919 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1920 SDValue Hi(Lo.getNode(), 1);
1921 SDValue Ops[] = {Lo, Hi};
1922 return DAG.getMergeValues(Ops, DL);
1923}
1924
1925// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1926//
1927// This enables generating a single wasm instruction for this operation where
1928// the upper half of both operands are constant zeros. The upper half of the
1929// result is then whether the overflow happened.
1930SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1931 SelectionDAG &DAG) const {
1932 assert(Subtarget->hasWideArithmetic());
1933 assert(Op.getValueType() == MVT::i64);
1934 assert(Op.getOpcode() == ISD::UADDO);
1935 SDLoc DL(Op);
1936 SDValue LHS = Op.getOperand(0);
1937 SDValue RHS = Op.getOperand(1);
1938 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1939 SDValue Result =
1940 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1941 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1942 SDValue CarryI64(Result.getNode(), 1);
1943 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1944 SDValue Ops[] = {Result, CarryI32};
1945 return DAG.getMergeValues(Ops, DL);
1946}
1947
1948SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1949 SelectionDAG &DAG) const {
1950 assert(Subtarget->hasWideArithmetic());
1951 assert(N->getValueType(0) == MVT::i128);
1952 SDLoc DL(N);
1953 unsigned Opcode;
1954 switch (N->getOpcode()) {
1955 case ISD::ADD:
1956 Opcode = WebAssemblyISD::I64_ADD128;
1957 break;
1958 case ISD::SUB:
1959 Opcode = WebAssemblyISD::I64_SUB128;
1960 break;
1961 default:
1962 llvm_unreachable("unexpected opcode");
1963 }
1964 SDValue LHS = N->getOperand(0);
1965 SDValue RHS = N->getOperand(1);
1966
1967 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1968 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1969 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1970 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1971 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1972 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1973 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1974 LHS_0, LHS_1, RHS_0, RHS_1);
1975 SDValue Result_HI(Result_LO.getNode(), 1);
1976 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1977}
1978
1979SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1980 SelectionDAG &DAG) const {
1981 SDValue Src = Op.getOperand(2);
1982 if (isa<FrameIndexSDNode>(Src.getNode())) {
1983 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1984 // the FI to some LEA-like instruction, but since we don't have that, we
1985 // need to insert some kind of instruction that can take an FI operand and
1986 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1987 // local.copy between Op and its FI operand.
1988 SDValue Chain = Op.getOperand(0);
1989 SDLoc DL(Op);
1990 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1991 EVT VT = Src.getValueType();
1992 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1993 : WebAssembly::COPY_I64,
1994 DL, VT, Src),
1995 0);
1996 return Op.getNode()->getNumValues() == 1
1997 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1998 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1999 Op.getNumOperands() == 4 ? Op.getOperand(3)
2000 : SDValue());
2001 }
2002 return SDValue();
2003}
2004
2005SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2006 SelectionDAG &DAG) const {
2007 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
2008 return DAG.getTargetFrameIndex(FI, Op.getValueType());
2009}
2010
2011SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2012 SelectionDAG &DAG) const {
2013 SDLoc DL(Op);
2014
2015 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2016 fail(DL, DAG,
2017 "Non-Emscripten WebAssembly hasn't implemented "
2018 "__builtin_return_address");
2019 return SDValue();
2020 }
2021
2022 unsigned Depth = Op.getConstantOperandVal(0);
2023 MakeLibCallOptions CallOptions;
2024 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2025 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2026 .first;
2027}
2028
// Lowers a frame-address query. A non-zero depth operand yields an empty
// SDValue (deferring to the default expansion); for depth zero the function's
// frame register is read with a CopyFromReg hanging off the entry node.
2029SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2030 SelectionDAG &DAG) const {
2031 // Non-zero depths are not supported by WebAssembly currently. Use the
2032 // legalizer's default expansion, which is to return 0 (what this function is
2033 // documented to do).
2034 if (Op.getConstantOperandVal(0) > 0)
2035 return SDValue();
2036
// NOTE(review): listing line 2037 is absent from this extract (the embedded
// numbering jumps from 2036 to 2038); confirm the missing statement against
// the upstream file.
2038 EVT VT = Op.getValueType();
2039 Register FP =
2040 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2041 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2042}
2043
// Lowers a thread-local GlobalAddress node.
//
// Raises a fatal error when the subtarget lacks bulk memory. In the DSO-local
// branch the address is an ADD of a base value and a WrapperREL around the
// symbol's target global address flagged MO_TLS_BASE_REL; the final path wraps
// a target global address in a Wrapper node.
//
// NOTE(review): listing lines 2062, 2065-2066, 2069-2070, 2079, 2092 and 2098
// are absent from this extract (see the gaps in the embedded numbering), so
// the exact TLS-model conditions, the declaration of BaseAddr and the flags
// on the final target global address cannot be read here. Confirm against the
// upstream file before changing this function.
2044SDValue
2045WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2046 SelectionDAG &DAG) const {
2047 SDLoc DL(Op);
2048 const auto *GA = cast<GlobalAddressSDNode>(Op);
2049
2050 MachineFunction &MF = DAG.getMachineFunction();
2051 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2052 report_fatal_error("cannot use thread-local storage without bulk memory",
2053 false);
2054
2055 const GlobalValue *GV = GA->getGlobal();
2056
2057 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2058 // on other targets, if we have thread-local storage, only the local-exec
2059 // model is possible.
2060 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2061 ? GV->getThreadLocalMode()
2063
2064 // Unsupported TLS modes
2067
2068 if (model == GlobalValue::LocalExecTLSModel ||
2071 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2072 // For DSO-local TLS variables we use offset from __tls_base
2073
2074 MVT PtrVT = getPointerTy(DAG.getDataLayout());
// Pick the global.get opcode matching the pointer width.
2075 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2076 : WebAssembly::GLOBAL_GET_I32;
2077 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2078
// Presumably the machine node below initializes BaseAddr (its declaration is
// on the missing listing line 2079) — TODO confirm.
2080 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2081 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2082 0);
2083
2084 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2085 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2086 SDValue SymOffset =
2087 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2088
2089 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2090 }
2091
2093
2094 EVT VT = Op.getValueType();
2095 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2096 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2097 GA->getOffset(),
2099}
2100
// Lowers a generic GlobalAddress node to a Wrapper around a target global
// address.
//
// In position-independent mode, for a global that can be assumed DSO-local,
// the result is instead an ADD of a Wrapper around a base symbol
// ("__table_base" for function-typed globals, "__memory_base" otherwise) and
// a WrapperREL around the symbol's target global address.
//
// NOTE(review): listing lines 2108, 2116, 2123, 2126, 2128 and 2139 are
// absent from this extract (see the gaps in the embedded numbering). They
// presumably hold the address-space check guarding the fail() call, the rest
// of the PIC condition, the OperandFlags assignments and the declaration of
// BaseAddr — confirm against the upstream file.
2101SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2102 SelectionDAG &DAG) const {
2103 SDLoc DL(Op);
2104 const auto *GA = cast<GlobalAddressSDNode>(Op);
2105 EVT VT = Op.getValueType();
2106 assert(GA->getTargetFlags() == 0 &&
2107 "Unexpected target flags on generic GlobalAddressSDNode");
2109 fail(DL, DAG, "Invalid address space for WebAssembly target");
2110
2111 unsigned OperandFlags = 0;
2112 const GlobalValue *GV = GA->getGlobal();
2113 // Since WebAssembly tables cannot yet be shared across modules, we don't
2114 // need special treatment for tables in PIC mode.
2115 if (isPositionIndependent() &&
2117 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2118 MachineFunction &MF = DAG.getMachineFunction();
2119 MVT PtrVT = getPointerTy(MF.getDataLayout());
2120 const char *BaseName;
// Functions are addressed relative to the table base, everything else
// relative to the memory base.
2121 if (GV->getValueType()->isFunctionTy()) {
2122 BaseName = MF.createExternalSymbolName("__table_base");
2124 } else {
2125 BaseName = MF.createExternalSymbolName("__memory_base");
2127 }
2129 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2130 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2131
2132 SDValue SymAddr = DAG.getNode(
2133 WebAssemblyISD::WrapperREL, DL, VT,
2134 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2135 OperandFlags));
2136
2137 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2138 }
2140 }
2141
2142 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2143 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2144 GA->getOffset(), OperandFlags));
2145}
2146
2147SDValue
2148WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2149 SelectionDAG &DAG) const {
2150 SDLoc DL(Op);
2151 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2152 EVT VT = Op.getValueType();
2153 assert(ES->getTargetFlags() == 0 &&
2154 "Unexpected target flags on generic ExternalSymbolSDNode");
2155 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2156 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2157}
2158
2159SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2160 SelectionDAG &DAG) const {
2161 // There's no need for a Wrapper node because we always incorporate a jump
2162 // table operand into a BR_TABLE instruction, rather than ever
2163 // materializing it in a register.
2164 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2165 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2166 JT->getTargetFlags());
2167}
2168
// Lowers a BR_JT node to a WebAssemblyISD::BR_TABLE node whose operands are
// the chain, the index, one basic block per jump-table entry and a trailing
// default target.
2169SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2170 SelectionDAG &DAG) const {
2171 SDLoc DL(Op);
2172 SDValue Chain = Op.getOperand(0);
2173 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2174 SDValue Index = Op.getOperand(2);
2175 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2176
// NOTE(review): listing line 2177 is absent from this extract; it presumably
// declares the Ops container used below — confirm against the upstream file.
2178 Ops.push_back(Chain);
2179 Ops.push_back(Index);
2180
2181 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2182 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2183
2184 // Add an operand for each case.
2185 for (auto *MBB : MBBs)
2186 Ops.push_back(DAG.getBasicBlock(MBB));
2187
2188 // Add the first MBB as a dummy default target for now. This will be replaced
2189 // with the proper default target (and the preceding range check eliminated)
2190 // if possible by WebAssemblyFixBrTableDefaults.
2191 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2192 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2193}
2194
2195SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2196 SelectionDAG &DAG) const {
2197 SDLoc DL(Op);
2198 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2199
2200 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2201 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2202
2203 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2204 MFI->getVarargBufferVreg(), PtrVT);
2205 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2206 MachinePointerInfo(SV));
2207}
2208
// Custom-lowers the intrinsics handled below: wasm_lsda, wasm_shuffle and
// thread_pointer. Every other intrinsic returns an empty SDValue so that it
// is not custom lowered.
//
// NOTE(review): listing lines 2214-2215, 2218, 2235 and 2238 are absent from
// this extract (see the gaps in the embedded numbering). They presumably hold
// the case labels of the first switch and the declarations of Node and
// BaseAddr in the PIC branch — confirm against the upstream file.
2209SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2210 SelectionDAG &DAG) const {
2211 MachineFunction &MF = DAG.getMachineFunction();
2212 unsigned IntNo;
// The intrinsic ID sits at operand 1 or operand 0 depending on the node
// opcode; the case labels selecting between the two are not visible here.
2213 switch (Op.getOpcode()) {
2216 IntNo = Op.getConstantOperandVal(1);
2217 break;
2219 IntNo = Op.getConstantOperandVal(0);
2220 break;
2221 default:
2222 llvm_unreachable("Invalid intrinsic");
2223 }
2224 SDLoc DL(Op);
2225
2226 switch (IntNo) {
2227 default:
2228 return SDValue(); // Don't custom lower most intrinsics.
2229
2230 case Intrinsic::wasm_lsda: {
2231 auto PtrVT = getPointerTy(MF.getDataLayout());
// The symbol name is "GCC_except_table" followed by the function number.
2232 const char *SymName = MF.createExternalSymbolName(
2233 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2234 if (isPositionIndependent()) {
// In PIC mode the address is formed as a "__memory_base" wrapper plus a
// WrapperREL of the symbol.
2236 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2237 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2239 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2240 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2241 SDValue SymAddr =
2242 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2243 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2244 }
2245 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2246 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2247 }
2248
2249 case Intrinsic::wasm_shuffle: {
2250 // Drop in-chain and replace undefs, but otherwise pass through unchanged
// Ops holds the two vector operands followed by sixteen mask indices.
2251 SDValue Ops[18];
2252 size_t OpIdx = 0;
2253 Ops[OpIdx++] = Op.getOperand(1);
2254 Ops[OpIdx++] = Op.getOperand(2);
2255 while (OpIdx < 18) {
// Source operands are offset by one relative to Ops.
2256 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
// Undef or out-of-range (>= 32) mask indices are replaced by constant 0,
// keeping the constant/target-constant kind of the original operand.
2257 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2258 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2259 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2260 } else {
2261 Ops[OpIdx++] = MaskIdx;
2262 }
2263 }
2264 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2265 }
2266
2267 case Intrinsic::thread_pointer: {
// The thread pointer is read with a global.get of "__tls_base", using the
// opcode that matches the pointer width.
2268 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2269 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2270 : WebAssembly::GLOBAL_GET_I32;
2271 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2272 return SDValue(
2273 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2274 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2275 0);
2276 }
2277 }
2278}
2279
// Custom-lowers SIGN_EXTEND_INREG when sign-extension ops are unavailable but
// SIMD is enabled.
//
// Returns an empty SDValue unless the operand is an EXTRACT_VECTOR_ELT from a
// vector with lanes of at most 32 bits. If the source vector already has the
// lane type being extended from, the node is returned unchanged. Otherwise
// the vector is bitcast to that lane type and the constant lane index is
// rescaled to match.
2280SDValue
2281WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2282 SelectionDAG &DAG) const {
2283 SDLoc DL(Op);
2284 // If sign extension operations are disabled, allow sext_inreg only if operand
2285 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2286 // extension operations, but allowing sext_inreg in this context lets us have
2287 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2288 // everywhere would be simpler in this file, but would necessitate large and
2289 // brittle patterns to undo the expansion and select extract_lane_s
2290 // instructions.
2291 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2292 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2293 return SDValue();
2294
2295 const SDValue &Extract = Op.getOperand(0);
2296 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2297 if (VecT.getVectorElementType().getSizeInBits() > 32)
2298 return SDValue();
// Operand 1 carries the lane type being extended from; build the 128-bit
// vector type that has that lane type.
2299 MVT ExtractedLaneT =
2300 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2301 MVT ExtractedVecT =
2302 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2303 if (ExtractedVecT == VecT)
2304 return Op;
2305
2306 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2307 const SDNode *Index = Extract.getOperand(1).getNode();
2308 if (!isa<ConstantSDNode>(Index))
2309 return SDValue();
2310 unsigned IndexVal = Index->getAsZExtVal();
// Each original lane spans Scale lanes of the bitcast vector.
2311 unsigned Scale =
2312 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2313 assert(Scale > 1);
2314 SDValue NewIndex =
2315 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
// NOTE(review): listing line 2317 is absent from this extract; it presumably
// supplies the opcode, location and type arguments of this getNode call —
// confirm against the upstream file.
2316 SDValue NewExtract = DAG.getNode(
2318 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2319 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2320 Op.getOperand(1));
2321}
2322
2323static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2324 SelectionDAG &DAG) {
2325 SDValue Source = peekThroughBitcasts(Op);
2326 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2327 return SDValue();
2328
2329 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2330 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2331 "expected extend_low");
2332 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2333
2334 ArrayRef<int> Mask = Shuffle->getMask();
2335 // Look for a shuffle which moves from the high half to the low half.
2336 size_t FirstIdx = Mask.size() / 2;
2337 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2338 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2339 return SDValue();
2340 }
2341 }
2342
2343 SDLoc DL(Op);
2344 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2345 ? WebAssemblyISD::EXTEND_HIGH_S
2346 : WebAssemblyISD::EXTEND_HIGH_U;
2347 SDValue ShuffleSrc = Shuffle->getOperand(0);
2348 if (Op.getOpcode() == ISD::BITCAST)
2349 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2350
2351 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2352}
2353
// Custom-lowers an in-register vector extension as a chain of
// EXTEND_LOW_U / EXTEND_LOW_S nodes, or as a single extend_high when the
// factor is 2 and GetExtendHigh matches.
//
// Returns an empty SDValue for i1 or i64 source lanes and for extension
// factors other than 2, 4 or 8.
//
// NOTE(review): listing lines 2377-2378, 2381 and 2396-2397 are absent from
// this extract (see the gaps in the embedded numbering). They presumably hold
// the case labels of the opcode switch and the rest of the result-type
// expression inside the loop — confirm against the upstream file.
2354SDValue
2355WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2356 SelectionDAG &DAG) const {
2357 SDLoc DL(Op);
2358 EVT VT = Op.getValueType();
2359 SDValue Src = Op.getOperand(0);
2360 EVT SrcVT = Src.getValueType();
2361
2362 if (SrcVT.getVectorElementType() == MVT::i1 ||
2363 SrcVT.getVectorElementType() == MVT::i64)
2364 return SDValue();
2365
2366 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2367 "Unexpected extension factor.");
2368 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2369
2370 if (Scale != 2 && Scale != 4 && Scale != 8)
2371 return SDValue();
2372
2373 unsigned Ext;
2374 switch (Op.getOpcode()) {
2375 default:
2376 llvm_unreachable("unexpected opcode");
2379 Ext = WebAssemblyISD::EXTEND_LOW_U;
2380 break;
2382 Ext = WebAssemblyISD::EXTEND_LOW_S;
2383 break;
2384 }
2385
2386 if (Scale == 2) {
2387 // See if we can use EXTEND_HIGH.
2388 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2389 return ExtendHigh;
2390 }
2391
// Apply one extend per halving of Scale until the factor reaches 1.
2392 SDValue Ret = Src;
2393 while (Scale != 1) {
2394 Ret = DAG.getNode(Ext, DL,
2395 Ret.getValueType()
2398 Ret);
2399 Scale /= 2;
2400 }
2401 assert(Ret.getValueType() == VT);
2402 return Ret;
2403}
2404
// Tries to match a v2f64 / v4f32 BUILD_VECTOR whose lanes are all the same
// conversion (SINT_TO_FP, UINT_TO_FP, FP_EXTEND or FP16_TO_FP) of
// constant-index vector extracts, and emits one CONVERT_LOW_S, CONVERT_LOW_U
// or PROMOTE_LOW node for it. Returns an empty SDValue when the pattern does
// not match.
//
// NOTE(review): the signature (listing line 2405) is absent from this
// extract; the call `LowerConvertLow(Op, DAG)` in LowerBUILD_VECTOR suggests
// this body belongs to that function — confirm against the upstream file.
2406 SDLoc DL(Op);
2407 if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2408 return SDValue();
2409
// Classifies one lane: on success reports the wasm opcode for the conversion
// plus the source vector and constant index the lane was extracted from.
2410 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2411 unsigned &Index) -> bool {
2412 switch (Op.getOpcode()) {
2413 case ISD::SINT_TO_FP:
2414 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2415 break;
2416 case ISD::UINT_TO_FP:
2417 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2418 break;
2419 case ISD::FP_EXTEND:
2420 case ISD::FP16_TO_FP:
2421 Opcode = WebAssemblyISD::PROMOTE_LOW;
2422 break;
2423 default:
2424 return false;
2425 }
2426
2427 auto ExtractVector = Op.getOperand(0);
2428 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2429 return false;
2430
2431 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2432 return false;
2433
2434 SrcVec = ExtractVector.getOperand(0);
2435 Index = ExtractVector.getConstantOperandVal(1);
2436 return true;
2437 };
2438
2439 unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
2440 unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
2441 unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
2442 SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2443
2444 if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec,
2445 FirstIndex) ||
2446 !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec,
2447 SecondIndex))
2448 return SDValue();
2449
2450 // If we're converting to v4f32, check the third and fourth lanes, too.
2451 if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode,
2452 ThirdSrcVec, ThirdIndex) ||
2453 !GetConvertedLane(Op.getOperand(3), FourthOpcode,
2454 FourthSrcVec, FourthIndex)))
2455 return SDValue();
2456
2457 if (FirstOpcode != SecondOpcode)
2458 return SDValue();
2459
2460 // TODO Add an optimization similar to the v2f64 below for shuffling the
2461 // vectors when the lanes are in the wrong order or come from different src
2462 // vectors.
// The four-lane form is only accepted when all lanes use the same opcode and
// come from the same source vector at indices 0, 1, 2, 3 in order.
2463 if (NumLanes == 4 &&
2464 (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
2465 FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
2466 FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
2467 ThirdIndex != 2 || FourthIndex != 3))
2468 return SDValue();
2469
// Source vector type required by the chosen opcode.
2470 MVT ExpectedSrcVT;
2471 switch (FirstOpcode) {
2472 case WebAssemblyISD::CONVERT_LOW_S:
2473 case WebAssemblyISD::CONVERT_LOW_U:
2474 ExpectedSrcVT = MVT::v4i32;
2475 break;
2476 case WebAssemblyISD::PROMOTE_LOW:
2477 ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
2478 break;
2479 }
2480 if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2481 return SDValue();
2482
2483 auto Src = FirstSrcVec;
2484 if (NumLanes == 2 &&
2485 (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
2486 // Shuffle the source vector so that the converted lanes are the low lanes.
// The second lane is taken from the shuffle's second input, hence the +4.
2487 Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
2488 {static_cast<int>(FirstIndex),
2489 static_cast<int>(SecondIndex) + 4, -1, -1});
2490 }
2491 return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
2492 Src);
2493}
2494
// Custom-lowers BUILD_VECTOR.
//
// v8f16 vectors are rebuilt as an i16 BUILD_VECTOR and bitcast back, and the
// LowerConvertLow pattern is tried next. Otherwise the lanes are counted by
// how they could be produced (swizzle, shuffle, constant, splat), the
// strategy covering the most lanes builds the initial vector, and every
// remaining defined lane is filled in with INSERT_VECTOR_ELT.
//
// NOTE(review): listing lines 2502, 2648, 2668 and 2675 are absent from this
// extract (see the gaps in the embedded numbering). They presumably declare
// NewOps and Result and complete the two LaneSize initializers — confirm
// against the upstream file.
2495SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2496 SelectionDAG &DAG) const {
2497 MVT VT = Op.getSimpleValueType();
2498 if (VT == MVT::v8f16) {
2499 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2500 // FP16 type, so cast them to I16s.
2501 MVT IVT = VT.changeVectorElementType(MVT::i16);
2503 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2504 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
// NOTE(review): this node is built with a default-constructed SDLoc rather
// than SDLoc(Op) — confirm that is intended.
2505 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2506 return DAG.getBitcast(VT, Res);
2507 }
2508
2509 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2510 return ConvertLow;
2511
2512 SDLoc DL(Op);
2513 const EVT VecT = Op.getValueType();
2514 const EVT LaneT = Op.getOperand(0).getValueType();
2515 const size_t Lanes = Op.getNumOperands();
// Swizzle is only considered for v16i8 vectors.
2516 bool CanSwizzle = VecT == MVT::v16i8;
2517
2518 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2519 // possible number of lanes at once followed by a sequence of replace_lane
2520 // instructions to individually initialize any remaining lanes.
2521
2522 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2523 // swizzled lanes should be given greater weight.
2524
2525 // TODO: Investigate looping rather than always extracting/replacing specific
2526 // lanes to fill gaps.
2527
2528 auto IsConstant = [](const SDValue &V) {
2529 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2530 };
2531
2532 // Returns the source vector and index vector pair if they exist. Checks for:
2533 // (extract_vector_elt
2534 // $src,
2535 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2536 // )
2537 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2538 auto Bail = std::make_pair(SDValue(), SDValue());
2539 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2540 return Bail;
2541 const SDValue &SwizzleSrc = Lane->getOperand(0);
2542 const SDValue &IndexExt = Lane->getOperand(1);
2543 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2544 return Bail;
2545 const SDValue &Index = IndexExt->getOperand(0);
2546 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2547 return Bail;
2548 const SDValue &SwizzleIndices = Index->getOperand(0);
2549 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2550 SwizzleIndices.getValueType() != MVT::v16i8 ||
2551 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2552 Index->getConstantOperandVal(1) != I)
2553 return Bail;
2554 return std::make_pair(SwizzleSrc, SwizzleIndices);
2555 };
2556
2557 // If the lane is extracted from another vector at a constant index, return
2558 // that vector. The source vector must not have more lanes than the dest
2559 // because the shufflevector indices are in terms of the destination lanes and
2560 // would not be able to address the smaller individual source lanes.
2561 auto GetShuffleSrc = [&](const SDValue &Lane) {
2562 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2563 return SDValue();
2564 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2565 return SDValue();
2566 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2567 VecT.getVectorNumElements())
2568 return SDValue();
2569 return Lane->getOperand(0);
2570 };
2571
// Each list below maps a candidate (splat value, swizzle source pair or
// shuffle source) to the number of lanes it would cover.
2572 using ValueEntry = std::pair<SDValue, size_t>;
2573 SmallVector<ValueEntry, 16> SplatValueCounts;
2574
2575 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2576 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2577
2578 using ShuffleEntry = std::pair<SDValue, size_t>;
2579 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2580
// Increments the count for Val, adding a new entry on first sight.
2581 auto AddCount = [](auto &Counts, const auto &Val) {
2582 auto CountIt =
2583 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2584 if (CountIt == Counts.end()) {
2585 Counts.emplace_back(Val, 1);
2586 } else {
2587 CountIt->second++;
2588 }
2589 };
2590
// Returns the entry with the largest count; Counts must not be empty.
2591 auto GetMostCommon = [](auto &Counts) {
2592 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2593 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2594 return *CommonIt;
2595 };
2596
2597 size_t NumConstantLanes = 0;
2598
2599 // Count eligible lanes for each type of vector creation op
2600 for (size_t I = 0; I < Lanes; ++I) {
2601 const SDValue &Lane = Op->getOperand(I);
2602 if (Lane.isUndef())
2603 continue;
2604
2605 AddCount(SplatValueCounts, Lane);
2606
2607 if (IsConstant(Lane))
2608 NumConstantLanes++;
2609 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2610 AddCount(ShuffleCounts, ShuffleSrc);
2611 if (CanSwizzle) {
2612 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2613 if (SwizzleSrcs.first)
2614 AddCount(SwizzleCounts, SwizzleSrcs);
2615 }
2616 }
2617
2618 SDValue SplatValue;
2619 size_t NumSplatLanes;
2620 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2621
2622 SDValue SwizzleSrc;
2623 SDValue SwizzleIndices;
2624 size_t NumSwizzleLanes = 0;
2625 if (SwizzleCounts.size())
2626 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2627 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2628
2629 // Shuffles can draw from up to two vectors, so find the two most common
2630 // sources.
2631 SDValue ShuffleSrc1, ShuffleSrc2;
2632 size_t NumShuffleLanes = 0;
2633 if (ShuffleCounts.size()) {
2634 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2635 llvm::erase_if(ShuffleCounts,
2636 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2637 }
2638 if (ShuffleCounts.size()) {
2639 size_t AdditionalShuffleLanes;
2640 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2641 GetMostCommon(ShuffleCounts);
2642 NumShuffleLanes += AdditionalShuffleLanes;
2643 }
2644
2645 // Predicate returning true if the lane is properly initialized by the
2646 // original instruction
2647 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2649 // Prefer swizzles over shuffles over vector consts over splats
2650 if (NumSwizzleLanes >= NumShuffleLanes &&
2651 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2652 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2653 SwizzleIndices);
2654 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2655 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2656 return Swizzled == GetSwizzleSrcs(I, Lane);
2657 };
2658 } else if (NumShuffleLanes >= NumConstantLanes &&
2659 NumShuffleLanes >= NumSplatLanes) {
2660 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2661 size_t DestLaneCount = VecT.getVectorNumElements();
// Scale1 / Scale2 convert a source lane index into a destination lane index
// when a source has wider lanes and is bitcast to VecT.
2662 size_t Scale1 = 1;
2663 size_t Scale2 = 1;
2664 SDValue Src1 = ShuffleSrc1;
2665 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2666 if (Src1.getValueType() != VecT) {
2667 size_t LaneSize =
2669 assert(LaneSize > DestLaneSize);
2670 Scale1 = LaneSize / DestLaneSize;
2671 Src1 = DAG.getBitcast(VecT, Src1);
2672 }
2673 if (Src2.getValueType() != VecT) {
2674 size_t LaneSize =
2676 assert(LaneSize > DestLaneSize);
2677 Scale2 = LaneSize / DestLaneSize;
2678 Src2 = DAG.getBitcast(VecT, Src2);
2679 }
2680
2681 int Mask[16];
2682 assert(DestLaneCount <= 16);
2683 for (size_t I = 0; I < DestLaneCount; ++I) {
2684 const SDValue &Lane = Op->getOperand(I);
2685 SDValue Src = GetShuffleSrc(Lane);
2686 if (Src == ShuffleSrc1) {
2687 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2688 } else if (Src && Src == ShuffleSrc2) {
// Lanes from the second source are offset by the destination lane count.
2689 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2690 } else {
2691 Mask[I] = -1;
2692 }
2693 }
2694 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2695 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2696 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2697 auto Src = GetShuffleSrc(Lane);
2698 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2699 };
2700 } else if (NumConstantLanes >= NumSplatLanes) {
// Build a constant vector; non-constant lanes get a zero placeholder and are
// replaced afterwards.
2701 SmallVector<SDValue, 16> ConstLanes;
2702 for (const SDValue &Lane : Op->op_values()) {
2703 if (IsConstant(Lane)) {
2704 // Values may need to be fixed so that they will sign extend to be
2705 // within the expected range during ISel. Check whether the value is in
2706 // bounds based on the lane bit width and if it is out of bounds, lop
2707 // off the extra bits.
2708 uint64_t LaneBits = 128 / Lanes;
2709 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2710 ConstLanes.push_back(DAG.getConstant(
2711 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2712 SDLoc(Lane), LaneT));
2713 } else {
2714 ConstLanes.push_back(Lane);
2715 }
2716 } else if (LaneT.isFloatingPoint()) {
2717 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2718 } else {
2719 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2720 }
2721 }
2722 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2723 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2724 return IsConstant(Lane);
2725 };
2726 } else {
2727 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2728 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2729 (DestLaneSize == 32 || DestLaneSize == 64)) {
2730 // Could be selected to load_zero.
2731 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2732 } else {
2733 // Use a splat (which might be selected as a load splat)
2734 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2735 }
2736 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2737 return Lane == SplatValue;
2738 };
2739 }
2740
2741 assert(Result);
2742 assert(IsLaneConstructed);
2743
2744 // Add replace_lane instructions for any unhandled values
2745 for (size_t I = 0; I < Lanes; ++I) {
2746 const SDValue &Lane = Op->getOperand(I);
2747 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2748 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2749 DAG.getConstant(I, DL, MVT::i32));
2750 }
2751
2752 return Result;
2753}
2754
2755SDValue
2756WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2757 SelectionDAG &DAG) const {
2758 SDLoc DL(Op);
2759 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2760 MVT VecType = Op.getOperand(0).getSimpleValueType();
2761 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2762 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2763
2764 // Space for two vector args and sixteen mask indices
2765 SDValue Ops[18];
2766 size_t OpIdx = 0;
2767 Ops[OpIdx++] = Op.getOperand(0);
2768 Ops[OpIdx++] = Op.getOperand(1);
2769
2770 // Expand mask indices to byte indices and materialize them as operands
2771 for (int M : Mask) {
2772 for (size_t J = 0; J < LaneBytes; ++J) {
2773 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2774 // whole lane of vector input, to allow further reduction at VM. E.g.
2775 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2776 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2777 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2778 }
2779 }
2780
2781 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2782}
2783
// Lowers a v2i64 SETCC by unrolling it. Each lane becomes a SELECT_CC on the
// two extracted i64 elements that yields all-ones or zero, and the two lanes
// are recombined with a BUILD_VECTOR of the original result type.
2784SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2785 SelectionDAG &DAG) const {
2786 SDLoc DL(Op);
2787 // The legalizer does not know how to expand the unsupported comparison modes
2788 // of i64x2 vectors, so we manually unroll them here.
2789 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
// NOTE(review): listing line 2790 is absent from this extract; it presumably
// declares the LHS and RHS element containers filled below — confirm against
// the upstream file.
2791 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2792 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2793 const SDValue &CC = Op->getOperand(2);
// Builds the comparison result for lane I: -1 when the condition holds, else 0.
2794 auto MakeLane = [&](unsigned I) {
2795 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2796 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2797 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2798 };
2799 return DAG.getBuildVector(Op->getValueType(0), DL,
2800 {MakeLane(0), MakeLane(1)});
2801}
2802
2803SDValue
2804WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2805 SelectionDAG &DAG) const {
2806 // Allow constant lane indices, expand variable lane indices
2807 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2808 if (isa<ConstantSDNode>(IdxNode)) {
2809 // Ensure the index type is i32 to match the tablegen patterns
2810 uint64_t Idx = IdxNode->getAsZExtVal();
2811 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2812 Ops[Op.getNumOperands() - 1] =
2813 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2814 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2815 }
2816 // Perform default expansion
2817 return SDValue();
2818}
2819
// Expands a vector shift lane by lane.
//
// Lanes of 32 bits or more go through DAG.UnrollVectorOp. For narrower lanes,
// both operands are extracted as i32 elements, the shift amount is masked
// with (lane bits - 1), SRA inputs are first sign-extended in-register from
// the lane type, and the per-lane i32 shifts are recombined with a
// BUILD_VECTOR of the original type.
//
// NOTE(review): the signature (listing line 2820) is absent from this
// extract, so the function's name and linkage cannot be read here — confirm
// against the upstream file.
2821 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2822 // 32-bit and 64-bit unrolled shifts will have proper semantics
2823 if (LaneT.bitsGE(MVT::i32))
2824 return DAG.UnrollVectorOp(Op.getNode());
2825 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2826 SDLoc DL(Op);
2827 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2828 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2829 unsigned ShiftOpcode = Op.getOpcode();
// Values being shifted, extracted as i32 elements.
2830 SmallVector<SDValue, 16> ShiftedElements;
2831 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
// Per-lane shift amounts, extracted as i32 elements.
2832 SmallVector<SDValue, 16> ShiftElements;
2833 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2834 SmallVector<SDValue, 16> UnrolledOps;
2835 for (size_t i = 0; i < NumLanes; ++i) {
2836 SDValue MaskedShiftValue =
2837 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2838 SDValue ShiftedValue = ShiftedElements[i];
// For SRA the narrow value is sign-extended in-register from the lane type
// before the 32-bit shift is applied.
2839 if (ShiftOpcode == ISD::SRA)
2840 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2841 ShiftedValue, DAG.getValueType(LaneT));
2842 UnrolledOps.push_back(
2843 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2844 }
2845 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2846}
2847
2848SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2849 SelectionDAG &DAG) const {
2850 SDLoc DL(Op);
2851 // Only manually lower vector shifts
2852 assert(Op.getSimpleValueType().isVector());
2853
2854 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2855 auto ShiftVal = Op.getOperand(1);
2856
2857 // Try to skip bitmask operation since it is implied inside shift instruction
2858 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2859 if (MaskOp.getOpcode() != ISD::AND)
2860 return MaskOp;
2861 SDValue LHS = MaskOp.getOperand(0);
2862 SDValue RHS = MaskOp.getOperand(1);
2863 if (MaskOp.getValueType().isVector()) {
2864 APInt MaskVal;
2865 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2866 std::swap(LHS, RHS);
2867
2868 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2869 MaskVal == MaskBits)
2870 MaskOp = LHS;
2871 } else {
2872 if (!isa<ConstantSDNode>(RHS.getNode()))
2873 std::swap(LHS, RHS);
2874
2875 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2876 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2877 MaskOp = LHS;
2878 }
2879
2880 return MaskOp;
2881 };
2882
2883 // Skip vector and operation
2884 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2885 ShiftVal = DAG.getSplatValue(ShiftVal);
2886 if (!ShiftVal)
2887 return unrollVectorShift(Op, DAG);
2888
2889 // Skip scalar and operation
2890 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2891 // Use anyext because none of the high bits can affect the shift
2892 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2893
2894 unsigned Opcode;
2895 switch (Op.getOpcode()) {
2896 case ISD::SHL:
2897 Opcode = WebAssemblyISD::VEC_SHL;
2898 break;
2899 case ISD::SRA:
2900 Opcode = WebAssemblyISD::VEC_SHR_S;
2901 break;
2902 case ISD::SRL:
2903 Opcode = WebAssemblyISD::VEC_SHR_U;
2904 break;
2905 default:
2906 llvm_unreachable("unexpected opcode");
2907 }
2908
2909 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2910}
2911
2912SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2913 SelectionDAG &DAG) const {
2914 EVT ResT = Op.getValueType();
2915 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2916
2917 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2918 (SatVT == MVT::i32 || SatVT == MVT::i64))
2919 return Op;
2920
2921 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2922 return Op;
2923
2924 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2925 return Op;
2926
2927 return SDValue();
2928}
2929
// Returns true when both of the following hold for the binary node Op:
//  * NaNs are ruled out: the node has the no-NaNs flag, or both operands are
//    known never to be NaN; and
//  * zeros are ruled out: the node has the no-signed-zeros flag, or at least
//    one operand is known never to be a logical zero.
2931 return (Op->getFlags().hasNoNaNs() ||
2932 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2933 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2934 (Op->getFlags().hasNoSignedZeros() ||
2935 DAG.isKnownNeverLogicalZero(Op->getOperand(0)) ||
2936 DAG.isKnownNeverLogicalZero(Op->getOperand(1)));
2937}
2938
2939SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2940 SelectionDAG &DAG) const {
2941 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2942 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2943 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2944 }
2945 return SDValue();
2946}
2947
2948SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2949 SelectionDAG &DAG) const {
2950 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2951 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2952 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2953 }
2954 return SDValue();
2955}
2956
2957//===----------------------------------------------------------------------===//
2958// Custom DAG combine hooks
2959//===----------------------------------------------------------------------===//
// DAG combine for a vector shuffle node (N is cast to ShuffleVectorSDNode
// below). Only a unary shuffle of a lane-count-preserving BITCAST from a
// 128-bit vector is rewritten; every other shape returns an empty SDValue.
2960static SDValue
2962 auto &DAG = DCI.DAG;
2963 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2964
2965 // Hoist vector bitcasts that don't change the number of lanes out of unary
2966 // shuffles, where they are less likely to get in the way of other combines.
2967 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2968 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2969 SDValue Bitcast = N->getOperand(0);
2970 if (Bitcast.getOpcode() != ISD::BITCAST)
2971 return SDValue();
// The second shuffle input must be undef, i.e. the shuffle is unary.
2972 if (!N->getOperand(1).isUndef())
2973 return SDValue();
2974 SDValue CastOp = Bitcast.getOperand(0);
2975 EVT SrcType = CastOp.getValueType();
2976 EVT DstType = Bitcast.getValueType();
// The same mask is reused on the source type, so lane counts must match.
2977 if (!SrcType.is128BitVector() ||
2978 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2979 return SDValue();
2980 SDValue NewShuffle = DAG.getVectorShuffle(
2981 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2982 return DAG.getBitcast(DstType, NewShuffle);
2983}
2984
2985/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2986/// split up into scalar instructions during legalization, and the vector
2987/// extending instructions are selected in performVectorExtendCombine below.
///
/// Handled shapes: v4i8/v4i16 -> v4f32 (extended through v4i32) and
/// v2i8/v2i16 -> v2f64 (extended through v2i32). Any other type pair returns
/// an empty SDValue.
2988static SDValue
2991 auto &DAG = DCI.DAG;
2992 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2993 N->getOpcode() == ISD::SINT_TO_FP);
2994
2995 EVT InVT = N->getOperand(0)->getValueType(0);
2996 EVT ResVT = N->getValueType(0);
// ExtVT is the i32-lane vector type the narrow input is extended to first.
2997 MVT ExtVT;
2998 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2999 ExtVT = MVT::v4i32;
3000 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3001 ExtVT = MVT::v2i32;
3002 else
3003 return SDValue();
3004
3005 unsigned Op =
3007 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
// Re-emit the original conversion opcode on the extended input.
3008 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
3009}
3010
// DAG combine that replaces an int-to-FP conversion with SINT_TO_FP when the
// result is a vector and the input is non-negative (nneg flag on the node, or
// the sign bit of the operand is known to be zero). Otherwise returns an empty
// SDValue.
3011static SDValue
3014 auto &DAG = DCI.DAG;
3015
3016 SDNodeFlags Flags = N->getFlags();
3017 SDValue Op0 = N->getOperand(0);
3018 EVT VT = N->getValueType(0);
3019
3020 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3021 // Depending on the target (runtime) backend, this might be performance
3022 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3023 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
3024 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
3025 }
3026
3027 return SDValue();
3028}
3029
// DAG combine for SIGN_EXTEND / ZERO_EXTEND (asserted below). Two rewrites:
//  1. v16i8 -> v16i32 is built from two layers of extend-low/extend-high
//     nodes and a CONCAT_VECTORS of the four v4i32 quarters.
//  2. An extend of an EXTRACT_SUBVECTOR that selects the low or high half of
//     a v16i8 / v8i16 / v4i32 source becomes a single EXTEND_{LOW,HIGH}_{S,U}
//     node on the full source.
// Anything else returns an empty SDValue.
3030static SDValue
3032 auto &DAG = DCI.DAG;
3033 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3034 N->getOpcode() == ISD::ZERO_EXTEND);
3035
3036 EVT ResVT = N->getValueType(0);
3037 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3038 SDLoc DL(N);
3039
3040 if (ResVT == MVT::v16i32 && N->getOperand(0)->getValueType(0) == MVT::v16i8) {
3041 // Use a tree of extend low/high to split and extend the input in two
3042 // layers to avoid doing several shuffles and even more extends.
3043 unsigned LowOp =
3044 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3045 unsigned HighOp =
3046 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3047 SDValue Input = N->getOperand(0);
// First layer: v16i8 -> two v8i16 halves.
3048 SDValue LowHalf = DAG.getNode(LowOp, DL, MVT::v8i16, Input);
3049 SDValue HighHalf = DAG.getNode(HighOp, DL, MVT::v8i16, Input);
// Second layer: each v8i16 half -> two v4i32 quarters, in lane order.
3050 SDValue Subvectors[] = {
3051 DAG.getNode(LowOp, DL, MVT::v4i32, LowHalf),
3052 DAG.getNode(HighOp, DL, MVT::v4i32, LowHalf),
3053 DAG.getNode(LowOp, DL, MVT::v4i32, HighHalf),
3054 DAG.getNode(HighOp, DL, MVT::v4i32, HighHalf),
3055 };
3056 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Subvectors);
3057 }
3058
3059 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3060 // possible before the extract_subvector can be expanded.
3061 auto Extract = N->getOperand(0);
3062 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3063 return SDValue();
3064 auto Source = Extract.getOperand(0);
// The subvector start index must be a compile-time constant.
3065 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3066 if (IndexNode == nullptr)
3067 return SDValue();
3068 auto Index = IndexNode->getZExtValue();
3069
3070 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3071 // extracted subvector is the low or high half of its source.
3072 if (ResVT == MVT::v8i16) {
3073 if (Extract.getValueType() != MVT::v8i8 ||
3074 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3075 return SDValue();
3076 } else if (ResVT == MVT::v4i32) {
3077 if (Extract.getValueType() != MVT::v4i16 ||
3078 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3079 return SDValue();
3080 } else if (ResVT == MVT::v2i64) {
3081 if (Extract.getValueType() != MVT::v2i32 ||
3082 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3083 return SDValue();
3084 } else {
3085 return SDValue();
3086 }
3087
// Index 0 selects the low half; the only other index accepted above selects
// the high half.
3088 bool IsLow = Index == 0;
3089
3090 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3091 : WebAssemblyISD::EXTEND_HIGH_S)
3092 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3093 : WebAssemblyISD::EXTEND_HIGH_U);
3094
3095 return DAG.getNode(Op, DL, ResVT, Source);
3096}
3097
// DAG combine that folds a v2f64 conversion combined with an all-zero upper
// half into a single "*_zero" WebAssembly node (TRUNC_SAT_ZERO_S,
// TRUNC_SAT_ZERO_U or DEMOTE_ZERO). Two shapes are recognised: the zero half
// concatenated after the conversion (N is CONCAT_VECTORS), or concatenated
// onto the source before the conversion (N is the conversion itself).
3098static SDValue
3100 auto &DAG = DCI.DAG;
3101
// Maps the generic conversion opcode to its WebAssembly "zero" counterpart.
3102 auto GetWasmConversionOp = [](unsigned Op) {
3103 switch (Op) {
3105 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3107 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3108 case ISD::FP_ROUND:
3109 return WebAssemblyISD::DEMOTE_ZERO;
3110 }
3111 llvm_unreachable("unexpected op");
3112 };
3113
// True when SplatVal is a BUILD_VECTOR that is a constant splat of zero.
3114 auto IsZeroSplat = [](SDValue SplatVal) {
3115 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3116 APInt SplatValue, SplatUndef;
3117 unsigned SplatBitSize;
3118 bool HasAnyUndefs;
3119 // Endianness doesn't matter in this context because we are looking for
3120 // an all-zero value.
3121 return Splat &&
3122 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3123 HasAnyUndefs) &&
3124 SplatValue == 0;
3125 };
3126
3127 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3128 // Combine this:
3129 //
3130 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3131 //
3132 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3133 //
3134 // Or this:
3135 //
3136 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3137 //
3138 // into (f32x4.demote_zero_f64x2 $x).
3139 EVT ResVT;
3140 EVT ExpectedConversionType;
3141 auto Conversion = N->getOperand(0);
3142 auto ConversionOp = Conversion.getOpcode();
3143 switch (ConversionOp) {
3146 ResVT = MVT::v4i32;
3147 ExpectedConversionType = MVT::v2i32;
3148 break;
3149 case ISD::FP_ROUND:
3150 ResVT = MVT::v4f32;
3151 ExpectedConversionType = MVT::v2f32;
3152 break;
3153 default:
3154 return SDValue();
3155 }
3156
// The concat result, the conversion result and the conversion source must all
// have the exact types of the pattern documented above.
3157 if (N->getValueType(0) != ResVT)
3158 return SDValue();
3159
3160 if (Conversion.getValueType() != ExpectedConversionType)
3161 return SDValue();
3162
3163 auto Source = Conversion.getOperand(0);
3164 if (Source.getValueType() != MVT::v2f64)
3165 return SDValue();
3166
// The second concat operand must be an all-zero vector of the same type.
3167 if (!IsZeroSplat(N->getOperand(1)) ||
3168 N->getOperand(1).getValueType() != ExpectedConversionType)
3169 return SDValue();
3170
3171 unsigned Op = GetWasmConversionOp(ConversionOp);
3172 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3173 }
3174
3175 // Combine this:
3176 //
3177 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3178 //
3179 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3180 //
3181 // Or this:
3182 //
3183 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3184 //
3185 // into (f32x4.demote_zero_f64x2 $x).
3186 EVT ResVT;
3187 auto ConversionOp = N->getOpcode();
3188 switch (ConversionOp) {
3191 ResVT = MVT::v4i32;
3192 break;
3193 case ISD::FP_ROUND:
3194 ResVT = MVT::v4f32;
3195 break;
3196 default:
3197 llvm_unreachable("unexpected op");
3198 }
3199
3200 if (N->getValueType(0) != ResVT)
3201 return SDValue();
3202
// NOTE(review): the opcode of Concat is not checked here, only its v4f64 type
// and the types of its first two operands - confirm this is intentional.
3203 auto Concat = N->getOperand(0);
3204 if (Concat.getValueType() != MVT::v4f64)
3205 return SDValue();
3206
3207 auto Source = Concat.getOperand(0);
3208 if (Source.getValueType() != MVT::v2f64)
3209 return SDValue();
3210
3211 if (!IsZeroSplat(Concat.getOperand(1)) ||
3212 Concat.getOperand(1).getValueType() != MVT::v2f64)
3213 return SDValue();
3214
3215 unsigned Op = GetWasmConversionOp(ConversionOp);
3216 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3217}
3218
3219// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3220static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3221 const SDLoc &DL, unsigned VectorWidth) {
3222 EVT VT = Vec.getValueType();
3223 EVT ElVT = VT.getVectorElementType();
3224 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3225 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3226 VT.getVectorNumElements() / Factor);
3227
3228 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3229 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3230 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3231
3232 // This is the index of the first element of the VectorWidth-bit chunk
3233 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3234 IdxVal &= ~(ElemsPerChunk - 1);
3235
3236 // If the input is a buildvector just emit a smaller one.
3237 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3238 return DAG.getBuildVector(ResultVT, DL,
3239 Vec->ops().slice(IdxVal, ElemsPerChunk));
3240
3241 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3242 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3243}
3244
3245// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3246// is the expected destination value type after recursion. In is the initial
3247// input. Note that the input should have enough leading zero bits to prevent
3248// NARROW_U from saturating results.
// Returns an empty SDValue when the source element count is not a power of 2.
3250 SelectionDAG &DAG) {
3251 EVT SrcVT = In.getValueType();
3252
3253 // No truncation required, we might get here due to recursive calls.
3254 if (SrcVT == DstVT)
3255 return In;
3256
3257 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3258 unsigned NumElems = SrcVT.getVectorNumElements();
3259 if (!isPowerOf2_32(NumElems))
3260 return SDValue();
3261 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3262 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3263
3264 LLVMContext &Ctx = *DAG.getContext();
// Element type after one halving step of the source element width.
3265 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3266
3267 // Narrow to the largest type possible:
3268 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3269 EVT InVT = MVT::i16, OutVT = MVT::i8;
3270 if (SrcVT.getScalarSizeInBits() > 16) {
3271 InVT = MVT::i32;
3272 OutVT = MVT::i16;
3273 }
// Turn the scalar In/Out types into vector types that each span half of the
// source vector's bits.
3274 unsigned SubSizeInBits = SrcSizeInBits / 2;
3275 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3276 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3277
3278 // Split lower/upper subvectors.
3279 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3280 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3281
3282 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3283 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3284 Lo = DAG.getBitcast(InVT, Lo);
3285 Hi = DAG.getBitcast(InVT, Hi);
3286 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3287 return DAG.getBitcast(DstVT, Res);
3288 }
3289
3290 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3291 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3292 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3293 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3294
3295 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3296 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3297 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3298}
3299
// DAG combine for a vector truncate from i16/i32/i64 lanes to a 128-bit vector
// with i8 or i16 lanes. The input is ANDed with Mask first and then narrowed
// with truncateVectorWithNARROW. Other shapes return an empty SDValue.
3302 auto &DAG = DCI.DAG;
3303
3304 SDValue In = N->getOperand(0);
3305 EVT InVT = In.getValueType();
3306 if (!InVT.isSimple())
3307 return SDValue();
3308
3309 EVT OutVT = N->getValueType(0);
3310 if (!OutVT.isVector())
3311 return SDValue();
3312
3313 EVT OutSVT = OutVT.getVectorElementType();
3314 EVT InSVT = InVT.getVectorElementType();
3315 // Currently only cover truncate to v16i8 or v8i16.
3316 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3317 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3318 return SDValue();
3319
3320 SDLoc DL(N);
3322 OutVT.getScalarSizeInBits());
// NOTE(review): Mask is built from the output lane width; presumably it clears
// the bits above that width so the unsigned narrow cannot saturate - confirm
// against the Mask definition.
3323 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3324 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3325}
3326
// DAG combine, active only before legalization, for a bitcast from a
// fixed-length <N x i1> vector to a scalar integer:
//  * N in {2, 4, 8, 16}: emitted as the wasm bitmask intrinsic on the
//    sign-extended source, zero-extended or truncated to the result type.
//  * N in {32, 64} and the source is a setcc with a CONCAT_VECTORS operand:
//    each concat operand is compared separately, bitcast to i16, and the
//    chunks are merged into one i32/i64.
// All other cases return an empty SDValue.
3329 using namespace llvm::SDPatternMatch;
3330 auto &DAG = DCI.DAG;
3331 SDLoc DL(N);
3332 SDValue Src = N->getOperand(0);
3333 EVT VT = N->getValueType(0);
3334 EVT SrcVT = Src.getValueType();
3335
3336 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3337 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3338 return SDValue();
3339
3340 unsigned NumElts = SrcVT.getVectorNumElements();
// Lane width that makes an N-lane vector 128 bits wide.
3341 EVT Width = MVT::getIntegerVT(128 / NumElts);
3342
3343 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3344 // ==> bitmask
3345 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3346 return DAG.getZExtOrTrunc(
3347 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3348 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3349 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3350 SrcVT.changeVectorElementType(
3351 *DAG.getContext(), Width))}),
3352 DL, VT);
3353 }
3354
3355 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3356 if (NumElts == 32 || NumElts == 64) {
3357 // Strategy: We will setcc them separately in v16i8 -> v16i1
3358 // Bitcast them to i16, extend them to either i32 or i64.
3359 // Merge them: shift the accumulator left by 16, then add the next chunk.
3360 SDValue Concat, SetCCVector;
3361 ISD::CondCode SetCond;
3362
3363 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3364 m_CondCode(SetCond)))))
3365 return SDValue();
3366 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3367 return SDValue();
3368
3369 uint64_t ElementWidth =
3371
// One v16i1 compare per concat operand, against the matching 128-bit slice of
// the other setcc operand; each compare result is bitcast to i16.
3372 SmallVector<SDValue> VectorsToShuffle;
3373 for (size_t I = 0; I < Concat->ops().size(); I++) {
3374 VectorsToShuffle.push_back(DAG.getBitcast(
3375 MVT::i16,
3376 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3377 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3378 DAG, DL, 128),
3379 SetCond)));
3380 }
3381
// Two chunks fit in an i32; otherwise an i64 is used.
3382 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3383 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3384
// NOTE(review): because the accumulator is shifted before each add, the first
// concat operand ends up in the most significant 16 bits - confirm this is the
// bit order the bitcast result is expected to have.
3385 for (SDValue V : VectorsToShuffle) {
3386 ReturningInteger = DAG.getNode(
3387 ISD::SHL, DL, ReturnType,
3388 {ReturningInteger, DAG.getShiftAmountConstant(16, ReturnType, DL)});
3389
3390 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3391 ReturningInteger =
3392 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3393 }
3394
3395 return ReturningInteger;
3396 }
3397
3398 return SDValue();
3399}
3400
// DAG combine for an INTRINSIC_WO_CHAIN node (asserted below). Applies only to
// the wasm bitmask intrinsic whose argument is a setcc against zero; the setcc
// is dropped and the bitmask is taken directly from the compared value.
3402 // bitmask (setcc <X>, 0, setlt) => bitmask X
3403 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3404 using namespace llvm::SDPatternMatch;
3405
// Operand 0 of the node is the intrinsic ID.
3406 if (N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask)
3407 return SDValue();
3408
3409 SDValue LHS;
3410 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3412 return SDValue();
3413
// Rebuild the same intrinsic with the non-zero compare operand as argument.
3414 SDLoc DL(N);
3415 return DAG.getNode(
3416 ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
3417 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
3418}
3419
// DAG combine for an INTRINSIC_WO_CHAIN node (asserted below) that folds a
// compare-with-zero feeding the wasm anytrue / alltrue intrinsics, using the
// four rewrites listed here. Returns an empty SDValue when none applies.
3421 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3422 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3423 // any_true (setcc <X>, 0, ne) => (any_true X)
3424 // all_true (setcc <X>, 0, ne) => (all_true X)
3425 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3426 using namespace llvm::SDPatternMatch;
3427
3428 SDValue LHS;
3429 if (N->getNumOperands() < 2 ||
3430 !sd_match(N->getOperand(1),
3432 return SDValue();
// Bail out when the compared vector would be wider than 128 bits.
3433 EVT LT = LHS.getValueType();
3434 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3435 return SDValue();
3436
// Tries one rewrite: intrinsic InPre applied to (setcc X, 0, SetType) becomes
// intrinsic InPost applied to X; for SETEQ the i32 result is inverted with
// XOR 1. The result is zero-extended or truncated to N's type.
3437 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3438 ISD::CondCode SetType,
3439 Intrinsic::WASMIntrinsics InPost) {
3440 if (N->getConstantOperandVal(0) != InPre)
3441 return SDValue();
3442
3443 SDValue LHS;
3444 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3445 m_SpecificCondCode(SetType))))
3446 return SDValue();
3447
3448 SDLoc DL(N);
3449 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3450 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3451 if (SetType == ISD::SETEQ)
3452 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3453 DAG.getConstant(1, DL, MVT::i32));
3454 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3455 };
3456
3457 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3458 Intrinsic::wasm_alltrue))
3459 return AnyTrueEQ;
3460 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3461 Intrinsic::wasm_anytrue))
3462 return AllTrueEQ;
3463 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3464 Intrinsic::wasm_anytrue))
3465 return AnyTrueNE;
3466 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3467 Intrinsic::wasm_alltrue))
3468 return AllTrueNE;
3469
3470 return SDValue();
3471}
3472
// Matches a setcc node N whose condition code equals MatchCond and whose
// constant right-hand side, sign-extended, equals MatchRHS. On a match it
// emits intrinsic Intrin on operand 0 of the setcc's left-hand side,
// sign-extended or truncated to VecVT. The i32 result is inverted with XOR 1
// when RequiresNegate is set, then zero-extended or truncated to N's result
// type. Returns an empty SDValue when the condition or constant differs.
// The cast<> calls require operand 2 to be a CondCodeSDNode and operand 1 to
// be a ConstantSDNode.
3473template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3474 Intrinsic::ID Intrin>
3476 SDValue LHS = N->getOperand(0);
3477 SDValue RHS = N->getOperand(1);
3478 SDValue Cond = N->getOperand(2);
3479 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3480 return SDValue();
3481
3482 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3483 return SDValue();
3484
3485 SDLoc DL(N);
3486 SDValue Ret =
3487 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3488 {DAG.getConstant(Intrin, DL, MVT::i32),
3489 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)});
3490 if (RequiresNegate)
3491 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3492 DAG.getConstant(1, DL, MVT::i32));
3493 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3494}
3495
3496/// Try to convert a i128 comparison to a v16i8 comparison before type
3497/// legalization splits it up into chunks
///
/// Applies only to integer equality/inequality on i128 when SIMD128 is
/// available and both operands are constants or loads. The result is
/// `setcc (alltrue|anytrue (setcc v16i8 X, Y, CC)), 0, ne`, using alltrue for
/// SETEQ and anytrue otherwise.
3498static SDValue
3500 const WebAssemblySubtarget *Subtarget) {
3501
3502 SDLoc DL(N);
3503 SDValue X = N->getOperand(0);
3504 SDValue Y = N->getOperand(1);
3505 EVT VT = N->getValueType(0);
3506 EVT OpVT = X.getValueType();
3507
3508 SelectionDAG &DAG = DCI.DAG;
3510 Attribute::NoImplicitFloat))
3511 return SDValue();
3512
3513 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3514 // We're looking for an oversized integer equality comparison with SIMD
// The exact-type test (OpVT != MVT::i128) is the decisive one here.
3515 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3516 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3517 return SDValue();
3518
3519 // Don't perform this combine if constructing the vector will be expensive.
3520 auto IsVectorBitCastCheap = [](SDValue X) {
3522 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3523 };
3524
3525 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3526 return SDValue();
3527
// Compare byte-wise as v16i8 vectors.
3528 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3529 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3530 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3531
// Equality needs every byte lane to match; inequality needs any lane to differ.
3532 SDValue Intr =
3533 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3534 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3535 : Intrinsic::wasm_anytrue,
3536 DL, MVT::i32),
3537 Cmp});
3538
// Convert the i32 intrinsic result back to a boolean of the original type.
3539 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3540 ISD::SETNE);
3541}
3542
// DAG combine for a scalar-integer setcc, active only before legalization.
// It first tries combineVectorSizedSetCCEquality. Failing that, it handles a
// setcc whose left operand is a bitcast from a <N x i1> vector with N in
// {2, 4, 8, 16}, trying in turn the four patterns documented before each
// match below.
3545 const WebAssemblySubtarget *Subtarget) {
3546 if (!DCI.isBeforeLegalize())
3547 return SDValue();
3548
3549 EVT VT = N->getValueType(0);
3550 if (!VT.isScalarInteger())
3551 return SDValue();
3552
3553 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3554 return V;
3555
3556 SDValue LHS = N->getOperand(0);
3557 if (LHS->getOpcode() != ISD::BITCAST)
3558 return SDValue();
3559
3560 EVT FromVT = LHS->getOperand(0).getValueType();
3561 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3562 return SDValue();
3563
3564 unsigned NumElts = FromVT.getVectorNumElements();
3565 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3566 return SDValue();
3567
// NOTE(review): cast<> asserts on a type mismatch and never returns null, so
// this cannot serve as a "is operand 1 a constant?" test. With assertions
// disabled a non-constant operand is treated as a ConstantSDNode further on.
// An isa<>/dyn_cast<> check looks like the intent - confirm and fix.
3568 if (!cast<ConstantSDNode>(N->getOperand(1)))
3569 return SDValue();
3570
3571 auto &DAG = DCI.DAG;
// i1 lanes are widened so that the N-lane vector is 128 bits wide.
3572 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3573 MVT::getIntegerVT(128 / NumElts));
3574 // setcc (iN (bitcast (vNi1 X))), 0, ne
3575 // ==> any_true (vNi1 X)
3577 N, VecVT, DAG)) {
3578 return Match;
3579 }
3580 // setcc (iN (bitcast (vNi1 X))), 0, eq
3581 // ==> xor (any_true (vNi1 X)), -1
3583 N, VecVT, DAG)) {
3584 return Match;
3585 }
3586 // setcc (iN (bitcast (vNi1 X))), -1, eq
3587 // ==> all_true (vNi1 X)
3589 N, VecVT, DAG)) {
3590 return Match;
3591 }
3592 // setcc (iN (bitcast (vNi1 X))), -1, ne
3593 // ==> xor (all_true (vNi1 X)), -1
3595 N, VecVT, DAG)) {
3596 return Match;
3597 }
3598 return SDValue();
3599}
3600
// Rewrites a v8i32 / v16i32 multiply whose two operands are the same kind of
// extend (both sign or both zero) from equal i8-lane vector types. The
// multiply is done on v8i16 lanes and the products are then extended to v4i32
// pieces that are concatenated. Returns an empty SDValue for any other shape.
3602 EVT VT = N->getValueType(0);
3603 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3604 return SDValue();
3605
3606 // Mul with extending inputs.
3607 SDValue LHS = N->getOperand(0);
3608 SDValue RHS = N->getOperand(1);
3609 if (LHS.getOpcode() != RHS.getOpcode())
3610 return SDValue();
3611
3612 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3613 LHS.getOpcode() != ISD::ZERO_EXTEND)
3614 return SDValue();
3615
3616 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3617 return SDValue();
3618
3619 EVT FromVT = LHS->getOperand(0).getValueType();
3620 EVT EltTy = FromVT.getVectorElementType();
3621 if (EltTy != MVT::i8)
3622 return SDValue();
3623
3624 // For an input DAG that looks like this
3625 // %a = input_type
3626 // %b = input_type
3627 // %lhs = extend %a to output_type
3628 // %rhs = extend %b to output_type
3629 // %mul = mul %lhs, %rhs
3630
3631 // input_type | output_type | instructions
3632 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3633 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3634 // | | %low_low = i32x4.ext_low_i16x8_ %low
3635 // | | %low_high = i32x4.ext_high_i16x8_ %low
3636 // | | %high_low = i32x4.ext_low_i16x8_ %high
3637 // | | %high_high = i32x4.ext_high_i16x8_ %high
3638 // | | %res = concat_vector(...)
3639 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3640 // | | %low_low = i32x4.ext_low_i16x8_ %low
3641 // | | %low_high = i32x4.ext_high_i16x8_ %low
3642 // | | %res = concat_vector(%low_low, %low_high)
3643
3644 SDLoc DL(N);
3645 unsigned NumElts = VT.getVectorNumElements();
3646 SDValue ExtendInLHS = LHS->getOperand(0);
3647 SDValue ExtendInRHS = RHS->getOperand(0);
// Signedness of the extend selects the _S or _U extend-low/high opcodes.
3648 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3649 unsigned ExtendLowOpc =
3650 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3651 unsigned ExtendHighOpc =
3652 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3653
3654 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3655 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3656 };
3657 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3658 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3659 };
3660
3661 if (NumElts == 16) {
// Multiply the low and high halves separately on v8i16 lanes.
3662 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3663 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3664 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3665 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3666 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3667 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3668 SDValue SubVectors[] = {
3669 GetExtendLow(MVT::v4i32, MulLow),
3670 GetExtendHigh(MVT::v4i32, MulLow),
3671 GetExtendLow(MVT::v4i32, MulHigh),
3672 GetExtendHigh(MVT::v4i32, MulHigh),
3673 };
3674 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3675 } else {
3676 assert(NumElts == 8);
// The 8-lane input is extended with the original generic extend opcode.
3677 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3678 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3679 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3680 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3681 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3682 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3683 }
// Not reached: both branches above return.
3684 return SDValue();
3685}
3686
// DAG combine for a vector MUL (asserted below). It first tries
// TryWideExtMulCombine. Otherwise only v8i8 and v16i8 multiplies are handled:
// operands are zero-extended to v8i16, multiplied, and the low byte of every
// 16-bit product is gathered with a shuffle.
3689 assert(N->getOpcode() == ISD::MUL);
3690 EVT VT = N->getValueType(0);
3691 if (!VT.isVector())
3692 return SDValue();
3693
3694 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3695 return Res;
3696
3697 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3698 // extend them to v8i16.
3699 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3700 return SDValue();
3701
3702 SDLoc DL(N);
3703 SelectionDAG &DAG = DCI.DAG;
3704 SDValue LHS = N->getOperand(0);
3705 SDValue RHS = N->getOperand(1);
3706 EVT MulVT = MVT::v8i16;
3707
3708 if (VT == MVT::v8i8) {
// Pad each v8i8 operand with an undef upper half to form a v16i8.
3709 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3710 DAG.getUNDEF(MVT::v8i8));
3711 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3712 DAG.getUNDEF(MVT::v8i8));
3713 SDValue LowLHS =
3714 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3715 SDValue LowRHS =
3716 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3717 SDValue MulLow = DAG.getBitcast(
3718 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3719 // Take the low byte of each lane.
3720 SDValue Shuffle = DAG.getVectorShuffle(
3721 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3722 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
// The eight result bytes sit in the low 64 bits of the shuffle.
3723 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3724 } else {
3725 assert(VT == MVT::v16i8 && "Expected v16i8");
3726 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3727 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3728 SDValue HighLHS =
3729 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3730 SDValue HighRHS =
3731 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3732
3733 SDValue MulLow =
3734 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3735 SDValue MulHigh =
3736 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3737
3738 // Take the low byte of each lane.
3739 return DAG.getVectorShuffle(
3740 VT, DL, MulLow, MulHigh,
3741 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3742 }
3743}
3744
3745SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3746 SelectionDAG &DAG) {
3747 SDLoc DL(In);
3748 LLVMContext &Ctx = *DAG.getContext();
3749 EVT InVT = In.getValueType();
3750 unsigned NumElems = InVT.getVectorNumElements() * 2;
3751 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3752 SDValue Concat =
3753 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3754 if (NumElems < RequiredNumElems) {
3755 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3756 }
3757 return Concat;
3758}
3759
// Rewrites a vector FP-to-integer conversion whose result lanes are i8 or i16
// and whose source lanes are f32: convert to i32 lanes first, mask, then
// narrow to the requested type.
// Returns an empty SDValue (no combine) when the result is not a vector, its
// element type is not i8/i16, its element count is not a power of two, or the
// source element type is not f32.
3761 EVT OutVT = N->getValueType(0);
3762 if (!OutVT.isVector())
3763 return SDValue();
3764
3765 EVT OutElTy = OutVT.getVectorElementType();
3766 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3767 return SDValue();
3768
3769 unsigned NumElems = OutVT.getVectorNumElements();
3770 if (!isPowerOf2_32(NumElems))
3771 return SDValue();
3772
3773 EVT FPVT = N->getOperand(0)->getValueType(0);
3774 if (FPVT.getVectorElementType() != MVT::f32)
3775 return SDValue();
3776
3777 SDLoc DL(N);
3778
3779 // First, convert to i32.
// The same opcode as N is re-issued, but producing an i32 vector with the
// same number of elements as the requested result.
3780 LLVMContext &Ctx = *DAG.getContext();
3781 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3782 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
// NOTE(review): the start of the statement that the next line completes (it
// introduces `Mask`, used below) is not visible here; from the trailing
// argument it presumably keeps the low OutVT-lane-width bits -- confirm.
3784 OutVT.getScalarSizeInBits());
3785 // Mask out the top MSBs.
3786 SDValue Masked =
3787 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3788
// Results narrower than 128 bits: widen the masked i32 vector with
// DoubleVectorWidth until it has at least as many elements as v16i8 / v8i16
// (chosen by the output element type), narrow that to the 128-bit type, then
// take the sub-vector at index 0 of the output's bit width and bitcast it to
// OutVT.
3789 if (OutVT.getSizeInBits() < 128) {
3790 // Create a wide enough vector that we can use narrow.
3791 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3792 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3793 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3794 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3795 return DAG.getBitcast(
3796 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3797 } else {
// 128 bits or wider: narrow the masked vector straight to OutVT.
3798 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3799 }
// Not reached: both branches of the if/else above return.
3800 return SDValue();
3801}
3802
3803// Wide vector shift operations such as v8i32 with sign-extended
3804// operands cause Type Legalizer crashes because the target-specific
3805// extension nodes cannot be directly mapped to the 256-bit size.
3806//
3807// To resolve the crash and optimize performance, we intercept the
3808// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3809// into multipliers and manually split the vector into two v4i32 halves.
3810//
// The combine only fires when the result type is v8i32, the shifted value is
// a sign_extend or zero_extend of a v8i16, and the shift amount is a
// BUILD_VECTOR whose operands are all constants smaller than 15 (sign_extend)
// or 16 (zero_extend). In every other case an empty SDValue is returned and
// the node is left alone. For zero_extend the ext_low_u / ext_high_u nodes
// are used instead of the signed ones shown below.
//
3811// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3812// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3813// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3814// t4: v8i32 = concat_vectors t2, t3
3817 SelectionDAG &DAG = DCI.DAG;
3818 assert(N->getOpcode() == ISD::SHL);
3819 EVT VT = N->getValueType(0);
3820 if (VT != MVT::v8i32)
3821 return SDValue();
3822
3823 SDValue LHS = N->getOperand(0);
3824 SDValue RHS = N->getOperand(1);
// The shifted value must itself be an extension; its kind selects the
// signed or unsigned extend nodes used further down.
3825 unsigned ExtOpc = LHS.getOpcode();
3826 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3827 return SDValue();
3828
3829 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3830 return SDValue();
3831
3832 SDLoc DL(N);
3833 SDValue ExtendIn = LHS.getOperand(0);
3834 EVT FromVT = ExtendIn.getValueType();
3835 if (FromVT != MVT::v8i16)
3836 return SDValue();
3837
3838 unsigned NumElts = VT.getVectorNumElements();
3839 unsigned BitWidth = FromVT.getScalarSizeInBits();
3840 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
// Each multiplier (1 << amt) is built as a BitWidth-wide lane and later
// widened with the same extension kind as the shifted value. Presumably the
// signed limit is one lower because a multiplier with the top bit set would
// no longer equal 1 << amt after sign extension -- confirm.
3841 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3842 SmallVector<SDValue, 16> MulConsts;
// Turn every per-lane shift amount into a power-of-two multiplier; give up
// on the whole combine if any lane is non-constant or out of range.
3843 for (unsigned I = 0; I < NumElts; ++I) {
3844 auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(I));
3845 if (!C)
3846 return SDValue();
3847
3848 const APInt &ShiftAmt = C->getAPIntValue();
3849 if (ShiftAmt.uge(MaxValidShift))
3850 return SDValue();
3851
3852 APInt MulAmt = APInt::getOneBitSet(BitWidth, ShiftAmt.getZExtValue());
3853 MulConsts.push_back(DAG.getConstant(MulAmt, DL, FromVT.getScalarType(),
3854 /*isTarget=*/false, /*isOpaque=*/true));
3855 }
3856
// The multipliers form a vector of the narrow source type so that they can
// go through the same extend nodes as the shifted value.
3857 SDValue NarrowConst = DAG.getBuildVector(FromVT, DL, MulConsts);
3858 unsigned ExtLowOpc =
3859 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3860 unsigned ExtHighOpc =
3861 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3862
// Build the two v4i32 halves: extend value and multiplier with the LOW and
// HIGH extend nodes, multiply per half, then concatenate back to v8i32.
3863 EVT HalfVT = MVT::v4i32;
3864 SDValue LHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, ExtendIn);
3865 SDValue LHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, ExtendIn);
3866 SDValue RHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, NarrowConst);
3867 SDValue RHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, NarrowConst);
3868 SDValue MulLo = DAG.getNode(ISD::MUL, DL, HalfVT, LHSLo, RHSLo);
3869 SDValue MulHi = DAG.getNode(ISD::MUL, DL, HalfVT, LHSHi, RHSHi);
3870 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MulLo, MulHi);
3871}
3872
// Dispatches N to the combine helper selected by its opcode and returns that
// helper's result. Opcodes that match no case label take the `default` path
// and return an empty SDValue.
3873SDValue
3874WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3875 DAGCombinerInfo &DCI) const {
3876 switch (N->getOpcode()) {
3877 default:
3878 return SDValue();
3879 case ISD::BITCAST:
3880 return performBitcastCombine(N, DCI);
3881 case ISD::SETCC:
3882 return performSETCCCombine(N, DCI, Subtarget);
3884 return performVECTOR_SHUFFLECombine(N, DCI);
3885 case ISD::SIGN_EXTEND:
3886 case ISD::ZERO_EXTEND:
3887 return performVectorExtendCombine(N, DCI);
3888 case ISD::UINT_TO_FP:
// Try the extend-to-FP combine first; only when it yields nothing fall back
// to the non-negative-to-FP combine.
3889 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3890 return ExtCombine;
3891 return performVectorNonNegToFPCombine(N, DCI);
3892 case ISD::SINT_TO_FP:
3893 return performVectorExtendToFPCombine(N, DCI);
3896 case ISD::FP_ROUND:
3898 return performVectorTruncZeroCombine(N, DCI);
3899 case ISD::FP_TO_SINT:
3900 case ISD::FP_TO_UINT:
3901 return performConvertFPCombine(N, DCI.DAG);
3902 case ISD::TRUNCATE:
3903 return performTruncateCombine(N, DCI);
// Prefer the bitmask combine; if it yields nothing, try the any/all combine.
3905 if (SDValue V = performBitmaskCombine(N, DCI.DAG))
3906 return V;
3907 return performAnyAllCombine(N, DCI.DAG);
3908 }
3909 case ISD::MUL:
3910 return performMulCombine(N, DCI);
3911 case ISD::SHL:
3912 return performShiftCombine(N, DCI);
3913 }
3914}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool callingConvSupported(CallingConv::ID CallConv)
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue performShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ CLEAR_CACHE
llvm.clear_cache intrinsic. Operands: Input Chain, Start Address, End Address. Outputs: Output Chain.
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
auto m_Value()
Match an arbitrary value and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:460
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition ValueTypes.h:300
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.