LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
// Constructor: configures WebAssembly-specific legalization — register
// classes per MVT, operation actions, load/store extension rules, and the
// DAG combines enabled for this subtarget's feature set.
// NOTE(review): this is a Doxygen rendering of the source; the constructor's
// name line (source line 44) and many statement lines (e.g. 50-51, 54, 56,
// 58, 60, 91-92, ...) are elided, so several comments below refer to
// statements whose text is not visible in this listing.
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for both externref, funcref and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Custom);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls, with extend inputs, to extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector to integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversion ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // Expand integer operations supported for scalars but not SIMD
275 for (auto Op :
277 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
279
280 // But we do have integer min and max operations
281 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
284
285 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
286 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
287 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
288 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
289
290 // Custom lower bit counting operations for other types to scalarize them.
291 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
292 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
294
295 // Expand float operations supported for scalars but not SIMD
298 for (auto T : {MVT::v4f32, MVT::v2f64})
300
301 // Unsigned comparison operations are unavailable for i64x2 vectors.
303 setCondCodeAction(CC, MVT::v2i64, Custom);
304
305 // 64x2 conversions are not in the spec
306 for (auto Op :
308 for (auto T : {MVT::v2i64, MVT::v2f64})
310
311 // But saturating fp_to_int conversions are
313 setOperationAction(Op, MVT::v4i32, Custom);
314 if (Subtarget->hasFP16()) {
315 setOperationAction(Op, MVT::v8i16, Custom);
316 }
317 }
318
319 // Support vector extending
324 }
325
326 if (Subtarget->hasFP16()) {
327 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
328 }
329
330 if (Subtarget->hasRelaxedSIMD()) {
333 }
334
335 // Partial MLA reductions.
337 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
338 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
339 }
340 }
341
342 // As a special case, these operators use the type to mean the type to
343 // sign-extend from.
345 if (!Subtarget->hasSignExt()) {
346 // Sign extends are legal only when extending a vector extract
347 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
348 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
350 }
353
354 // Dynamic stack allocation: use the default expansion.
358
362
363 // Expand these forms; we pattern-match the forms that we can handle in isel.
364 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
365 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
367
368 if (Subtarget->hasReferenceTypes())
369 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
370 for (auto T : {MVT::externref, MVT::funcref})
372
373 // There is no vector conditional select instruction
374 for (auto T :
375 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
377
378 // We have custom switch handling.
380
381 // WebAssembly doesn't have:
382 // - Floating-point extending loads.
383 // - Floating-point truncating stores.
384 // - i1 extending loads.
385 // - truncating SIMD stores and most extending loads
386 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
387 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
388 for (auto T : MVT::integer_valuetypes())
389 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
390 setLoadExtAction(Ext, T, MVT::i1, Promote);
391 if (Subtarget->hasSIMD128()) {
392 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
393 MVT::v2f64}) {
394 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
395 if (MVT(T) != MemT) {
397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
398 setLoadExtAction(Ext, T, MemT, Expand);
399 }
400 }
401 }
402 // But some vector extending loads are legal
403 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
404 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
405 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
406 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
407 }
408 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
409 }
410
411 // Don't do anything clever with build_pairs
413
414 // Trap lowers to wasm unreachable
415 setOperationAction(ISD::TRAP, MVT::Other, Legal);
417
418 // Exception handling intrinsics
422
424
425 // Always convert switches to br_tables unless there is only one case, which
426 // is equivalent to a simple branch. This reduces code size for wasm, and we
427 // defer possible jump table optimizations to the VM.
429}
430
439
448
// Chooses the expansion strategy for an atomicrmw instruction: operations
// with native wasm atomic instructions are matched in the switch.
// NOTE(review): Doxygen listing — the return type (source line 449), the
// switch cases for natively supported operations (454-460), and the final
// return statement (464) are elided from this view.
450WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
451 const AtomicRMWInst *AI) const {
452 // We have wasm instructions for these
453 switch (AI->getOperation()) {
461 default:
462 break;
463 }
465}
466
// Decides whether a binary vector op feeding an extract_element should be
// scalarized (extract first, then do the op on scalars).
467bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
468 // Implementation copied from X86TargetLowering.
469 unsigned Opc = VecOp.getOpcode();
470
471 // Assume target opcodes can't be scalarized.
472 // TODO - do we have any exceptions?
// NOTE(review): the guard condition for this early return (source line 473,
// presumably a target-opcode check on Opc) is elided from this listing.
474 return false;
475
476 // If the vector op is not supported, try to convert to scalar.
477 EVT VecVT = VecOp.getValueType();
// NOTE(review): the legality check guarding this return (source line 478)
// is elided from this listing.
479 return true;
480
481 // If the vector op is supported, but the scalar op is not, the transform may
482 // not be worthwhile.
483 EVT ScalarVT = VecVT.getScalarType();
484 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
485}
486
487FastISel *WebAssemblyTargetLowering::createFastISel(
488 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
489 const LibcallLoweringInfo *LibcallLowering) const {
490 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
491}
492
// Returns the MVT to use for the scalar shift-amount operand when shifting
// a value of type VT.
493MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
494 EVT VT) const {
// Round VT's bit width up to a power of two; widths 2-7 are promoted to 8.
495 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
496 if (BitWidth > 1 && BitWidth < 8)
497 BitWidth = 8;
498
499 if (BitWidth > 64) {
500 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
501 // the count to be an i32.
502 BitWidth = 32;
// NOTE(review): the assert condition preceding this message string (source
// line 503) is elided from this listing.
504 "32-bit shift counts ought to be enough for anyone");
505 }
506
// NOTE(review): the construction of `Result` and the assert condition
// (source lines 507-508) are elided from this listing.
509 "Unable to represent scalar shift amount type");
510 return Result;
511}
512
513// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
514// undefined result on invalid/overflow, to the WebAssembly opcode, which
515// traps on invalid/overflow.
// NOTE(review): Doxygen listing — the signature's first lines (source
// 516-517), the MRI/DL local definitions (521), the insertion-point
// iterator (545), and source line 552 are elided from this view.
518 const TargetInstrInfo &TII,
519 bool IsUnsigned, bool Int64,
520 bool Float64, unsigned LoweredOpcode) {
522
523 Register OutReg = MI.getOperand(0).getReg();
524 Register InReg = MI.getOperand(1).getReg();
525
// Pick the float-width and int-width specific opcodes and range limits.
526 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
527 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
528 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
529 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
530 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
531 unsigned Eqz = WebAssembly::EQZ_I32;
532 unsigned And = WebAssembly::AND_I32;
533 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
534 int64_t Substitute = IsUnsigned ? 0 : Limit;
535 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
536 auto &Context = BB->getParent()->getFunction().getContext();
537 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
538
539 const BasicBlock *LLVMBB = BB->getBasicBlock();
540 MachineFunction *F = BB->getParent();
541 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
542 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
543 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
544
546 F->insert(It, FalseMBB);
547 F->insert(It, TrueMBB);
548 F->insert(It, DoneMBB);
549
550 // Transfer the remainder of BB and its successor edges to DoneMBB.
551 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
553
554 BB->addSuccessor(TrueMBB);
555 BB->addSuccessor(FalseMBB);
556 TrueMBB->addSuccessor(DoneMBB);
557 FalseMBB->addSuccessor(DoneMBB);
558
559 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
560 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
561 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
562 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
563 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
564 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
565 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
566
567 MI.eraseFromParent();
568 // For signed numbers, we can do a single comparison to determine whether
569 // fabs(x) is within range.
570 if (IsUnsigned) {
571 Tmp0 = InReg;
572 } else {
573 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
574 }
575 BuildMI(BB, DL, TII.get(FConst), Tmp1)
576 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
577 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
578
579 // For unsigned numbers, we have to do a separate comparison with zero.
580 if (IsUnsigned) {
581 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
582 Register SecondCmpReg =
583 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
584 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
585 BuildMI(BB, DL, TII.get(FConst), Tmp1)
586 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
587 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
588 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
589 CmpReg = AndReg;
590 }
591
592 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
593
594 // Create the CFG diamond to select between doing the conversion or using
595 // the substitute value.
596 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
597 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
598 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
599 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
600 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
601 .addReg(FalseReg)
602 .addMBB(FalseMBB)
603 .addReg(TrueReg)
604 .addMBB(TrueMBB);
605
606 return DoneMBB;
607}
608
609// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
610// instruction to handle the zero-length case.
// NOTE(review): Doxygen listing — the signature's first lines (source
// 611-612), the MRI/DL locals (614), the insertion-point iterator (663),
// and source line 669 are elided from this view.
613 const TargetInstrInfo &TII, bool Int64) {
615
616 MachineOperand DstMem = MI.getOperand(0);
617 MachineOperand SrcMem = MI.getOperand(1);
618 MachineOperand Dst = MI.getOperand(2);
619 MachineOperand Src = MI.getOperand(3);
620 MachineOperand Len = MI.getOperand(4);
621
622 // If the length is a constant, we don't actually need the check.
623 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
624 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
625 Def->getOpcode() == WebAssembly::CONST_I64) {
626 if (Def->getOperand(1).getImm() == 0) {
627 // A zero-length memcpy is a no-op.
628 MI.eraseFromParent();
629 return BB;
630 }
631 // A non-zero-length memcpy doesn't need a zero check.
632 unsigned MemoryCopy =
633 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
634 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
635 .add(DstMem)
636 .add(SrcMem)
637 .add(Dst)
638 .add(Src)
639 .add(Len);
640 MI.eraseFromParent();
641 return BB;
642 }
643 }
644
645 // We're going to add an extra use to `Len` to test if it's zero; that
646 // use shouldn't be a kill, even if the original use is.
647 MachineOperand NoKillLen = Len;
648 NoKillLen.setIsKill(false);
649
650 // Decide on which `MachineInstr` opcode we're going to use.
651 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
652 unsigned MemoryCopy =
653 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
654
655 // Create two new basic blocks; one for the new `memory.copy` that we can
656 // branch over, and one for the rest of the instructions after the original
657 // `memory.copy`.
658 const BasicBlock *LLVMBB = BB->getBasicBlock();
659 MachineFunction *F = BB->getParent();
660 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
661 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
662
664 F->insert(It, TrueMBB);
665 F->insert(It, DoneMBB);
666
667 // Transfer the remainder of BB and its successor edges to DoneMBB.
668 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
670
671 // Connect the CFG edges.
672 BB->addSuccessor(TrueMBB);
673 BB->addSuccessor(DoneMBB);
674 TrueMBB->addSuccessor(DoneMBB);
675
676 // Create a virtual register for the `Eqz` result.
677 unsigned EqzReg;
678 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
679
680 // Erase the original `memory.copy`.
681 MI.eraseFromParent();
682
683 // Test if `Len` is zero.
684 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
685
686 // Insert a new `memory.copy`.
687 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
688 .add(DstMem)
689 .add(SrcMem)
690 .add(Dst)
691 .add(Src)
692 .add(Len);
693
694 // Create the CFG triangle.
695 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
696 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
697
698 return DoneMBB;
699}
700
701// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
702// instruction to handle the zero-length case.
// NOTE(review): Doxygen listing — the signature's first lines (source
// 703-704), the MRI/DL locals (706), the insertion-point iterator (753),
// and source line 759 are elided from this view.
705 const TargetInstrInfo &TII, bool Int64) {
707
708 MachineOperand Mem = MI.getOperand(0);
709 MachineOperand Dst = MI.getOperand(1);
710 MachineOperand Val = MI.getOperand(2);
711 MachineOperand Len = MI.getOperand(3);
712
713 // If the length is a constant, we don't actually need the check.
714 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
715 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
716 Def->getOpcode() == WebAssembly::CONST_I64) {
717 if (Def->getOperand(1).getImm() == 0) {
718 // A zero-length memset is a no-op.
719 MI.eraseFromParent();
720 return BB;
721 }
722 // A non-zero-length memset doesn't need a zero check.
723 unsigned MemoryFill =
724 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
725 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
726 .add(Mem)
727 .add(Dst)
728 .add(Val)
729 .add(Len);
730 MI.eraseFromParent();
731 return BB;
732 }
733 }
734
735 // We're going to add an extra use to `Len` to test if it's zero; that
736 // use shouldn't be a kill, even if the original use is.
737 MachineOperand NoKillLen = Len;
738 NoKillLen.setIsKill(false);
739
740 // Decide on which `MachineInstr` opcode we're going to use.
741 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
742 unsigned MemoryFill =
743 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
744
745 // Create two new basic blocks; one for the new `memory.fill` that we can
746 // branch over, and one for the rest of the instructions after the original
747 // `memory.fill`.
748 const BasicBlock *LLVMBB = BB->getBasicBlock();
749 MachineFunction *F = BB->getParent();
750 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
751 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
752
754 F->insert(It, TrueMBB);
755 F->insert(It, DoneMBB);
756
757 // Transfer the remainder of BB and its successor edges to DoneMBB.
758 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
760
761 // Connect the CFG edges.
762 BB->addSuccessor(TrueMBB);
763 BB->addSuccessor(DoneMBB);
764 TrueMBB->addSuccessor(DoneMBB);
765
766 // Create a virtual register for the `Eqz` result.
767 unsigned EqzReg;
768 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
769
770 // Erase the original `memory.fill`.
771 MI.eraseFromParent();
772
773 // Test if `Len` is zero.
774 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
775
776 // Insert a new `memory.fill`.
777 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
778
779 // Create the CFG triangle.
780 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
781 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
782
783 return DoneMBB;
784}
785
// Fuse a CALL_PARAMS/CALL_RESULTS (or RET_CALL_RESULTS) pair into a single
// CALL / CALL_INDIRECT / RET_CALL / RET_CALL_INDIRECT instruction, and for
// funcref indirect calls route through __funcref_call_table.
// NOTE(review): Doxygen listing — the parameter line carrying MI/DL/BB
// (source 787), the table-symbol getter calls (855, 857, 887), and comment
// lines are partially elided from this view.
786static MachineBasicBlock *
788 const WebAssemblySubtarget *Subtarget,
789 const TargetInstrInfo &TII) {
790 MachineInstr &CallParams = *CallResults.getPrevNode();
791 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
792 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
793 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
794
795 bool IsIndirect =
796 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
797 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
798
799 bool IsFuncrefCall = false;
800 if (IsIndirect && CallParams.getOperand(0).isReg()) {
801 Register Reg = CallParams.getOperand(0).getReg();
802 const MachineFunction *MF = BB->getParent();
803 const MachineRegisterInfo &MRI = MF->getRegInfo();
804 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
805 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
806 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
807 }
808
809 unsigned CallOp;
810 if (IsIndirect && IsRetCall) {
811 CallOp = WebAssembly::RET_CALL_INDIRECT;
812 } else if (IsIndirect) {
813 CallOp = WebAssembly::CALL_INDIRECT;
814 } else if (IsRetCall) {
815 CallOp = WebAssembly::RET_CALL;
816 } else {
817 CallOp = WebAssembly::CALL;
818 }
819
820 MachineFunction &MF = *BB->getParent();
821 const MCInstrDesc &MCID = TII.get(CallOp);
822 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
823
824 // Move the function pointer to the end of the arguments for indirect calls
825 if (IsIndirect) {
826 auto FnPtr = CallParams.getOperand(0);
827 CallParams.removeOperand(0);
828
829 // For funcrefs, call_indirect is done through __funcref_call_table and the
830 // funcref is always installed in slot 0 of the table, therefore instead of
831 // having the function pointer added at the end of the params list, a zero
832 // (the index in
833 // __funcref_call_table is added).
834 if (IsFuncrefCall) {
835 Register RegZero =
836 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
837 MachineInstrBuilder MIBC0 =
838 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
839
840 BB->insert(CallResults.getIterator(), MIBC0);
841 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
842 } else
843 CallParams.addOperand(FnPtr);
844 }
845
846 for (auto Def : CallResults.defs())
847 MIB.add(Def);
848
849 if (IsIndirect) {
850 // Placeholder for the type index.
851 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
852 MIB.addImm(0);
853 // The table into which this call_indirect indexes.
854 MCSymbolWasm *Table = IsFuncrefCall
856 MF.getContext(), Subtarget)
858 MF.getContext(), Subtarget);
859 if (Subtarget->hasCallIndirectOverlong()) {
860 MIB.addSym(Table);
861 } else {
862 // For the MVP there is at most one table whose number is 0, but we can't
863 // write a table symbol or issue relocations. Instead we just ensure the
864 // table is live and write a zero.
865 Table->setNoStrip();
866 MIB.addImm(0);
867 }
868 }
869
870 for (auto Use : CallParams.uses())
871 MIB.add(Use);
872
873 BB->insert(CallResults.getIterator(), MIB);
874 CallParams.eraseFromParent();
875 CallResults.eraseFromParent();
876
877 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
878 // table slot with ref.null upon call_indirect return.
879 //
880 // This generates the following code, which comes right after a call_indirect
881 // of a funcref:
882 //
883 // i32.const 0
884 // ref.null func
885 // table.set __funcref_call_table
886 if (IsIndirect && IsFuncrefCall) {
888 MF.getContext(), Subtarget);
889 Register RegZero =
890 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
891 MachineInstr *Const0 =
892 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
893 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
894
895 Register RegFuncref =
896 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
897 MachineInstr *RefNull =
898 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
899 BB->insertAfter(Const0->getIterator(), RefNull);
900
901 MachineInstr *TableSet =
902 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
903 .addSym(Table)
904 .addReg(RegZero)
905 .addReg(RegFuncref);
906 BB->insertAfter(RefNull->getIterator(), TableSet);
907 }
908
909 return BB;
910}
911
912MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
913 MachineInstr &MI, MachineBasicBlock *BB) const {
914 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
915 DebugLoc DL = MI.getDebugLoc();
916
917 switch (MI.getOpcode()) {
918 default:
919 llvm_unreachable("Unexpected instr type to insert");
920 case WebAssembly::FP_TO_SINT_I32_F32:
921 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
922 WebAssembly::I32_TRUNC_S_F32);
923 case WebAssembly::FP_TO_UINT_I32_F32:
924 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
925 WebAssembly::I32_TRUNC_U_F32);
926 case WebAssembly::FP_TO_SINT_I64_F32:
927 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
928 WebAssembly::I64_TRUNC_S_F32);
929 case WebAssembly::FP_TO_UINT_I64_F32:
930 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
931 WebAssembly::I64_TRUNC_U_F32);
932 case WebAssembly::FP_TO_SINT_I32_F64:
933 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
934 WebAssembly::I32_TRUNC_S_F64);
935 case WebAssembly::FP_TO_UINT_I32_F64:
936 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
937 WebAssembly::I32_TRUNC_U_F64);
938 case WebAssembly::FP_TO_SINT_I64_F64:
939 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
940 WebAssembly::I64_TRUNC_S_F64);
941 case WebAssembly::FP_TO_UINT_I64_F64:
942 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
943 WebAssembly::I64_TRUNC_U_F64);
944 case WebAssembly::MEMCPY_A32:
945 return LowerMemcpy(MI, DL, BB, TII, false);
946 case WebAssembly::MEMCPY_A64:
947 return LowerMemcpy(MI, DL, BB, TII, true);
948 case WebAssembly::MEMSET_A32:
949 return LowerMemset(MI, DL, BB, TII, false);
950 case WebAssembly::MEMSET_A64:
951 return LowerMemset(MI, DL, BB, TII, true);
952 case WebAssembly::CALL_RESULTS:
953 case WebAssembly::RET_CALL_RESULTS:
954 return LowerCallResults(MI, DL, BB, Subtarget, TII);
955 }
956}
957
// Map a single-letter inline-asm constraint to a WebAssembly register class;
// 'r' selects V128/I32/I64/F32/F64 based on the operand's MVT.
958std::pair<unsigned, const TargetRegisterClass *>
959WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
960 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
961 // First, see if this is a constraint that directly corresponds to a
962 // WebAssembly register class.
963 if (Constraint.size() == 1) {
964 switch (Constraint[0]) {
965 case 'r':
966 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
967 if (Subtarget->hasSIMD128() && VT.isVector()) {
968 if (VT.getSizeInBits() == 128)
969 return std::make_pair(0U, &WebAssembly::V128RegClass);
970 }
971 if (VT.isInteger() && !VT.isVector()) {
972 if (VT.getSizeInBits() <= 32)
973 return std::make_pair(0U, &WebAssembly::I32RegClass);
974 if (VT.getSizeInBits() <= 64)
975 return std::make_pair(0U, &WebAssembly::I64RegClass);
976 }
977 if (VT.isFloatingPoint() && !VT.isVector()) {
978 switch (VT.getSizeInBits()) {
979 case 32:
980 return std::make_pair(0U, &WebAssembly::F32RegClass);
981 case 64:
982 return std::make_pair(0U, &WebAssembly::F64RegClass);
983 default:
984 break;
985 }
986 }
987 break;
988 default:
989 break;
990 }
991 }
992
// NOTE(review): the fallthrough return (source line 993, presumably the
// delegation to the base-class implementation) is elided from this listing.
994}
995
996bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
997 // Assume ctz is a relatively cheap operation.
998 return true;
999}
1000
1001bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1002 // Assume clz is a relatively cheap operation.
1003 return true;
1004}
1005
1006bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1007 const AddrMode &AM,
1008 Type *Ty, unsigned AS,
1009 Instruction *I) const {
1010 // WebAssembly offsets are added as unsigned without wrapping. The
1011 // isLegalAddressingMode gives us no way to determine if wrapping could be
1012 // happening, so we approximate this by accepting only non-negative offsets.
1013 if (AM.BaseOffs < 0)
1014 return false;
1015
1016 // WebAssembly has no scale register operands.
1017 if (AM.Scale != 0)
1018 return false;
1019
1020 // Everything else is legal.
1021 return true;
1022}
1023
1024bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1025 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1026 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1027 // WebAssembly supports unaligned accesses, though it should be declared
1028 // with the p2align attribute on loads and stores which do so, and there
1029 // may be a performance impact. We tell LLVM they're "fast" because
1030 // for the kinds of things that LLVM uses this for (merging adjacent stores
1031 // of constants, etc.), WebAssembly implementations will either want the
1032 // unaligned access or they'll split anyway.
1033 if (Fast)
1034 *Fast = 1;
1035 return true;
1036}
1037
1038bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1039 AttributeList Attr) const {
1040 // The current thinking is that wasm engines will perform this optimization,
1041 // so we can save on code size.
1042 return true;
1043}
1044
1045bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1046 EVT ExtT = ExtVal.getValueType();
1047 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1048 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1049 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1050 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1051}
1052
// Decide whether a constant offset may be folded into a global address.
1053bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1054 const GlobalAddressSDNode *GA) const {
1055 // Wasm doesn't support function addresses with offsets
1056 const GlobalValue *GV = GA->getGlobal();
// NOTE(review): the return expression (source line 1057, presumably testing
// whether GV is a Function) is elided from this listing.
1058}
1059
1060EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1061 LLVMContext &C,
1062 EVT VT) const {
1063 if (VT.isVector())
1065
1066 // So far, all branch instructions in Wasm take an I32 condition.
1067 // The default TargetLowering::getSetCCResultType returns the pointer size,
1068 // which would be useful to reduce instruction counts when testing
1069 // against 64-bit pointers/values if at some point Wasm supports that.
1070 return EVT::getIntegerVT(C, 32);
1071}
1072
// Describe the memory accessed by wasm memory intrinsics so MachineMemOperands
// can be attached. NOTE(review): parts of the parameter list and the per-case
// IntrinsicInfo/flag setup lines are elided in this extracted view; each case
// below fills in the memory VT, pointer, offset, and alignment, then records
// the descriptor via Infos.push_back().
void WebAssemblyTargetLowering::getTgtMemIntrinsic(
    MachineFunction &MF, unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_memory_atomic_notify:
    // Notify operates on an i32 at a 4-byte-aligned address (arg 0).
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait32:
    // 32-bit wait compares against an i32 at a 4-byte-aligned address.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait64:
    // 64-bit wait compares against an i64 at an 8-byte-aligned address.
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_loadf16_f32:
    // Loads an f16 (2 bytes) from arg 0.
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_storef16_f32:
    // Stores an f16 (2 bytes) to the pointer in arg 1.
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  default:
    // Not a memory intrinsic we describe; leave Infos untouched.
    return;
  }
}
1133
// Compute known-zero/known-one bits for WebAssembly-specific nodes.
// NOTE(review): a few lines (an outer case label and the per-width APInt Mask
// definitions) are elided in this extracted view.
void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::wasm_bitmask: {
      // bitmask produces one result bit per input lane, so every bit above
      // the lane count is known to be zero.
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
    break;
  }
  case WebAssemblyISD::EXTEND_LOW_U:
  case WebAssemblyISD::EXTEND_HIGH_U: {
    // We know the high half, of each destination vector element, will be zero.
    SDValue SrcOp = Op.getOperand(0);
    EVT VT = SrcOp.getSimpleValueType();
    unsigned BitWidth = Known.getBitWidth();
    if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      // Zero-extending from 8-bit lanes: bits above bit 7 are zero.
      assert(BitWidth >= 8 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
      // Zero-extending from 16-bit lanes: bits above bit 15 are zero.
      assert(BitWidth >= 16 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
      // Zero-extending from 32-bit lanes: bits above bit 31 are zero.
      assert(BitWidth >= 32 && "Unexpected width!");
      Known.Zero |= Mask;
    }
    break;
  }
  // For 128-bit addition if the upper bits are all zero then it's known that
  // the upper bits of the result will have all bits guaranteed zero except the
  // first.
  case WebAssemblyISD::I64_ADD128:
    if (Op.getResNo() == 1) {
      SDValue LHS_HI = Op.getOperand(1);
      SDValue RHS_HI = Op.getOperand(3);
      if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
}
1190
1192WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1193 if (VT.isFixedLengthVector()) {
1194 MVT EltVT = VT.getVectorElementType();
1195 // We have legal vector types with these lane types, so widening the
1196 // vector would let us use some of the lanes directly without having to
1197 // extend or truncate values.
1198 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1199 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1200 return TypeWidenVector;
1201 }
1202
1204}
1205
1206bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1207 const MachineFunction &MF, EVT VT) const {
1208 if (!Subtarget->hasFP16() || !VT.isVector())
1209 return false;
1210
1211 EVT ScalarVT = VT.getScalarType();
1212 if (!ScalarVT.isSimple())
1213 return false;
1214
1215 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1216}
1217
1218bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1219 SDValue Op, const TargetLoweringOpt &TLO) const {
1220 // ISel process runs DAGCombiner after legalization; this step is called
1221 // SelectionDAG optimization phase. This post-legalization combining process
1222 // runs DAGCombiner on each node, and if there was a change to be made,
1223 // re-runs legalization again on it and its user nodes to make sure
1224 // everythiing is in a legalized state.
1225 //
1226 // The legalization calls lowering routines, and we do our custom lowering for
1227 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1228 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1229 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1230 // turns unused vector elements into undefs. But this routine does not work
1231 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1232 // combination can result in a infinite loop, in which undefs are converted to
1233 // zeros in legalization and back to undefs in combining.
1234 //
1235 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1236 // running for build_vectors.
1237 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1238 return false;
1239 return true;
1240}
1241
1242//===----------------------------------------------------------------------===//
1243// WebAssembly Lowering private implementation.
1244//===----------------------------------------------------------------------===//
1245
1246//===----------------------------------------------------------------------===//
1247// Lowering Code
1248//===----------------------------------------------------------------------===//
1249
1250static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1252 DAG.getContext()->diagnose(
1253 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1254}
1255
1256// Test whether the given calling convention is supported.
1258 // We currently support the language-independent target-independent
1259 // conventions. We don't yet have a way to annotate calls with properties like
1260 // "cold", and we don't have any call-clobbered registers, so these are mostly
1261 // all handled the same.
1262 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1263 CallConv == CallingConv::Cold ||
1264 CallConv == CallingConv::PreserveMost ||
1265 CallConv == CallingConv::PreserveAll ||
1266 CallConv == CallingConv::CXX_FAST_TLS ||
1268 CallConv == CallingConv::Swift;
1269}
1270
// Lower an outgoing call: validates the calling convention and argument
// attributes, handles tail-call eligibility, byval copies, swiftcc implicit
// arguments, the varargs buffer, and funcref callees, then emits a
// WebAssemblyISD::CALL (or RET_CALL) node.
// NOTE(review): several lines (local SmallVector declarations, some call
// argument lines, and a couple of condition lines) are elided in this
// extracted view.
SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    // Demote an unsupported tail call to a regular call; a musttail call
    // cannot be demoted, so that case is a hard diagnostic instead.
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      // Materialize byval arguments as an explicit stack copy whose frame
      // index replaces the original operand.
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += !Out.Flags.isVarArg();
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature They are necessary to match callee and caller signature for
  // indirect call.
  if (CallConv == CallingConv::Swift) {
    Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftSelf();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftError();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    MaybeAlign StackAlign = Layout.getStackAlignment();
    assert(StackAlign && "data layout string is missing stack alignment");
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    // Varargs call with an empty buffer: pass a null buffer pointer.
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
                                        GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref we need to add an instruction
  // table.set to the chain and transform the call.
      CLI.CB->getCalledOperand()->getType())) {
    // In the absence of function references proposal where a funcref call is
    // lowered to call_ref, using reference types we generate a table.set to set
    // the funcref to a special table used solely for this purpose, followed by
    // a call_indirect. Here we just generate the table set, and return the
    // SDValue of the table.set so that LowerCall can finalize the lowering by
    // generating the call_indirect.
    SDValue Chain = Ops[0];

        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  // The CALL node's results are the incoming values followed by the chain.
  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}
1555
1556bool WebAssemblyTargetLowering::CanLowerReturn(
1557 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1558 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1559 const Type *RetTy) const {
1560 // WebAssembly can only handle returning tuples with multivalue enabled
1561 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1562}
1563
// Lower a function return: emits a WebAssemblyISD::RETURN node carrying all
// return values and diagnoses unsupported return attributes.
// NOTE(review): the Outs parameter line and one `if` condition are elided in
// this extracted view.
SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  // RETURN takes the chain followed by every return value.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}
1593
// Lower the incoming arguments of a function: each used argument becomes a
// WebAssemblyISD::ARGUMENT node indexed by position, swiftcc implicit params
// are accounted for, and the varargs buffer pointer is copied into a vreg.
// NOTE(review): the vreg-creation line and the computeSignatureVTs call lines
// are elided in this extracted view.
SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature They are necessary to match callee and caller signature for
  // indirect call.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
    MFI->setVarargBufferVreg(VarargVreg);
    // The buffer pointer arrives as one extra ARGUMENT after the formal ones.
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}
1674
// Produce replacement results for nodes with illegal result types that were
// marked for custom legalization. Adding no results tells the legalizer the
// node should not be custom lowered after all.
// NOTE(review): the remaining parameter lines and several case labels are
// elided in this extracted view.
void WebAssemblyTargetLowering::ReplaceNodeResults(
  switch (N->getOpcode()) {
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
    // Do not add any results, signifying that N should not be custom lowered.
    // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
    break;
  case ISD::ADD:
  case ISD::SUB:
    // i128 add/sub are replaced with the wide-arithmetic 128-bit ops.
    Results.push_back(Replace128Op(N, DAG));
    break;
  default:
    "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}
1699
1700//===----------------------------------------------------------------------===//
1701// Custom lowering hooks.
1702//===----------------------------------------------------------------------===//
1703
// Central dispatch for all custom-lowered operations: routes each opcode to
// its dedicated Lower* helper.
// NOTE(review): several case labels are elided in this extracted view, which
// is why some `return Lower*` lines appear without a preceding `case`.
SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
    return LowerGlobalTLSAddress(Op, DAG);
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
    return LowerAccessVectorElement(Op, DAG);
    return LowerIntrinsic(Op, DAG);
    return LowerSIGN_EXTEND_INREG(Op, DAG);
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FMINNUM:
  case ISD::FMINIMUMNUM:
    return LowerFMIN(Op, DAG);
  case ISD::FMAXNUM:
  case ISD::FMAXIMUMNUM:
    return LowerFMAX(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    // No vector forms of these bit-counting ops; expand element-by-element.
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return LowerMUL_LOHI(Op, DAG);
  case ISD::UADDO:
    return LowerUADDO(Op, DAG);
  }
}
1784
1788
1789 return false;
1790}
1791
1792static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1793 SelectionDAG &DAG) {
1795 if (!FI)
1796 return std::nullopt;
1797
1798 auto &MF = DAG.getMachineFunction();
1800}
1801
// Lower stores whose address is a wasm global or a stack object backed by a
// wasm local into GLOBAL_SET / LOCAL_SET nodes; ordinary memory stores pass
// through unchanged.
// NOTE(review): the IsWebAssemblyGlobal condition line and the trailing
// wasm_var address-space check lines are elided in this extracted view.
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    // global.set consumes {chain, value, global address}.
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    // local.set consumes {chain, local index, value}.
    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
    SDValue Ops[] = {SN->getChain(), Idx, Value};
    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  }

      "Encountered an unlowerable store to the wasm_var address space",
      false);

  return Op;
}
1839
// Lower loads whose address is a wasm global or a stack object backed by a
// wasm local into GLOBAL_GET / LOCAL_GET nodes; ordinary memory loads pass
// through unchanged.
// NOTE(review): the IsWebAssemblyGlobal condition line, two report_fatal_error
// call heads, and the trailing wasm_var address-space check lines are elided
// in this extracted view.
SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  const SDValue &Base = LN->getBasePtr();
  const SDValue &Offset = LN->getOffset();

    if (!Offset->isUndef())
        "unexpected offset when loading from webassembly global", false);

    // global.get produces {value, chain} from {chain, global address}.
    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue Ops[] = {LN->getChain(), Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
                                   LN->getMemoryVT(), LN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
        "unexpected offset when loading from webassembly local", false);

    // local.get produces {value, chain} from {chain, local index}.
    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    EVT LocalVT = LN->getValueType(0);
    return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
                       {LN->getChain(), Idx});
  }

      "Encountered an unlowerable load from the wasm_var address space",
      false);

  return Op;
}
1876
1877SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1878 SelectionDAG &DAG) const {
1879 assert(Subtarget->hasWideArithmetic());
1880 assert(Op.getValueType() == MVT::i64);
1881 SDLoc DL(Op);
1882 unsigned Opcode;
1883 switch (Op.getOpcode()) {
1884 case ISD::UMUL_LOHI:
1885 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1886 break;
1887 case ISD::SMUL_LOHI:
1888 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1889 break;
1890 default:
1891 llvm_unreachable("unexpected opcode");
1892 }
1893 SDValue LHS = Op.getOperand(0);
1894 SDValue RHS = Op.getOperand(1);
1895 SDValue Lo =
1896 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1897 SDValue Hi(Lo.getNode(), 1);
1898 SDValue Ops[] = {Lo, Hi};
1899 return DAG.getMergeValues(Ops, DL);
1900}
1901
1902// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1903//
1904// This enables generating a single wasm instruction for this operation where
1905// the upper half of both operands are constant zeros. The upper half of the
1906// result is then whether the overflow happened.
1907SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1908 SelectionDAG &DAG) const {
1909 assert(Subtarget->hasWideArithmetic());
1910 assert(Op.getValueType() == MVT::i64);
1911 assert(Op.getOpcode() == ISD::UADDO);
1912 SDLoc DL(Op);
1913 SDValue LHS = Op.getOperand(0);
1914 SDValue RHS = Op.getOperand(1);
1915 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1916 SDValue Result =
1917 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1918 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1919 SDValue CarryI64(Result.getNode(), 1);
1920 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1921 SDValue Ops[] = {Result, CarryI32};
1922 return DAG.getMergeValues(Ops, DL);
1923}
1924
1925SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1926 SelectionDAG &DAG) const {
1927 assert(Subtarget->hasWideArithmetic());
1928 assert(N->getValueType(0) == MVT::i128);
1929 SDLoc DL(N);
1930 unsigned Opcode;
1931 switch (N->getOpcode()) {
1932 case ISD::ADD:
1933 Opcode = WebAssemblyISD::I64_ADD128;
1934 break;
1935 case ISD::SUB:
1936 Opcode = WebAssemblyISD::I64_SUB128;
1937 break;
1938 default:
1939 llvm_unreachable("unexpected opcode");
1940 }
1941 SDValue LHS = N->getOperand(0);
1942 SDValue RHS = N->getOperand(1);
1943
1944 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1945 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1946 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1947 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1948 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1949 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1950 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1951 LHS_0, LHS_1, RHS_0, RHS_1);
1952 SDValue Result_HI(Result_LO.getNode(), 1);
1953 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1954}
1955
1956SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1957 SelectionDAG &DAG) const {
1958 SDValue Src = Op.getOperand(2);
1959 if (isa<FrameIndexSDNode>(Src.getNode())) {
1960 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1961 // the FI to some LEA-like instruction, but since we don't have that, we
1962 // need to insert some kind of instruction that can take an FI operand and
1963 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1964 // local.copy between Op and its FI operand.
1965 SDValue Chain = Op.getOperand(0);
1966 SDLoc DL(Op);
1967 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1968 EVT VT = Src.getValueType();
1969 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1970 : WebAssembly::COPY_I64,
1971 DL, VT, Src),
1972 0);
1973 return Op.getNode()->getNumValues() == 1
1974 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1975 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1976 Op.getNumOperands() == 4 ? Op.getOperand(3)
1977 : SDValue());
1978 }
1979 return SDValue();
1980}
1981
1982SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1983 SelectionDAG &DAG) const {
1984 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1985 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1986}
1987
1988SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1989 SelectionDAG &DAG) const {
1990 SDLoc DL(Op);
1991
1992 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1993 fail(DL, DAG,
1994 "Non-Emscripten WebAssembly hasn't implemented "
1995 "__builtin_return_address");
1996 return SDValue();
1997 }
1998
1999 unsigned Depth = Op.getConstantOperandVal(0);
2000 MakeLibCallOptions CallOptions;
2001 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2002 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2003 .first;
2004}
2005
2006SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2007 SelectionDAG &DAG) const {
2008 // Non-zero depths are not supported by WebAssembly currently. Use the
2009 // legalizer's default expansion, which is to return 0 (what this function is
2010 // documented to do).
2011 if (Op.getConstantOperandVal(0) > 0)
2012 return SDValue();
2013
2015 EVT VT = Op.getValueType();
2016 Register FP =
2017 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2018 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2019}
2020
2021SDValue
2022WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2023 SelectionDAG &DAG) const {
2024 SDLoc DL(Op);
2025 const auto *GA = cast<GlobalAddressSDNode>(Op);
2026
2027 MachineFunction &MF = DAG.getMachineFunction();
2028 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2029 report_fatal_error("cannot use thread-local storage without bulk memory",
2030 false);
2031
2032 const GlobalValue *GV = GA->getGlobal();
2033
2034 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2035 // on other targets, if we have thread-local storage, only the local-exec
2036 // model is possible.
2037 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2038 ? GV->getThreadLocalMode()
2040
2041 // Unsupported TLS modes
2044
2045 if (model == GlobalValue::LocalExecTLSModel ||
2048 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2049 // For DSO-local TLS variables we use offset from __tls_base
2050
2051 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2052 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2053 : WebAssembly::GLOBAL_GET_I32;
2054 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2055
2057 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2058 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2059 0);
2060
2061 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2062 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2063 SDValue SymOffset =
2064 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2065
2066 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2067 }
2068
2070
2071 EVT VT = Op.getValueType();
2072 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2073 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2074 GA->getOffset(),
2076}
2077
2078SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2079 SelectionDAG &DAG) const {
2080 SDLoc DL(Op);
2081 const auto *GA = cast<GlobalAddressSDNode>(Op);
2082 EVT VT = Op.getValueType();
2083 assert(GA->getTargetFlags() == 0 &&
2084 "Unexpected target flags on generic GlobalAddressSDNode");
2086 fail(DL, DAG, "Invalid address space for WebAssembly target");
2087
2088 unsigned OperandFlags = 0;
2089 const GlobalValue *GV = GA->getGlobal();
2090 // Since WebAssembly tables cannot yet be shared accross modules, we don't
2091 // need special treatment for tables in PIC mode.
2092 if (isPositionIndependent() &&
2094 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2095 MachineFunction &MF = DAG.getMachineFunction();
2096 MVT PtrVT = getPointerTy(MF.getDataLayout());
2097 const char *BaseName;
2098 if (GV->getValueType()->isFunctionTy()) {
2099 BaseName = MF.createExternalSymbolName("__table_base");
2101 } else {
2102 BaseName = MF.createExternalSymbolName("__memory_base");
2104 }
2106 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2107 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2108
2109 SDValue SymAddr = DAG.getNode(
2110 WebAssemblyISD::WrapperREL, DL, VT,
2111 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2112 OperandFlags));
2113
2114 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2115 }
2117 }
2118
2119 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2120 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2121 GA->getOffset(), OperandFlags));
2122}
2123
2124SDValue
2125WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2126 SelectionDAG &DAG) const {
2127 SDLoc DL(Op);
2128 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2129 EVT VT = Op.getValueType();
2130 assert(ES->getTargetFlags() == 0 &&
2131 "Unexpected target flags on generic ExternalSymbolSDNode");
2132 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2133 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2134}
2135
2136SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2137 SelectionDAG &DAG) const {
2138 // There's no need for a Wrapper node because we always incorporate a jump
2139 // table operand into a BR_TABLE instruction, rather than ever
2140 // materializing it in a register.
2141 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2142 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2143 JT->getTargetFlags());
2144}
2145
2146SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2147 SelectionDAG &DAG) const {
2148 SDLoc DL(Op);
2149 SDValue Chain = Op.getOperand(0);
2150 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2151 SDValue Index = Op.getOperand(2);
2152 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2153
2155 Ops.push_back(Chain);
2156 Ops.push_back(Index);
2157
2158 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2159 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2160
2161 // Add an operand for each case.
2162 for (auto *MBB : MBBs)
2163 Ops.push_back(DAG.getBasicBlock(MBB));
2164
2165 // Add the first MBB as a dummy default target for now. This will be replaced
2166 // with the proper default target (and the preceding range check eliminated)
2167 // if possible by WebAssemblyFixBrTableDefaults.
2168 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2169 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2170}
2171
2172SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2173 SelectionDAG &DAG) const {
2174 SDLoc DL(Op);
2175 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2176
2177 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2178 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2179
2180 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2181 MFI->getVarargBufferVreg(), PtrVT);
2182 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2183 MachinePointerInfo(SV));
2184}
2185
2186SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2187 SelectionDAG &DAG) const {
2188 MachineFunction &MF = DAG.getMachineFunction();
2189 unsigned IntNo;
2190 switch (Op.getOpcode()) {
2193 IntNo = Op.getConstantOperandVal(1);
2194 break;
2196 IntNo = Op.getConstantOperandVal(0);
2197 break;
2198 default:
2199 llvm_unreachable("Invalid intrinsic");
2200 }
2201 SDLoc DL(Op);
2202
2203 switch (IntNo) {
2204 default:
2205 return SDValue(); // Don't custom lower most intrinsics.
2206
2207 case Intrinsic::wasm_lsda: {
2208 auto PtrVT = getPointerTy(MF.getDataLayout());
2209 const char *SymName = MF.createExternalSymbolName(
2210 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2211 if (isPositionIndependent()) {
2213 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2214 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2216 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2217 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2218 SDValue SymAddr =
2219 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2220 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2221 }
2222 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2223 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2224 }
2225
2226 case Intrinsic::wasm_shuffle: {
2227 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2228 SDValue Ops[18];
2229 size_t OpIdx = 0;
2230 Ops[OpIdx++] = Op.getOperand(1);
2231 Ops[OpIdx++] = Op.getOperand(2);
2232 while (OpIdx < 18) {
2233 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2234 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2235 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2236 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2237 } else {
2238 Ops[OpIdx++] = MaskIdx;
2239 }
2240 }
2241 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2242 }
2243
2244 case Intrinsic::thread_pointer: {
2245 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2246 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2247 : WebAssembly::GLOBAL_GET_I32;
2248 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2249 return SDValue(
2250 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2251 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2252 0);
2253 }
2254 }
2255}
2256
2257SDValue
2258WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2259 SelectionDAG &DAG) const {
2260 SDLoc DL(Op);
2261 // If sign extension operations are disabled, allow sext_inreg only if operand
2262 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2263 // extension operations, but allowing sext_inreg in this context lets us have
2264 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2265 // everywhere would be simpler in this file, but would necessitate large and
2266 // brittle patterns to undo the expansion and select extract_lane_s
2267 // instructions.
2268 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2269 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2270 return SDValue();
2271
2272 const SDValue &Extract = Op.getOperand(0);
2273 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2274 if (VecT.getVectorElementType().getSizeInBits() > 32)
2275 return SDValue();
2276 MVT ExtractedLaneT =
2277 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2278 MVT ExtractedVecT =
2279 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2280 if (ExtractedVecT == VecT)
2281 return Op;
2282
2283 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2284 const SDNode *Index = Extract.getOperand(1).getNode();
2285 if (!isa<ConstantSDNode>(Index))
2286 return SDValue();
2287 unsigned IndexVal = Index->getAsZExtVal();
2288 unsigned Scale =
2289 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2290 assert(Scale > 1);
2291 SDValue NewIndex =
2292 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2293 SDValue NewExtract = DAG.getNode(
2295 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2296 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2297 Op.getOperand(1));
2298}
2299
2300static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2301 SelectionDAG &DAG) {
2302 SDValue Source = peekThroughBitcasts(Op);
2303 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2304 return SDValue();
2305
2306 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2307 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2308 "expected extend_low");
2309 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2310
2311 ArrayRef<int> Mask = Shuffle->getMask();
2312 // Look for a shuffle which moves from the high half to the low half.
2313 size_t FirstIdx = Mask.size() / 2;
2314 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2315 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2316 return SDValue();
2317 }
2318 }
2319
2320 SDLoc DL(Op);
2321 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2322 ? WebAssemblyISD::EXTEND_HIGH_S
2323 : WebAssemblyISD::EXTEND_HIGH_U;
2324 SDValue ShuffleSrc = Shuffle->getOperand(0);
2325 if (Op.getOpcode() == ISD::BITCAST)
2326 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2327
2328 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2329}
2330
2331SDValue
2332WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2333 SelectionDAG &DAG) const {
2334 SDLoc DL(Op);
2335 EVT VT = Op.getValueType();
2336 SDValue Src = Op.getOperand(0);
2337 EVT SrcVT = Src.getValueType();
2338
2339 if (SrcVT.getVectorElementType() == MVT::i1 ||
2340 SrcVT.getVectorElementType() == MVT::i64)
2341 return SDValue();
2342
2343 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2344 "Unexpected extension factor.");
2345 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2346
2347 if (Scale != 2 && Scale != 4 && Scale != 8)
2348 return SDValue();
2349
2350 unsigned Ext;
2351 switch (Op.getOpcode()) {
2352 default:
2353 llvm_unreachable("unexpected opcode");
2356 Ext = WebAssemblyISD::EXTEND_LOW_U;
2357 break;
2359 Ext = WebAssemblyISD::EXTEND_LOW_S;
2360 break;
2361 }
2362
2363 if (Scale == 2) {
2364 // See if we can use EXTEND_HIGH.
2365 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2366 return ExtendHigh;
2367 }
2368
2369 SDValue Ret = Src;
2370 while (Scale != 1) {
2371 Ret = DAG.getNode(Ext, DL,
2372 Ret.getValueType()
2375 Ret);
2376 Scale /= 2;
2377 }
2378 assert(Ret.getValueType() == VT);
2379 return Ret;
2380}
2381
2383 SDLoc DL(Op);
2384 if (Op.getValueType() != MVT::v2f64)
2385 return SDValue();
2386
2387 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2388 unsigned &Index) -> bool {
2389 switch (Op.getOpcode()) {
2390 case ISD::SINT_TO_FP:
2391 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2392 break;
2393 case ISD::UINT_TO_FP:
2394 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2395 break;
2396 case ISD::FP_EXTEND:
2397 Opcode = WebAssemblyISD::PROMOTE_LOW;
2398 break;
2399 default:
2400 return false;
2401 }
2402
2403 auto ExtractVector = Op.getOperand(0);
2404 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2405 return false;
2406
2407 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2408 return false;
2409
2410 SrcVec = ExtractVector.getOperand(0);
2411 Index = ExtractVector.getConstantOperandVal(1);
2412 return true;
2413 };
2414
2415 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2416 SDValue LHSSrcVec, RHSSrcVec;
2417 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2418 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2419 return SDValue();
2420
2421 if (LHSOpcode != RHSOpcode)
2422 return SDValue();
2423
2424 MVT ExpectedSrcVT;
2425 switch (LHSOpcode) {
2426 case WebAssemblyISD::CONVERT_LOW_S:
2427 case WebAssemblyISD::CONVERT_LOW_U:
2428 ExpectedSrcVT = MVT::v4i32;
2429 break;
2430 case WebAssemblyISD::PROMOTE_LOW:
2431 ExpectedSrcVT = MVT::v4f32;
2432 break;
2433 }
2434 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2435 return SDValue();
2436
2437 auto Src = LHSSrcVec;
2438 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2439 // Shuffle the source vector so that the converted lanes are the low lanes.
2440 Src = DAG.getVectorShuffle(
2441 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2442 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2443 }
2444 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2445}
2446
2447SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2448 SelectionDAG &DAG) const {
2449 MVT VT = Op.getSimpleValueType();
2450 if (VT == MVT::v8f16) {
2451 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scaler
2452 // FP16 type, so cast them to I16s.
2453 MVT IVT = VT.changeVectorElementType(MVT::i16);
2455 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2456 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2457 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2458 return DAG.getBitcast(VT, Res);
2459 }
2460
2461 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2462 return ConvertLow;
2463
2464 SDLoc DL(Op);
2465 const EVT VecT = Op.getValueType();
2466 const EVT LaneT = Op.getOperand(0).getValueType();
2467 const size_t Lanes = Op.getNumOperands();
2468 bool CanSwizzle = VecT == MVT::v16i8;
2469
2470 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2471 // possible number of lanes at once followed by a sequence of replace_lane
2472 // instructions to individually initialize any remaining lanes.
2473
2474 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2475 // swizzled lanes should be given greater weight.
2476
2477 // TODO: Investigate looping rather than always extracting/replacing specific
2478 // lanes to fill gaps.
2479
2480 auto IsConstant = [](const SDValue &V) {
2481 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2482 };
2483
2484 // Returns the source vector and index vector pair if they exist. Checks for:
2485 // (extract_vector_elt
2486 // $src,
2487 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2488 // )
2489 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2490 auto Bail = std::make_pair(SDValue(), SDValue());
2491 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2492 return Bail;
2493 const SDValue &SwizzleSrc = Lane->getOperand(0);
2494 const SDValue &IndexExt = Lane->getOperand(1);
2495 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2496 return Bail;
2497 const SDValue &Index = IndexExt->getOperand(0);
2498 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2499 return Bail;
2500 const SDValue &SwizzleIndices = Index->getOperand(0);
2501 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2502 SwizzleIndices.getValueType() != MVT::v16i8 ||
2503 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2504 Index->getConstantOperandVal(1) != I)
2505 return Bail;
2506 return std::make_pair(SwizzleSrc, SwizzleIndices);
2507 };
2508
2509 // If the lane is extracted from another vector at a constant index, return
2510 // that vector. The source vector must not have more lanes than the dest
2511 // because the shufflevector indices are in terms of the destination lanes and
2512 // would not be able to address the smaller individual source lanes.
2513 auto GetShuffleSrc = [&](const SDValue &Lane) {
2514 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2515 return SDValue();
2516 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2517 return SDValue();
2518 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2519 VecT.getVectorNumElements())
2520 return SDValue();
2521 return Lane->getOperand(0);
2522 };
2523
2524 using ValueEntry = std::pair<SDValue, size_t>;
2525 SmallVector<ValueEntry, 16> SplatValueCounts;
2526
2527 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2528 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2529
2530 using ShuffleEntry = std::pair<SDValue, size_t>;
2531 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2532
2533 auto AddCount = [](auto &Counts, const auto &Val) {
2534 auto CountIt =
2535 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2536 if (CountIt == Counts.end()) {
2537 Counts.emplace_back(Val, 1);
2538 } else {
2539 CountIt->second++;
2540 }
2541 };
2542
2543 auto GetMostCommon = [](auto &Counts) {
2544 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2545 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2546 return *CommonIt;
2547 };
2548
2549 size_t NumConstantLanes = 0;
2550
2551 // Count eligible lanes for each type of vector creation op
2552 for (size_t I = 0; I < Lanes; ++I) {
2553 const SDValue &Lane = Op->getOperand(I);
2554 if (Lane.isUndef())
2555 continue;
2556
2557 AddCount(SplatValueCounts, Lane);
2558
2559 if (IsConstant(Lane))
2560 NumConstantLanes++;
2561 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2562 AddCount(ShuffleCounts, ShuffleSrc);
2563 if (CanSwizzle) {
2564 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2565 if (SwizzleSrcs.first)
2566 AddCount(SwizzleCounts, SwizzleSrcs);
2567 }
2568 }
2569
2570 SDValue SplatValue;
2571 size_t NumSplatLanes;
2572 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2573
2574 SDValue SwizzleSrc;
2575 SDValue SwizzleIndices;
2576 size_t NumSwizzleLanes = 0;
2577 if (SwizzleCounts.size())
2578 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2579 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2580
2581 // Shuffles can draw from up to two vectors, so find the two most common
2582 // sources.
2583 SDValue ShuffleSrc1, ShuffleSrc2;
2584 size_t NumShuffleLanes = 0;
2585 if (ShuffleCounts.size()) {
2586 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2587 llvm::erase_if(ShuffleCounts,
2588 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2589 }
2590 if (ShuffleCounts.size()) {
2591 size_t AdditionalShuffleLanes;
2592 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2593 GetMostCommon(ShuffleCounts);
2594 NumShuffleLanes += AdditionalShuffleLanes;
2595 }
2596
2597 // Predicate returning true if the lane is properly initialized by the
2598 // original instruction
2599 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2601 // Prefer swizzles over shuffles over vector consts over splats
2602 if (NumSwizzleLanes >= NumShuffleLanes &&
2603 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2604 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2605 SwizzleIndices);
2606 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2607 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2608 return Swizzled == GetSwizzleSrcs(I, Lane);
2609 };
2610 } else if (NumShuffleLanes >= NumConstantLanes &&
2611 NumShuffleLanes >= NumSplatLanes) {
2612 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2613 size_t DestLaneCount = VecT.getVectorNumElements();
2614 size_t Scale1 = 1;
2615 size_t Scale2 = 1;
2616 SDValue Src1 = ShuffleSrc1;
2617 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2618 if (Src1.getValueType() != VecT) {
2619 size_t LaneSize =
2621 assert(LaneSize > DestLaneSize);
2622 Scale1 = LaneSize / DestLaneSize;
2623 Src1 = DAG.getBitcast(VecT, Src1);
2624 }
2625 if (Src2.getValueType() != VecT) {
2626 size_t LaneSize =
2628 assert(LaneSize > DestLaneSize);
2629 Scale2 = LaneSize / DestLaneSize;
2630 Src2 = DAG.getBitcast(VecT, Src2);
2631 }
2632
2633 int Mask[16];
2634 assert(DestLaneCount <= 16);
2635 for (size_t I = 0; I < DestLaneCount; ++I) {
2636 const SDValue &Lane = Op->getOperand(I);
2637 SDValue Src = GetShuffleSrc(Lane);
2638 if (Src == ShuffleSrc1) {
2639 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2640 } else if (Src && Src == ShuffleSrc2) {
2641 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2642 } else {
2643 Mask[I] = -1;
2644 }
2645 }
2646 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2647 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2648 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2649 auto Src = GetShuffleSrc(Lane);
2650 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2651 };
2652 } else if (NumConstantLanes >= NumSplatLanes) {
2653 SmallVector<SDValue, 16> ConstLanes;
2654 for (const SDValue &Lane : Op->op_values()) {
2655 if (IsConstant(Lane)) {
2656 // Values may need to be fixed so that they will sign extend to be
2657 // within the expected range during ISel. Check whether the value is in
2658 // bounds based on the lane bit width and if it is out of bounds, lop
2659 // off the extra bits.
2660 uint64_t LaneBits = 128 / Lanes;
2661 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2662 ConstLanes.push_back(DAG.getConstant(
2663 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2664 SDLoc(Lane), LaneT));
2665 } else {
2666 ConstLanes.push_back(Lane);
2667 }
2668 } else if (LaneT.isFloatingPoint()) {
2669 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2670 } else {
2671 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2672 }
2673 }
2674 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2675 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2676 return IsConstant(Lane);
2677 };
2678 } else {
2679 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2680 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2681 (DestLaneSize == 32 || DestLaneSize == 64)) {
2682 // Could be selected to load_zero.
2683 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2684 } else {
2685 // Use a splat (which might be selected as a load splat)
2686 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2687 }
2688 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2689 return Lane == SplatValue;
2690 };
2691 }
2692
2693 assert(Result);
2694 assert(IsLaneConstructed);
2695
2696 // Add replace_lane instructions for any unhandled values
2697 for (size_t I = 0; I < Lanes; ++I) {
2698 const SDValue &Lane = Op->getOperand(I);
2699 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2700 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2701 DAG.getConstant(I, DL, MVT::i32));
2702 }
2703
2704 return Result;
2705}
2706
2707SDValue
2708WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2709 SelectionDAG &DAG) const {
2710 SDLoc DL(Op);
2711 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2712 MVT VecType = Op.getOperand(0).getSimpleValueType();
2713 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2714 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2715
2716 // Space for two vector args and sixteen mask indices
2717 SDValue Ops[18];
2718 size_t OpIdx = 0;
2719 Ops[OpIdx++] = Op.getOperand(0);
2720 Ops[OpIdx++] = Op.getOperand(1);
2721
2722 // Expand mask indices to byte indices and materialize them as operands
2723 for (int M : Mask) {
2724 for (size_t J = 0; J < LaneBytes; ++J) {
2725 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2726 // whole lane of vector input, to allow further reduction at VM. E.g.
2727 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2728 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2729 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2730 }
2731 }
2732
2733 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2734}
2735
2736SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2737 SelectionDAG &DAG) const {
2738 SDLoc DL(Op);
2739 // The legalizer does not know how to expand the unsupported comparison modes
2740 // of i64x2 vectors, so we manually unroll them here.
2741 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2743 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2744 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2745 const SDValue &CC = Op->getOperand(2);
2746 auto MakeLane = [&](unsigned I) {
2747 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2748 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2749 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2750 };
2751 return DAG.getBuildVector(Op->getValueType(0), DL,
2752 {MakeLane(0), MakeLane(1)});
2753}
2754
2755SDValue
2756WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2757 SelectionDAG &DAG) const {
2758 // Allow constant lane indices, expand variable lane indices
2759 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2760 if (isa<ConstantSDNode>(IdxNode)) {
2761 // Ensure the index type is i32 to match the tablegen patterns
2762 uint64_t Idx = IdxNode->getAsZExtVal();
2763 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2764 Ops[Op.getNumOperands() - 1] =
2765 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2766 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2767 }
2768 // Perform default expansion
2769 return SDValue();
2770}
2771
2773 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2774 // 32-bit and 64-bit unrolled shifts will have proper semantics
2775 if (LaneT.bitsGE(MVT::i32))
2776 return DAG.UnrollVectorOp(Op.getNode());
2777 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2778 SDLoc DL(Op);
2779 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2780 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2781 unsigned ShiftOpcode = Op.getOpcode();
2782 SmallVector<SDValue, 16> ShiftedElements;
2783 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2784 SmallVector<SDValue, 16> ShiftElements;
2785 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2786 SmallVector<SDValue, 16> UnrolledOps;
2787 for (size_t i = 0; i < NumLanes; ++i) {
2788 SDValue MaskedShiftValue =
2789 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2790 SDValue ShiftedValue = ShiftedElements[i];
2791 if (ShiftOpcode == ISD::SRA)
2792 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2793 ShiftedValue, DAG.getValueType(LaneT));
2794 UnrolledOps.push_back(
2795 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2796 }
2797 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2798}
2799
2800SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2801 SelectionDAG &DAG) const {
2802 SDLoc DL(Op);
2803
2804 // Only manually lower vector shifts
2805 assert(Op.getSimpleValueType().isVector());
2806
2807 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2808 auto ShiftVal = Op.getOperand(1);
2809
2810 // Try to skip bitmask operation since it is implied inside shift instruction
2811 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2812 if (MaskOp.getOpcode() != ISD::AND)
2813 return MaskOp;
2814 SDValue LHS = MaskOp.getOperand(0);
2815 SDValue RHS = MaskOp.getOperand(1);
2816 if (MaskOp.getValueType().isVector()) {
2817 APInt MaskVal;
2818 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2819 std::swap(LHS, RHS);
2820
2821 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2822 MaskVal == MaskBits)
2823 MaskOp = LHS;
2824 } else {
2825 if (!isa<ConstantSDNode>(RHS.getNode()))
2826 std::swap(LHS, RHS);
2827
2828 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2829 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2830 MaskOp = LHS;
2831 }
2832
2833 return MaskOp;
2834 };
2835
2836 // Skip vector and operation
2837 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2838 ShiftVal = DAG.getSplatValue(ShiftVal);
2839 if (!ShiftVal)
2840 return unrollVectorShift(Op, DAG);
2841
2842 // Skip scalar and operation
2843 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2844 // Use anyext because none of the high bits can affect the shift
2845 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2846
2847 unsigned Opcode;
2848 switch (Op.getOpcode()) {
2849 case ISD::SHL:
2850 Opcode = WebAssemblyISD::VEC_SHL;
2851 break;
2852 case ISD::SRA:
2853 Opcode = WebAssemblyISD::VEC_SHR_S;
2854 break;
2855 case ISD::SRL:
2856 Opcode = WebAssemblyISD::VEC_SHR_U;
2857 break;
2858 default:
2859 llvm_unreachable("unexpected opcode");
2860 }
2861
2862 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2863}
2864
2865SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2866 SelectionDAG &DAG) const {
2867 EVT ResT = Op.getValueType();
2868 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2869
2870 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2871 (SatVT == MVT::i32 || SatVT == MVT::i64))
2872 return Op;
2873
2874 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2875 return Op;
2876
2877 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2878 return Op;
2879
2880 return SDValue();
2881}
2882
2884 return (Op->getFlags().hasNoNaNs() ||
2885 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2886 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2887 (Op->getFlags().hasNoSignedZeros() ||
2888 DAG.isKnownNeverZeroFloat(Op->getOperand(0)) ||
2889 DAG.isKnownNeverZeroFloat(Op->getOperand(1)));
2890}
2891
2892SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2893 SelectionDAG &DAG) const {
2894 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2895 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2896 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2897 }
2898 return SDValue();
2899}
2900
2901SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2902 SelectionDAG &DAG) const {
2903 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2904 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2905 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2906 }
2907 return SDValue();
2908}
2909
2910//===----------------------------------------------------------------------===//
2911// Custom DAG combine hooks
2912//===----------------------------------------------------------------------===//
2913static SDValue
2915 auto &DAG = DCI.DAG;
2916 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2917
2918 // Hoist vector bitcasts that don't change the number of lanes out of unary
2919 // shuffles, where they are less likely to get in the way of other combines.
2920 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2921 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2922 SDValue Bitcast = N->getOperand(0);
2923 if (Bitcast.getOpcode() != ISD::BITCAST)
2924 return SDValue();
2925 if (!N->getOperand(1).isUndef())
2926 return SDValue();
2927 SDValue CastOp = Bitcast.getOperand(0);
2928 EVT SrcType = CastOp.getValueType();
2929 EVT DstType = Bitcast.getValueType();
2930 if (!SrcType.is128BitVector() ||
2931 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2932 return SDValue();
2933 SDValue NewShuffle = DAG.getVectorShuffle(
2934 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2935 return DAG.getBitcast(DstType, NewShuffle);
2936}
2937
2938/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2939/// split up into scalar instructions during legalization, and the vector
2940/// extending instructions are selected in performVectorExtendCombine below.
2941static SDValue
2944 auto &DAG = DCI.DAG;
2945 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2946 N->getOpcode() == ISD::SINT_TO_FP);
2947
2948 EVT InVT = N->getOperand(0)->getValueType(0);
2949 EVT ResVT = N->getValueType(0);
2950 MVT ExtVT;
2951 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2952 ExtVT = MVT::v4i32;
2953 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2954 ExtVT = MVT::v2i32;
2955 else
2956 return SDValue();
2957
2958 unsigned Op =
2960 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2961 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2962}
2963
2964static SDValue
2967 auto &DAG = DCI.DAG;
2968
2969 SDNodeFlags Flags = N->getFlags();
2970 SDValue Op0 = N->getOperand(0);
2971 EVT VT = N->getValueType(0);
2972
2973 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2974 // Depending on the target (runtime) backend, this might be performance
2975 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2976 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2977 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2978 }
2979
2980 return SDValue();
2981}
2982
2983static SDValue
2985 auto &DAG = DCI.DAG;
2986 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2987 N->getOpcode() == ISD::ZERO_EXTEND);
2988
2989 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2990 // possible before the extract_subvector can be expanded.
2991 auto Extract = N->getOperand(0);
2992 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2993 return SDValue();
2994 auto Source = Extract.getOperand(0);
2995 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2996 if (IndexNode == nullptr)
2997 return SDValue();
2998 auto Index = IndexNode->getZExtValue();
2999
3000 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3001 // extracted subvector is the low or high half of its source.
3002 EVT ResVT = N->getValueType(0);
3003 if (ResVT == MVT::v8i16) {
3004 if (Extract.getValueType() != MVT::v8i8 ||
3005 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3006 return SDValue();
3007 } else if (ResVT == MVT::v4i32) {
3008 if (Extract.getValueType() != MVT::v4i16 ||
3009 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3010 return SDValue();
3011 } else if (ResVT == MVT::v2i64) {
3012 if (Extract.getValueType() != MVT::v2i32 ||
3013 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3014 return SDValue();
3015 } else {
3016 return SDValue();
3017 }
3018
3019 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3020 bool IsLow = Index == 0;
3021
3022 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3023 : WebAssemblyISD::EXTEND_HIGH_S)
3024 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3025 : WebAssemblyISD::EXTEND_HIGH_U);
3026
3027 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3028}
3029
3030static SDValue
3032 auto &DAG = DCI.DAG;
3033
3034 auto GetWasmConversionOp = [](unsigned Op) {
3035 switch (Op) {
3037 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3039 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3040 case ISD::FP_ROUND:
3041 return WebAssemblyISD::DEMOTE_ZERO;
3042 }
3043 llvm_unreachable("unexpected op");
3044 };
3045
3046 auto IsZeroSplat = [](SDValue SplatVal) {
3047 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3048 APInt SplatValue, SplatUndef;
3049 unsigned SplatBitSize;
3050 bool HasAnyUndefs;
3051 // Endianness doesn't matter in this context because we are looking for
3052 // an all-zero value.
3053 return Splat &&
3054 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3055 HasAnyUndefs) &&
3056 SplatValue == 0;
3057 };
3058
3059 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3060 // Combine this:
3061 //
3062 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3063 //
3064 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3065 //
3066 // Or this:
3067 //
3068 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3069 //
3070 // into (f32x4.demote_zero_f64x2 $x).
3071 EVT ResVT;
3072 EVT ExpectedConversionType;
3073 auto Conversion = N->getOperand(0);
3074 auto ConversionOp = Conversion.getOpcode();
3075 switch (ConversionOp) {
3078 ResVT = MVT::v4i32;
3079 ExpectedConversionType = MVT::v2i32;
3080 break;
3081 case ISD::FP_ROUND:
3082 ResVT = MVT::v4f32;
3083 ExpectedConversionType = MVT::v2f32;
3084 break;
3085 default:
3086 return SDValue();
3087 }
3088
3089 if (N->getValueType(0) != ResVT)
3090 return SDValue();
3091
3092 if (Conversion.getValueType() != ExpectedConversionType)
3093 return SDValue();
3094
3095 auto Source = Conversion.getOperand(0);
3096 if (Source.getValueType() != MVT::v2f64)
3097 return SDValue();
3098
3099 if (!IsZeroSplat(N->getOperand(1)) ||
3100 N->getOperand(1).getValueType() != ExpectedConversionType)
3101 return SDValue();
3102
3103 unsigned Op = GetWasmConversionOp(ConversionOp);
3104 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3105 }
3106
3107 // Combine this:
3108 //
3109 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3110 //
3111 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3112 //
3113 // Or this:
3114 //
3115 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3116 //
3117 // into (f32x4.demote_zero_f64x2 $x).
3118 EVT ResVT;
3119 auto ConversionOp = N->getOpcode();
3120 switch (ConversionOp) {
3123 ResVT = MVT::v4i32;
3124 break;
3125 case ISD::FP_ROUND:
3126 ResVT = MVT::v4f32;
3127 break;
3128 default:
3129 llvm_unreachable("unexpected op");
3130 }
3131
3132 if (N->getValueType(0) != ResVT)
3133 return SDValue();
3134
3135 auto Concat = N->getOperand(0);
3136 if (Concat.getValueType() != MVT::v4f64)
3137 return SDValue();
3138
3139 auto Source = Concat.getOperand(0);
3140 if (Source.getValueType() != MVT::v2f64)
3141 return SDValue();
3142
3143 if (!IsZeroSplat(Concat.getOperand(1)) ||
3144 Concat.getOperand(1).getValueType() != MVT::v2f64)
3145 return SDValue();
3146
3147 unsigned Op = GetWasmConversionOp(ConversionOp);
3148 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3149}
3150
3151// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3152static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3153 const SDLoc &DL, unsigned VectorWidth) {
3154 EVT VT = Vec.getValueType();
3155 EVT ElVT = VT.getVectorElementType();
3156 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3157 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3158 VT.getVectorNumElements() / Factor);
3159
3160 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3161 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3162 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3163
3164 // This is the index of the first element of the VectorWidth-bit chunk
3165 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3166 IdxVal &= ~(ElemsPerChunk - 1);
3167
3168 // If the input is a buildvector just emit a smaller one.
3169 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3170 return DAG.getBuildVector(ResultVT, DL,
3171 Vec->ops().slice(IdxVal, ElemsPerChunk));
3172
3173 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3174 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3175}
3176
3177// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3178// is the expected destination value type after recursion. In is the initial
3179// input. Note that the input should have enough leading zero bits to prevent
3180// NARROW_U from saturating results.
3182 SelectionDAG &DAG) {
3183 EVT SrcVT = In.getValueType();
3184
3185 // No truncation required, we might get here due to recursive calls.
3186 if (SrcVT == DstVT)
3187 return In;
3188
3189 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3190 unsigned NumElems = SrcVT.getVectorNumElements();
3191 if (!isPowerOf2_32(NumElems))
3192 return SDValue();
3193 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3194 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3195
3196 LLVMContext &Ctx = *DAG.getContext();
3197 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3198
3199 // Narrow to the largest type possible:
3200 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3201 EVT InVT = MVT::i16, OutVT = MVT::i8;
3202 if (SrcVT.getScalarSizeInBits() > 16) {
3203 InVT = MVT::i32;
3204 OutVT = MVT::i16;
3205 }
3206 unsigned SubSizeInBits = SrcSizeInBits / 2;
3207 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3208 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3209
3210 // Split lower/upper subvectors.
3211 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3212 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3213
3214 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3215 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3216 Lo = DAG.getBitcast(InVT, Lo);
3217 Hi = DAG.getBitcast(InVT, Hi);
3218 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3219 return DAG.getBitcast(DstVT, Res);
3220 }
3221
3222 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3223 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3224 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3225 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3226
3227 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3228 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3229 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3230}
3231
3234 auto &DAG = DCI.DAG;
3235
3236 SDValue In = N->getOperand(0);
3237 EVT InVT = In.getValueType();
3238 if (!InVT.isSimple())
3239 return SDValue();
3240
3241 EVT OutVT = N->getValueType(0);
3242 if (!OutVT.isVector())
3243 return SDValue();
3244
3245 EVT OutSVT = OutVT.getVectorElementType();
3246 EVT InSVT = InVT.getVectorElementType();
3247 // Currently only cover truncate to v16i8 or v8i16.
3248 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3249 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3250 return SDValue();
3251
3252 SDLoc DL(N);
3254 OutVT.getScalarSizeInBits());
3255 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3256 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3257}
3258
3261 using namespace llvm::SDPatternMatch;
3262 auto &DAG = DCI.DAG;
3263 SDLoc DL(N);
3264 SDValue Src = N->getOperand(0);
3265 EVT VT = N->getValueType(0);
3266 EVT SrcVT = Src.getValueType();
3267
3268 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3269 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3270 return SDValue();
3271
3272 unsigned NumElts = SrcVT.getVectorNumElements();
3273 EVT Width = MVT::getIntegerVT(128 / NumElts);
3274
3275 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3276 // ==> bitmask
3277 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3278 return DAG.getZExtOrTrunc(
3279 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3280 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3281 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3282 SrcVT.changeVectorElementType(
3283 *DAG.getContext(), Width))}),
3284 DL, VT);
3285 }
3286
3287 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3288 if (NumElts == 32 || NumElts == 64) {
3289 // Strategy: We will setcc them separately in v16i8 -> v16i1
3290 // Bitcast them to i16, extend them to either i32 or i64.
3291 // Add them together, shifting left 1 by 1.
3292 SDValue Concat, SetCCVector;
3293 ISD::CondCode SetCond;
3294
3295 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3296 m_CondCode(SetCond)))))
3297 return SDValue();
3298 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3299 return SDValue();
3300
3301 uint64_t ElementWidth =
3303
3304 SmallVector<SDValue> VectorsToShuffle;
3305 for (size_t I = 0; I < Concat->ops().size(); I++) {
3306 VectorsToShuffle.push_back(DAG.getBitcast(
3307 MVT::i16,
3308 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3309 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3310 DAG, DL, 128),
3311 SetCond)));
3312 }
3313
3314 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3315 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3316
3317 for (SDValue V : VectorsToShuffle) {
3318 ReturningInteger = DAG.getNode(
3319 ISD::SHL, DL, ReturnType,
3320 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3321
3322 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3323 ReturningInteger =
3324 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3325 }
3326
3327 return ReturningInteger;
3328 }
3329
3330 return SDValue();
3331}
3332
3334 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3335 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3336 // any_true (setcc <X>, 0, ne) => (any_true X)
3337 // all_true (setcc <X>, 0, ne) => (all_true X)
3338 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3339 using namespace llvm::SDPatternMatch;
3340
3341 SDValue LHS;
3342 if (N->getNumOperands() < 2 ||
3343 !sd_match(N->getOperand(1),
3345 return SDValue();
3346 EVT LT = LHS.getValueType();
3347 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3348 return SDValue();
3349
3350 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3351 ISD::CondCode SetType,
3352 Intrinsic::WASMIntrinsics InPost) {
3353 if (N->getConstantOperandVal(0) != InPre)
3354 return SDValue();
3355
3356 SDValue LHS;
3357 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3358 m_SpecificCondCode(SetType))))
3359 return SDValue();
3360
3361 SDLoc DL(N);
3362 SDValue Ret = DAG.getZExtOrTrunc(
3363 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3364 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3365 DL, MVT::i1);
3366 if (SetType == ISD::SETEQ)
3367 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3368 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3369 };
3370
3371 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3372 Intrinsic::wasm_alltrue))
3373 return AnyTrueEQ;
3374 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3375 Intrinsic::wasm_anytrue))
3376 return AllTrueEQ;
3377 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3378 Intrinsic::wasm_anytrue))
3379 return AnyTrueNE;
3380 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3381 Intrinsic::wasm_alltrue))
3382 return AllTrueNE;
3383
3384 return SDValue();
3385}
3386
3387template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3388 Intrinsic::ID Intrin>
3390 SDValue LHS = N->getOperand(0);
3391 SDValue RHS = N->getOperand(1);
3392 SDValue Cond = N->getOperand(2);
3393 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3394 return SDValue();
3395
3396 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3397 return SDValue();
3398
3399 SDLoc DL(N);
3400 SDValue Ret = DAG.getZExtOrTrunc(
3401 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3402 {DAG.getConstant(Intrin, DL, MVT::i32),
3403 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3404 DL, MVT::i1);
3405 if (RequiresNegate)
3406 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3407 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3408}
3409
3410/// Try to convert a i128 comparison to a v16i8 comparison before type
3411/// legalization splits it up into chunks
3412static SDValue
3414 const WebAssemblySubtarget *Subtarget) {
3415
3416 SDLoc DL(N);
3417 SDValue X = N->getOperand(0);
3418 SDValue Y = N->getOperand(1);
3419 EVT VT = N->getValueType(0);
3420 EVT OpVT = X.getValueType();
3421
3422 SelectionDAG &DAG = DCI.DAG;
3424 Attribute::NoImplicitFloat))
3425 return SDValue();
3426
3427 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3428 // We're looking for an oversized integer equality comparison with SIMD
3429 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3430 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3431 return SDValue();
3432
3433 // Don't perform this combine if constructing the vector will be expensive.
3434 auto IsVectorBitCastCheap = [](SDValue X) {
3436 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3437 };
3438
3439 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3440 return SDValue();
3441
3442 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3443 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3444 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3445
3446 SDValue Intr =
3447 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3448 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3449 : Intrinsic::wasm_anytrue,
3450 DL, MVT::i32),
3451 Cmp});
3452
3453 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3454 ISD::SETNE);
3455}
3456
3459 const WebAssemblySubtarget *Subtarget) {
3460 if (!DCI.isBeforeLegalize())
3461 return SDValue();
3462
3463 EVT VT = N->getValueType(0);
3464 if (!VT.isScalarInteger())
3465 return SDValue();
3466
3467 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3468 return V;
3469
3470 SDValue LHS = N->getOperand(0);
3471 if (LHS->getOpcode() != ISD::BITCAST)
3472 return SDValue();
3473
3474 EVT FromVT = LHS->getOperand(0).getValueType();
3475 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3476 return SDValue();
3477
3478 unsigned NumElts = FromVT.getVectorNumElements();
3479 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3480 return SDValue();
3481
3482 if (!cast<ConstantSDNode>(N->getOperand(1)))
3483 return SDValue();
3484
3485 auto &DAG = DCI.DAG;
3486 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3487 MVT::getIntegerVT(128 / NumElts));
3488 // setcc (iN (bitcast (vNi1 X))), 0, ne
3489 // ==> any_true (vNi1 X)
3491 N, VecVT, DAG)) {
3492 return Match;
3493 }
3494 // setcc (iN (bitcast (vNi1 X))), 0, eq
3495 // ==> xor (any_true (vNi1 X)), -1
3497 N, VecVT, DAG)) {
3498 return Match;
3499 }
3500 // setcc (iN (bitcast (vNi1 X))), -1, eq
3501 // ==> all_true (vNi1 X)
3503 N, VecVT, DAG)) {
3504 return Match;
3505 }
3506 // setcc (iN (bitcast (vNi1 X))), -1, ne
3507 // ==> xor (all_true (vNi1 X)), -1
3509 N, VecVT, DAG)) {
3510 return Match;
3511 }
3512 return SDValue();
3513}
3514
3516 EVT VT = N->getValueType(0);
3517 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3518 return SDValue();
3519
3520 // Mul with extending inputs.
3521 SDValue LHS = N->getOperand(0);
3522 SDValue RHS = N->getOperand(1);
3523 if (LHS.getOpcode() != RHS.getOpcode())
3524 return SDValue();
3525
3526 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3527 LHS.getOpcode() != ISD::ZERO_EXTEND)
3528 return SDValue();
3529
3530 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3531 return SDValue();
3532
3533 EVT FromVT = LHS->getOperand(0).getValueType();
3534 EVT EltTy = FromVT.getVectorElementType();
3535 if (EltTy != MVT::i8)
3536 return SDValue();
3537
3538 // For an input DAG that looks like this
3539 // %a = input_type
3540 // %b = input_type
3541 // %lhs = extend %a to output_type
3542 // %rhs = extend %b to output_type
3543 // %mul = mul %lhs, %rhs
3544
3545 // input_type | output_type | instructions
3546 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3547 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3548 // | | %low_low = i32x4.ext_low_i16x8_ %low
3549 // | | %low_high = i32x4.ext_high_i16x8_ %low
3550 // | | %high_low = i32x4.ext_low_i16x8_ %high
3551 // | | %high_high = i32x4.ext_high_i16x8_ %high
3552 // | | %res = concat_vector(...)
3553 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3554 // | | %low_low = i32x4.ext_low_i16x8_ %low
3555 // | | %low_high = i32x4.ext_high_i16x8_ %low
3556 // | | %res = concat_vector(%low_low, %low_high)
3557
3558 SDLoc DL(N);
3559 unsigned NumElts = VT.getVectorNumElements();
3560 SDValue ExtendInLHS = LHS->getOperand(0);
3561 SDValue ExtendInRHS = RHS->getOperand(0);
3562 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3563 unsigned ExtendLowOpc =
3564 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3565 unsigned ExtendHighOpc =
3566 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3567
3568 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3569 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3570 };
3571 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3572 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3573 };
3574
3575 if (NumElts == 16) {
3576 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3577 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3578 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3579 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3580 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3581 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3582 SDValue SubVectors[] = {
3583 GetExtendLow(MVT::v4i32, MulLow),
3584 GetExtendHigh(MVT::v4i32, MulLow),
3585 GetExtendLow(MVT::v4i32, MulHigh),
3586 GetExtendHigh(MVT::v4i32, MulHigh),
3587 };
3588 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3589 } else {
3590 assert(NumElts == 8);
3591 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3592 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3593 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3594 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3595 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3596 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3597 }
3598 return SDValue();
3599}
3600
3603 assert(N->getOpcode() == ISD::MUL);
3604 EVT VT = N->getValueType(0);
3605 if (!VT.isVector())
3606 return SDValue();
3607
3608 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3609 return Res;
3610
3611 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3612 // extend them to v8i16.
3613 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3614 return SDValue();
3615
3616 SDLoc DL(N);
3617 SelectionDAG &DAG = DCI.DAG;
3618 SDValue LHS = N->getOperand(0);
3619 SDValue RHS = N->getOperand(1);
3620 EVT MulVT = MVT::v8i16;
3621
3622 if (VT == MVT::v8i8) {
3623 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3624 DAG.getUNDEF(MVT::v8i8));
3625 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3626 DAG.getUNDEF(MVT::v8i8));
3627 SDValue LowLHS =
3628 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3629 SDValue LowRHS =
3630 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3631 SDValue MulLow = DAG.getBitcast(
3632 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3633 // Take the low byte of each lane.
3634 SDValue Shuffle = DAG.getVectorShuffle(
3635 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3636 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3637 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3638 } else {
3639 assert(VT == MVT::v16i8 && "Expected v16i8");
3640 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3641 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3642 SDValue HighLHS =
3643 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3644 SDValue HighRHS =
3645 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3646
3647 SDValue MulLow =
3648 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3649 SDValue MulHigh =
3650 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3651
3652 // Take the low byte of each lane.
3653 return DAG.getVectorShuffle(
3654 VT, DL, MulLow, MulHigh,
3655 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3656 }
3657}
3658
3659SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3660 SelectionDAG &DAG) {
3661 SDLoc DL(In);
3662 LLVMContext &Ctx = *DAG.getContext();
3663 EVT InVT = In.getValueType();
3664 unsigned NumElems = InVT.getVectorNumElements() * 2;
3665 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3666 SDValue Concat =
3667 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3668 if (NumElems < RequiredNumElems) {
3669 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3670 }
3671 return Concat;
3672}
3673
3675 EVT OutVT = N->getValueType(0);
3676 if (!OutVT.isVector())
3677 return SDValue();
3678
3679 EVT OutElTy = OutVT.getVectorElementType();
3680 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3681 return SDValue();
3682
3683 unsigned NumElems = OutVT.getVectorNumElements();
3684 if (!isPowerOf2_32(NumElems))
3685 return SDValue();
3686
3687 EVT FPVT = N->getOperand(0)->getValueType(0);
3688 if (FPVT.getVectorElementType() != MVT::f32)
3689 return SDValue();
3690
3691 SDLoc DL(N);
3692
3693 // First, convert to i32.
3694 LLVMContext &Ctx = *DAG.getContext();
3695 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3696 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3698 OutVT.getScalarSizeInBits());
3699 // Mask out the top MSBs.
3700 SDValue Masked =
3701 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3702
3703 if (OutVT.getSizeInBits() < 128) {
3704 // Create a wide enough vector that we can use narrow.
3705 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3706 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3707 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3708 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3709 return DAG.getBitcast(
3710 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3711 } else {
3712 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3713 }
3714 return SDValue();
3715}
3716
3717SDValue
3718WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3719 DAGCombinerInfo &DCI) const {
3720 switch (N->getOpcode()) {
3721 default:
3722 return SDValue();
3723 case ISD::BITCAST:
3724 return performBitcastCombine(N, DCI);
3725 case ISD::SETCC:
3726 return performSETCCCombine(N, DCI, Subtarget);
3728 return performVECTOR_SHUFFLECombine(N, DCI);
3729 case ISD::SIGN_EXTEND:
3730 case ISD::ZERO_EXTEND:
3731 return performVectorExtendCombine(N, DCI);
3732 case ISD::UINT_TO_FP:
3733 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3734 return ExtCombine;
3735 return performVectorNonNegToFPCombine(N, DCI);
3736 case ISD::SINT_TO_FP:
3737 return performVectorExtendToFPCombine(N, DCI);
3740 case ISD::FP_ROUND:
3742 return performVectorTruncZeroCombine(N, DCI);
3743 case ISD::FP_TO_SINT:
3744 case ISD::FP_TO_UINT:
3745 return performConvertFPCombine(N, DCI.DAG);
3746 case ISD::TRUNCATE:
3747 return performTruncateCombine(N, DCI);
3749 return performAnyAllCombine(N, DCI.DAG);
3750 case ISD::MUL:
3751 return performMulCombine(N, DCI);
3752 }
3753}
unsigned const MachineRegisterInfo * MRI
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static bool callingConvSupported(CallingConv::ID CallConv)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:712
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Address, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:243
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:444
bool isFixedLengthVector() const
Definition ValueTypes.h:181
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:292
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.