LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
// NOTE(review): this span is the torso of the WebAssemblyTargetLowering
// constructor — the signature (listing line 44) and many statement lines are
// elided by the doc extraction (visible in the numbering gaps). Only comments
// are added here; restore the elided lines from the full source before
// compiling. The body registers legal types/register classes and the
// operation actions (Legal/Custom/Expand/Promote) that drive ISel.
 45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
 46 : TargetLowering(TM, STI), Subtarget(&STI) {
 // Pointer-sized integer type: i64 under wasm64 (hasAddr64), else i32.
 47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
 48
 49 // Set the load count for memcmp expand optimization
 52
 53 // Booleans always contain 0 or 1.
 55 // Except in SIMD vectors
 57 // We don't know the microarchitecture here, so just reduce register pressure.
 59 // Tell ISel that we have a stack pointer.
 // Stack pointer register also depends on wasm32 vs wasm64.
 61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
 62 // Set up the register classes.
 63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
 64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
 65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
 66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
 // All 128-bit SIMD vector types share the single V128 register class.
 67 if (Subtarget->hasSIMD128()) {
 68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
 69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
 70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
 71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
 72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
 73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
 74 }
 75 if (Subtarget->hasFP16()) {
 76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
 77 }
 // Reference types (externref/funcref/exnref) live in their own classes,
 // outside linear memory.
 78 if (Subtarget->hasReferenceTypes()) {
 79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
 80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
 81 if (Subtarget->hasExceptionHandling()) {
 82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
 83 }
 84 }
 85 // Compute derived properties from the register classes.
 86 computeRegisterProperties(Subtarget->getRegisterInfo());
 87
 88 // Transform loads and stores to pointers in address space 1 to loads and
 89 // stores to WebAssembly global variables, outside linear memory.
 90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
 93 }
 94 if (Subtarget->hasSIMD128()) {
 95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
 96 MVT::v2f64}) {
 99 }
 100 }
 101 if (Subtarget->hasFP16()) {
 102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
 104 }
 105 if (Subtarget->hasReferenceTypes()) {
 106 // We need custom load and store lowering for both externref, funcref and
 107 // Other. The MVT::Other here represents tables of reference types.
 108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
 111 }
 112 }
 113
 121
 122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
 123 // default action for va_start, so we do that custom.
 128
 // Floating-point configuration; v8f16 is skipped unless FP16 is enabled.
 129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
 130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
 131 continue;
 132 }
 133 // Don't expand the floating-point types to constant pools.
 135 // Expand floating-point comparisons.
 136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
 139 // Expand floating-point library function operators.
 142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
 143 if (MVT(T).isVector())
 145 else
 147 // Note supported floating-point library function operators that otherwise
 148 // default to expand.
 152 // Support minimum and maximum, which otherwise default to expand.
 155 // When experimental v8f16 support is enabled these instructions don't need
 156 // to be expanded.
 157 if (T != MVT::v8f16) {
 160 }
 // f32<->f16 conversions are native only with FP16 support; otherwise the
 // truncating store (and, per the elided lines, the extending load) expand.
 161 if (Subtarget->hasFP16() && T == MVT::f32) {
 163 setTruncStoreAction(T, MVT::f16, Legal);
 164 } else {
 166 setTruncStoreAction(T, MVT::f16, Expand);
 167 }
 168 }
 169
 170 // Expand unavailable integer operations.
 171 for (auto Op :
 175 for (auto T : {MVT::i32, MVT::i64})
 177 if (Subtarget->hasSIMD128())
 178 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
 180 }
 181
 182 if (Subtarget->hasWideArithmetic()) {
 188 }
 189
 190 if (Subtarget->hasNontrappingFPToInt())
 192 for (auto T : {MVT::i32, MVT::i64})
 194
 195 if (Subtarget->hasRelaxedSIMD()) {
 198 {MVT::v4f32, MVT::v2f64}, Custom);
 199 }
 200 // SIMD-specific configuration
 201 if (Subtarget->hasSIMD128()) {
 202
 204
 205 // Combine wide-vector muls, with extend inputs, to extmul_half.
 208
 209 // Combine vector mask reductions into alltrue/anytrue
 211
 212 // Convert vector to integer bitcasts to bitmask
 214
 215 // Hoist bitcasts out of shuffles
 217
 218 // Combine extends of extract_subvectors into widening ops
 220
 221 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
 222 // conversions ops
 225
 226 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
 227 // into conversion ops
 231
 233
 234 // Support saturating add/sub for i8x16 and i16x8
 236 for (auto T : {MVT::v16i8, MVT::v8i16})
 238
 239 // Support integer abs
 240 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
 242
 243 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
 244 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
 245 MVT::v2f64})
 247
 248 if (Subtarget->hasFP16()) {
 251 }
 252
 253 // We have custom shuffle lowering to expose the shuffle mask
 254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
 255 MVT::v2f64})
 257
 258 if (Subtarget->hasFP16())
 260
 261 // Support splatting
 262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
 263 MVT::v2f64})
 265
 266 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
 267
 268 // Custom lowering since wasm shifts must have a scalar shift amount
 269 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
 270 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
 272
 273 // Custom lower lane accesses to expand out variable indices
 275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
 276 MVT::v2f64})
 278
 279 // There is no i8x16.mul instruction
 280 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
 281
 282 // Expand integer operations supported for scalars but not SIMD
 283 for (auto Op :
 285 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
 287
 288 // But we do have integer min and max operations
 289 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
 290 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
 292
 293 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
 294 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
 295 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
 296 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
 297
 298 // Custom lower bit counting operations for other types to scalarize them.
 299 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
 300 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
 302
 303 // Expand float operations supported for scalars but not SIMD
 306 for (auto T : {MVT::v4f32, MVT::v2f64})
 308
 309 // Unsigned comparison operations are unavailable for i64x2 vectors.
 311 setCondCodeAction(CC, MVT::v2i64, Custom);
 312
 313 // 64x2 conversions are not in the spec
 314 for (auto Op :
 316 for (auto T : {MVT::v2i64, MVT::v2f64})
 318
 319 // But saturating fp_to_int converstions are
 321 setOperationAction(Op, MVT::v4i32, Custom);
 322 if (Subtarget->hasFP16()) {
 323 setOperationAction(Op, MVT::v8i16, Custom);
 324 }
 325 }
 326
 327 // Support vector extending
 332 }
 333
 334 if (Subtarget->hasFP16()) {
 335 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
 336 }
 337
 338 if (Subtarget->hasRelaxedSIMD()) {
 341 }
 342
 343 // Partial MLA reductions.
 345 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
 346 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
 347 }
 348 }
 349
 350 // As a special case, these operators use the type to mean the type to
 351 // sign-extend from.
 353 if (!Subtarget->hasSignExt()) {
 354 // Sign extends are legal only when extending a vector extract
 355 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
 356 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
 358 }
 361
 362 // Dynamic stack allocation: use the default expansion.
 366
 370
 371 // Expand these forms; we pattern-match the forms that we can handle in isel.
 372 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
 373 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
 375
 376 if (Subtarget->hasReferenceTypes())
 377 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
 378 for (auto T : {MVT::externref, MVT::funcref})
 380
 381 // There is no vector conditional select instruction
 382 for (auto T :
 383 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
 385
 386 // We have custom switch handling.
 388
 389 // WebAssembly doesn't have:
 390 // - Floating-point extending loads.
 391 // - Floating-point truncating stores.
 392 // - i1 extending loads.
 393 // - truncating SIMD stores and most extending loads
 394 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
 395 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 // i1 loads widen to the next legal integer type.
 396 for (auto T : MVT::integer_valuetypes())
 397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
 398 setLoadExtAction(Ext, T, MVT::i1, Promote);
 399 if (Subtarget->hasSIMD128()) {
 // Default every vector extending load to Expand, then re-legalize the few
 // widening combinations wasm actually provides just below.
 400 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
 401 MVT::v2f64}) {
 402 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
 403 if (MVT(T) != MemT) {
 405 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
 406 setLoadExtAction(Ext, T, MemT, Expand);
 407 }
 408 }
 409 }
 410 // But some vector extending loads are legal
 411 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
 412 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
 413 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
 414 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
 415 }
 416 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
 417 }
 418
 419 // Don't do anything clever with build_pairs
 421
 422 // Trap lowers to wasm unreachable
 423 setOperationAction(ISD::TRAP, MVT::Other, Legal);
 425
 426 // Exception handling intrinsics
 430
 432
 433 // Always convert switches to br_tables unless there is only one case, which
 434 // is equivalent to a simple branch. This reduces code size for wasm, and we
 435 // defer possible jump table optimizations to the VM.
 437}
438
447
456
// Decides whether an atomicrmw instruction should be expanded in IR or left
// for the backend.
// NOTE(review): the return type / qualified name header (listing line 457),
// the case labels for the natively supported atomicrmw operations (lines
// 462-468), and the final return (line 472) are elided from this extraction;
// only the switch skeleton is visible here.
458WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
 459 const AtomicRMWInst *AI) const {
 460 // We have wasm instructions for these
 461 switch (AI->getOperation()) {
 469 default:
 470 break;
 471 }
 473}
474
// Returns true when extract(binop(vec, ...)) is better rewritten as
// binop(extract(vec), ...), i.e. when scalarizing the vector op pays off.
475bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 476 // Implementation copied from X86TargetLowering.
 477 unsigned Opc = VecOp.getOpcode();
 478
 479 // Assume target opcodes can't be scalarized.
 480 // TODO - do we have any exceptions?
 // NOTE(review): the condition guarding this early return (listing line 481)
 // is elided — presumably it tests for a target-specific opcode; confirm
 // against the full source.
 482 return false;
 483
 484 // If the vector op is not supported, try to convert to scalar.
 485 EVT VecVT = VecOp.getValueType();
 // NOTE(review): the legality test guarding this return (listing line 486)
 // is elided — presumably "if the vector op is not legal-or-custom"; confirm.
 487 return true;
 488
 489 // If the vector op is supported, but the scalar op is not, the transform may
 490 // not be worthwhile.
 491 EVT ScalarVT = VecVT.getScalarType();
 492 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
 493}
494
495FastISel *WebAssemblyTargetLowering::createFastISel(
496 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
497 const LibcallLoweringInfo *LibcallLowering) const {
498 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
499}
500
// Chooses the scalar type used for shift-amount operands of width-VT shifts.
// NOTE(review): two elided spans here — the static_assert text opener
// (listing line 511) and the construction of `Result` (lines 515-516,
// presumably MVT::getIntegerVT(BitWidth) guarded by an assert); restore from
// the full source.
501MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
 502 EVT VT) const {
 // Round the value width up to a power of two, with a floor of 8 bits.
 503 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
 504 if (BitWidth > 1 && BitWidth < 8)
 505 BitWidth = 8;
 506
 507 if (BitWidth > 64) {
 508 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
 509 // the count to be an i32.
 510 BitWidth = 32;
 512 "32-bit shift counts ought to be enough for anyone");
 513 }
 514
 517 "Unable to represent scalar shift amount type");
 518 return Result;
 519}
520
521// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
 522// undefined result on invalid/overflow, to the WebAssembly opcode, which
 523// traps on invalid/overflow.
// NOTE(review): the function header (listing lines 524-525 — the static
// declaration taking MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB)
// plus a few statements (529-530: presumably the MachineRegisterInfo &MRI
// used below, 553: the insertion iterator `It`, 560: post-splice
// bookkeeping) are elided from this extraction; restore from the full
// source. The visible code builds a CFG diamond: range-check the input, then
// either run the trapping truncation or substitute a fixed value.
 526 const TargetInstrInfo &TII,
 527 bool IsUnsigned, bool Int64,
 528 bool Float64, unsigned LoweredOpcode) {
 530
 531 Register OutReg = MI.getOperand(0).getReg();
 532 Register InReg = MI.getOperand(1).getReg();
 533
 // Select opcodes by float width (f32 vs f64) / integer width (i32 vs i64).
 534 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
 535 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
 536 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
 537 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
 538 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
 539 unsigned Eqz = WebAssembly::EQZ_I32;
 540 unsigned And = WebAssembly::AND_I32;
 // CmpVal is the smallest magnitude that is out of range: -INT_MIN (= 2^31
 // or 2^63) for signed, and twice that (= 2^32 or 2^64) for unsigned.
 541 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
 542 int64_t Substitute = IsUnsigned ? 0 : Limit;
 543 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
 544 auto &Context = BB->getParent()->getFunction().getContext();
 545 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
 546
 547 const BasicBlock *LLVMBB = BB->getBasicBlock();
 548 MachineFunction *F = BB->getParent();
 549 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
 550 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
 551 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
 552
 554 F->insert(It, FalseMBB);
 555 F->insert(It, TrueMBB);
 556 F->insert(It, DoneMBB);
 557
 558 // Transfer the remainder of BB and its successor edges to DoneMBB.
 559 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
 561
 562 BB->addSuccessor(TrueMBB);
 563 BB->addSuccessor(FalseMBB);
 564 TrueMBB->addSuccessor(DoneMBB);
 565 FalseMBB->addSuccessor(DoneMBB);
 566
 567 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
 568 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
 569 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
 570 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 571 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 572 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
 573 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
 574
 575 MI.eraseFromParent();
 576 // For signed numbers, we can do a single comparison to determine whether
 577 // fabs(x) is within range.
 578 if (IsUnsigned) {
 579 Tmp0 = InReg;
 580 } else {
 581 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
 582 }
 // CmpReg := (|x| or x) < CmpVal, i.e. "below the overflow threshold".
 583 BuildMI(BB, DL, TII.get(FConst), Tmp1)
 584 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
 585 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
 586
 587 // For unsigned numbers, we have to do a separate comparison with zero.
 588 if (IsUnsigned) {
 589 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
 590 Register SecondCmpReg =
 591 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 592 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 593 BuildMI(BB, DL, TII.get(FConst), Tmp1)
 594 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
 595 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
 596 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
 597 CmpReg = AndReg;
 598 }
 599
 // EqzReg is 1 when the input is OUT of range (the range check failed).
 600 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
 601
 602 // Create the CFG diamond to select between doing the conversion or using
 603 // the substitute value.
 604 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
 605 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
 606 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
 607 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
 608 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
 609 .addReg(FalseReg)
 610 .addMBB(FalseMBB)
 611 .addReg(TrueReg)
 612 .addMBB(TrueMBB);
 613
 614 return DoneMBB;
 615}
616
617// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
 618// instruction to handle the zero-length case.
// NOTE(review): the function header (listing lines 619-620 — the static
// declaration taking MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB)
// and a few statements (622-623: presumably the MachineRegisterInfo &MRI
// used below, 671: the insertion iterator `It`, 677: post-splice
// bookkeeping) are elided from this extraction; restore from the full
// source. wasm's memory.copy traps on a zero length with out-of-bounds
// pointers, so a runtime length check guards the copy unless the length is a
// known non-zero constant.
 621 const TargetInstrInfo &TII, bool Int64) {
 623
 624 MachineOperand DstMem = MI.getOperand(0);
 625 MachineOperand SrcMem = MI.getOperand(1);
 626 MachineOperand Dst = MI.getOperand(2);
 627 MachineOperand Src = MI.getOperand(3);
 628 MachineOperand Len = MI.getOperand(4);
 629
 630 // If the length is a constant, we don't actually need the check.
 631 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
 632 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
 633 Def->getOpcode() == WebAssembly::CONST_I64) {
 634 if (Def->getOperand(1).getImm() == 0) {
 635 // A zero-length memcpy is a no-op.
 636 MI.eraseFromParent();
 637 return BB;
 638 }
 639 // A non-zero-length memcpy doesn't need a zero check.
 640 unsigned MemoryCopy =
 641 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
 642 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
 643 .add(DstMem)
 644 .add(SrcMem)
 645 .add(Dst)
 646 .add(Src)
 647 .add(Len);
 648 MI.eraseFromParent();
 649 return BB;
 650 }
 651 }
 652
 653 // We're going to add an extra use to `Len` to test if it's zero; that
 654 // use shouldn't be a kill, even if the original use is.
 655 MachineOperand NoKillLen = Len;
 656 NoKillLen.setIsKill(false);
 657
 658 // Decide on which `MachineInstr` opcode we're going to use.
 659 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
 660 unsigned MemoryCopy =
 661 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
 662
 663 // Create two new basic blocks; one for the new `memory.fill` that we can
 664 // branch over, and one for the rest of the instructions after the original
 665 // `memory.fill`.
 666 const BasicBlock *LLVMBB = BB->getBasicBlock();
 667 MachineFunction *F = BB->getParent();
 668 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
 669 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
 670
 672 F->insert(It, TrueMBB);
 673 F->insert(It, DoneMBB);
 674
 675 // Transfer the remainder of BB and its successor edges to DoneMBB.
 676 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
 678
 679 // Connect the CFG edges.
 680 BB->addSuccessor(TrueMBB);
 681 BB->addSuccessor(DoneMBB);
 682 TrueMBB->addSuccessor(DoneMBB);
 683
 684 // Create a virtual register for the `Eqz` result.
 685 unsigned EqzReg;
 686 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 687
 688 // Erase the original `memory.copy`.
 689 MI.eraseFromParent();
 690
 691 // Test if `Len` is zero.
 692 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
 693
 694 // Insert a new `memory.copy`.
 695 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
 696 .add(DstMem)
 697 .add(SrcMem)
 698 .add(Dst)
 699 .add(Src)
 700 .add(Len);
 701
 702 // Create the CFG triangle.
 703 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
 704 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
 705
 706 return DoneMBB;
 707}
708
709// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
 710// instruction to handle the zero-length case.
// NOTE(review): the function header (listing lines 711-712 — the static
// declaration taking MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB)
// and a few statements (714-715: presumably the MachineRegisterInfo &MRI
// used below, 761: the insertion iterator `It`, 767: post-splice
// bookkeeping) are elided from this extraction; restore from the full
// source. Mirrors LowerMemcpy above, with memory.fill in place of
// memory.copy.
 713 const TargetInstrInfo &TII, bool Int64) {
 715
 716 MachineOperand Mem = MI.getOperand(0);
 717 MachineOperand Dst = MI.getOperand(1);
 718 MachineOperand Val = MI.getOperand(2);
 719 MachineOperand Len = MI.getOperand(3);
 720
 721 // If the length is a constant, we don't actually need the check.
 722 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
 723 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
 724 Def->getOpcode() == WebAssembly::CONST_I64) {
 725 if (Def->getOperand(1).getImm() == 0) {
 726 // A zero-length memset is a no-op.
 727 MI.eraseFromParent();
 728 return BB;
 729 }
 730 // A non-zero-length memset doesn't need a zero check.
 731 unsigned MemoryFill =
 732 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
 733 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
 734 .add(Mem)
 735 .add(Dst)
 736 .add(Val)
 737 .add(Len)
 738 MI.eraseFromParent();
 739 return BB;
 740 }
 741 }
 742
 743 // We're going to add an extra use to `Len` to test if it's zero; that
 744 // use shouldn't be a kill, even if the original use is.
 745 MachineOperand NoKillLen = Len;
 746 NoKillLen.setIsKill(false);
 747
 748 // Decide on which `MachineInstr` opcode we're going to use.
 749 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
 750 unsigned MemoryFill =
 751 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
 752
 753 // Create two new basic blocks; one for the new `memory.fill` that we can
 754 // branch over, and one for the rest of the instructions after the original
 755 // `memory.fill`.
 756 const BasicBlock *LLVMBB = BB->getBasicBlock();
 757 MachineFunction *F = BB->getParent();
 758 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
 759 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
 760
 762 F->insert(It, TrueMBB);
 763 F->insert(It, DoneMBB);
 764
 765 // Transfer the remainder of BB and its successor edges to DoneMBB.
 766 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
 768
 769 // Connect the CFG edges.
 770 BB->addSuccessor(TrueMBB);
 771 BB->addSuccessor(DoneMBB);
 772 TrueMBB->addSuccessor(DoneMBB);
 773
 774 // Create a virtual register for the `Eqz` result.
 775 unsigned EqzReg;
 776 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 777
 778 // Erase the original `memory.fill`.
 779 MI.eraseFromParent();
 780
 781 // Test if `Len` is zero.
 782 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
 783
 784 // Insert a new `memory.fill`.
 785 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
 786
 787 // Create the CFG triangle.
 788 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
 789 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
 790
 791 return DoneMBB;
 792}
793
// Fuse a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS) pseudo pair into a
// single real call instruction (CALL / CALL_INDIRECT / RET_CALL /
// RET_CALL_INDIRECT).
// NOTE(review): several lines are elided from this extraction — listing line
// 795 (the rest of the header: MachineInstr &CallResults, DebugLoc DL,
// MachineBasicBlock *BB), lines 863/865 (the two table-symbol getter calls in
// the `Table` ternary), and line 895 (the `Table` lookup in the funcref
// cleanup below); restore from the full source.
794static MachineBasicBlock *
 796 const WebAssemblySubtarget *Subtarget,
 797 const TargetInstrInfo &TII) {
 // The params pseudo always immediately precedes the results pseudo.
 798 MachineInstr &CallParams = *CallResults.getPrevNode();
 799 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
 800 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
 801 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
 802
 // Operand 0 of CALL_PARAMS is the callee: a register or frame index means
 // an indirect call; otherwise it is a direct symbol.
 803 bool IsIndirect =
 804 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
 805 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
 806
 807 bool IsFuncrefCall = false;
 808 if (IsIndirect && CallParams.getOperand(0).isReg()) {
 809 Register Reg = CallParams.getOperand(0).getReg();
 810 const MachineFunction *MF = BB->getParent();
 811 const MachineRegisterInfo &MRI = MF->getRegInfo();
 812 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
 813 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
 814 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
 815 }
 816
 817 unsigned CallOp;
 818 if (IsIndirect && IsRetCall) {
 819 CallOp = WebAssembly::RET_CALL_INDIRECT;
 820 } else if (IsIndirect) {
 821 CallOp = WebAssembly::CALL_INDIRECT;
 822 } else if (IsRetCall) {
 823 CallOp = WebAssembly::RET_CALL;
 824 } else {
 825 CallOp = WebAssembly::CALL;
 826 }
 827
 828 MachineFunction &MF = *BB->getParent();
 829 const MCInstrDesc &MCID = TII.get(CallOp);
 830 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
 831
 832 // Move the function pointer to the end of the arguments for indirect calls
 833 if (IsIndirect) {
 834 auto FnPtr = CallParams.getOperand(0);
 835 CallParams.removeOperand(0);
 836
 837 // For funcrefs, call_indirect is done through __funcref_call_table and the
 838 // funcref is always installed in slot 0 of the table, therefore instead of
 839 // having the function pointer added at the end of the params list, a zero
 840 // (the index in
 841 // __funcref_call_table is added).
 842 if (IsFuncrefCall) {
 843 Register RegZero =
 844 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
 845 MachineInstrBuilder MIBC0 =
 846 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
 847
 848 BB->insert(CallResults.getIterator(), MIBC0);
 849 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
 850 } else
 851 CallParams.addOperand(FnPtr);
 852 }
 853
 // Defs (the call's results) come first on the fused instruction.
 854 for (auto Def : CallResults.defs())
 855 MIB.add(Def);
 856
 857 if (IsIndirect) {
 858 // Placeholder for the type index.
 859 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
 860 MIB.addImm(0);
 861 // The table into which this call_indirect indexes.
 862 MCSymbolWasm *Table = IsFuncrefCall
 864 MF.getContext(), Subtarget)
 866 MF.getContext(), Subtarget);
 867 if (Subtarget->hasCallIndirectOverlong()) {
 868 MIB.addSym(Table);
 869 } else {
 870 // For the MVP there is at most one table whose number is 0, but we can't
 871 // write a table symbol or issue relocations. Instead we just ensure the
 872 // table is live and write a zero.
 873 Table->setNoStrip();
 874 MIB.addImm(0);
 875 }
 876 }
 877
 878 for (auto Use : CallParams.uses())
 879 MIB.add(Use);
 880
 // Insert the fused call and drop both pseudos.
 881 BB->insert(CallResults.getIterator(), MIB);
 882 CallParams.eraseFromParent();
 883 CallResults.eraseFromParent();
 884
 885 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
 886 // table slot with ref.null upon call_indirect return.
 887 //
 888 // This generates the following code, which comes right after a call_indirect
 889 // of a funcref:
 890 //
 891 // i32.const 0
 892 // ref.null func
 893 // table.set __funcref_call_table
 894 if (IsIndirect && IsFuncrefCall) {
 896 MF.getContext(), Subtarget);
 897 Register RegZero =
 898 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
 899 MachineInstr *Const0 =
 900 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
 901 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
 902
 903 Register RegFuncref =
 904 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
 905 MachineInstr *RefNull =
 906 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
 907 BB->insertAfter(Const0->getIterator(), RefNull);
 908
 909 MachineInstr *TableSet =
 910 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
 911 .addSym(Table)
 912 .addReg(RegZero)
 913 .addReg(RegFuncref);
 914 BB->insertAfter(RefNull->getIterator(), TableSet);
 915 }
 916
 917 return BB;
 918}
919
920MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
921 MachineInstr &MI, MachineBasicBlock *BB) const {
922 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
923 DebugLoc DL = MI.getDebugLoc();
924
925 switch (MI.getOpcode()) {
926 default:
927 llvm_unreachable("Unexpected instr type to insert");
928 case WebAssembly::FP_TO_SINT_I32_F32:
929 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
930 WebAssembly::I32_TRUNC_S_F32);
931 case WebAssembly::FP_TO_UINT_I32_F32:
932 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
933 WebAssembly::I32_TRUNC_U_F32);
934 case WebAssembly::FP_TO_SINT_I64_F32:
935 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
936 WebAssembly::I64_TRUNC_S_F32);
937 case WebAssembly::FP_TO_UINT_I64_F32:
938 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
939 WebAssembly::I64_TRUNC_U_F32);
940 case WebAssembly::FP_TO_SINT_I32_F64:
941 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
942 WebAssembly::I32_TRUNC_S_F64);
943 case WebAssembly::FP_TO_UINT_I32_F64:
944 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
945 WebAssembly::I32_TRUNC_U_F64);
946 case WebAssembly::FP_TO_SINT_I64_F64:
947 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
948 WebAssembly::I64_TRUNC_S_F64);
949 case WebAssembly::FP_TO_UINT_I64_F64:
950 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
951 WebAssembly::I64_TRUNC_U_F64);
952 case WebAssembly::MEMCPY_A32:
953 return LowerMemcpy(MI, DL, BB, TII, false);
954 case WebAssembly::MEMCPY_A64:
955 return LowerMemcpy(MI, DL, BB, TII, true);
956 case WebAssembly::MEMSET_A32:
957 return LowerMemset(MI, DL, BB, TII, false);
958 case WebAssembly::MEMSET_A64:
959 return LowerMemset(MI, DL, BB, TII, true);
960 case WebAssembly::CALL_RESULTS:
961 case WebAssembly::RET_CALL_RESULTS:
962 return LowerCallResults(MI, DL, BB, Subtarget, TII);
963 }
964}
965
966std::pair<unsigned, const TargetRegisterClass *>
967WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
968 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
969 // First, see if this is a constraint that directly corresponds to a
970 // WebAssembly register class.
971 if (Constraint.size() == 1) {
972 switch (Constraint[0]) {
973 case 'r':
974 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
975 if (Subtarget->hasSIMD128() && VT.isVector()) {
976 if (VT.getSizeInBits() == 128)
977 return std::make_pair(0U, &WebAssembly::V128RegClass);
978 }
979 if (VT.isInteger() && !VT.isVector()) {
980 if (VT.getSizeInBits() <= 32)
981 return std::make_pair(0U, &WebAssembly::I32RegClass);
982 if (VT.getSizeInBits() <= 64)
983 return std::make_pair(0U, &WebAssembly::I64RegClass);
984 }
985 if (VT.isFloatingPoint() && !VT.isVector()) {
986 switch (VT.getSizeInBits()) {
987 case 32:
988 return std::make_pair(0U, &WebAssembly::F32RegClass);
989 case 64:
990 return std::make_pair(0U, &WebAssembly::F64RegClass);
991 default:
992 break;
993 }
994 }
995 break;
996 default:
997 break;
998 }
999 }
1000
1001 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1002}
1003
1004bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1005 // Assume ctz is a relatively cheap operation.
1006 return true;
1007}
1008
1009bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1010 // Assume clz is a relatively cheap operation.
1011 return true;
1012}
1013
1014bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1015 const AddrMode &AM,
1016 Type *Ty, unsigned AS,
1017 Instruction *I) const {
1018 // WebAssembly offsets are added as unsigned without wrapping. The
1019 // isLegalAddressingMode gives us no way to determine if wrapping could be
1020 // happening, so we approximate this by accepting only non-negative offsets.
1021 if (AM.BaseOffs < 0)
1022 return false;
1023
1024 // WebAssembly has no scale register operands.
1025 if (AM.Scale != 0)
1026 return false;
1027
1028 // Everything else is legal.
1029 return true;
1030}
1031
1032bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1033 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1034 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1035 // WebAssembly supports unaligned accesses, though it should be declared
1036 // with the p2align attribute on loads and stores which do so, and there
1037 // may be a performance impact. We tell LLVM they're "fast" because
1038 // for the kinds of things that LLVM uses this for (merging adjacent stores
1039 // of constants, etc.), WebAssembly implementations will either want the
1040 // unaligned access or they'll split anyway.
1041 if (Fast)
1042 *Fast = 1;
1043 return true;
1044}
1045
1046bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1047 AttributeList Attr) const {
1048 // The current thinking is that wasm engines will perform this optimization,
1049 // so we can save on code size.
1050 return true;
1051}
1052
1053bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1054 EVT ExtT = ExtVal.getValueType();
1055 SDValue N0 = ExtVal->getOperand(0);
1056 if (N0.getOpcode() == ISD::FREEZE)
1057 N0 = N0.getOperand(0);
1058 auto *Load = dyn_cast<LoadSDNode>(N0);
1059 if (!Load)
1060 return false;
1061 EVT MemT = Load->getValueType(0);
1062 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1063 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1064 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1065}
1066
// Decides whether a constant offset may be folded into a global address.
// NOTE(review): the return statement of this function was elided by the
// source extraction (original line 1071); presumably it rejects folding when
// GV is a Function — confirm against the upstream file.
1067bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1068 const GlobalAddressSDNode *GA) const {
1069 // Wasm doesn't support function addresses with offsets
1070 const GlobalValue *GV = GA->getGlobal();
1072}
1073
// Returns the type a SETCC should produce. f16 vectors without the FP16
// feature compare into i1-element vectors; scalars compare into i32 since all
// Wasm branch instructions take an i32 condition.
1074EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1075 LLVMContext &C,
1076 EVT VT) const {
1077 if (VT.isVector()) {
1078 if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1079 return VT.changeElementType(C, MVT::i1);
1080
// NOTE(review): the general vector return on the line elided by the
// extraction (original line 1081) is not visible here — confirm upstream.
1082 }
1083
1084 // So far, all branch instructions in Wasm take an I32 condition.
1085 // The default TargetLowering::getSetCCResultType returns the pointer size,
1086 // which would be useful to reduce instruction counts when testing
1087 // against 64-bit pointers/values if at some point Wasm supports that.
1088 return EVT::getIntegerVT(C, 32);
1089}
1090
// Describes the memory touched by WebAssembly memory intrinsics (atomic
// notify/wait and f16 load/store) so the DAG builder can attach a
// MachineMemOperand. NOTE(review): several lines were elided by the source
// extraction (the IntrinsicInfo parameter, the Info.opc and Info.flags
// assignments per case) — the visible fields below are accurate but not the
// complete initialization; confirm against the upstream file.
1091void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1093 MachineFunction &MF, unsigned Intrinsic) const {
1095 switch (Intrinsic) {
1096 case Intrinsic::wasm_memory_atomic_notify:
1098 Info.memVT = MVT::i32;
1099 Info.ptrVal = I.getArgOperand(0);
1100 Info.offset = 0;
1101 Info.align = Align(4);
1102 // atomic.notify instruction does not really load the memory specified with
1103 // this argument, but MachineMemOperand should either be load or store, so
1104 // we set this to a load.
1105 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1106 // instructions are treated as volatiles in the backend, so we should be
1107 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1109 Infos.push_back(Info);
1110 return;
1111 case Intrinsic::wasm_memory_atomic_wait32:
1113 Info.memVT = MVT::i32;
1114 Info.ptrVal = I.getArgOperand(0);
1115 Info.offset = 0;
1116 Info.align = Align(4);
1118 Infos.push_back(Info);
1119 return;
1120 case Intrinsic::wasm_memory_atomic_wait64:
1122 Info.memVT = MVT::i64;
1123 Info.ptrVal = I.getArgOperand(0);
1124 Info.offset = 0;
1125 Info.align = Align(8);
1127 Infos.push_back(Info);
1128 return;
1129 case Intrinsic::wasm_loadf16_f32:
1131 Info.memVT = MVT::f16;
1132 Info.ptrVal = I.getArgOperand(0);
1133 Info.offset = 0;
1134 Info.align = Align(2);
1136 Infos.push_back(Info);
1137 return;
// Note: the store intrinsic's pointer is operand 1 (operand 0 is the value).
1138 case Intrinsic::wasm_storef16_f32:
1140 Info.memVT = MVT::f16;
1141 Info.ptrVal = I.getArgOperand(1);
1142 Info.offset = 0;
1143 Info.align = Align(2);
1145 Infos.push_back(Info);
1146 return;
1147 default:
1148 return;
1149 }
1150}
1151
// Reports target-specific known-zero bits: the unused high bits of a
// wasm.bitmask result, the zero-extended upper half of EXTEND_LOW_U /
// EXTEND_HIGH_U lanes, and the high result of a carry-only i64.add128.
// NOTE(review): the INTRINSIC case label (original line 1158) and the three
// APInt mask constructions (original lines 1182/1186/1190) were elided by the
// source extraction — confirm upstream.
1152void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1153 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1154 const SelectionDAG &DAG, unsigned Depth) const {
1155 switch (Op.getOpcode()) {
1156 default:
1157 break;
1159 unsigned IntNo = Op.getConstantOperandVal(0);
1160 switch (IntNo) {
1161 default:
1162 break;
1163 case Intrinsic::wasm_bitmask: {
// bitmask sets one result bit per vector lane; everything above the lane
// count is known zero.
1164 unsigned BitWidth = Known.getBitWidth();
1165 EVT VT = Op.getOperand(1).getSimpleValueType();
1166 unsigned PossibleBits = VT.getVectorNumElements();
1167 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1168 Known.Zero |= ZeroMask;
1169 break;
1170 }
1171 }
1172 break;
1173 }
1174 case WebAssemblyISD::EXTEND_LOW_U:
1175 case WebAssemblyISD::EXTEND_HIGH_U: {
1176 // We know the high half, of each destination vector element, will be zero.
1177 SDValue SrcOp = Op.getOperand(0);
1178 EVT VT = SrcOp.getSimpleValueType();
1179 unsigned BitWidth = Known.getBitWidth();
1180 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1181 assert(BitWidth >= 8 && "Unexpected width!");
1183 Known.Zero |= Mask;
1184 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1185 assert(BitWidth >= 16 && "Unexpected width!");
1187 Known.Zero |= Mask;
1188 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1189 assert(BitWidth >= 32 && "Unexpected width!");
1191 Known.Zero |= Mask;
1192 }
1193 break;
1194 }
1195 // For 128-bit addition if the upper bits are all zero then it's known that
1196 // the upper bits of the result will have all bits guaranteed zero except the
1197 // first.
1198 case WebAssemblyISD::I64_ADD128:
1199 if (Op.getResNo() == 1) {
1200 SDValue LHS_HI = Op.getOperand(1);
1201 SDValue RHS_HI = Op.getOperand(3);
1202 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1203 Known.Zero.setBitsFrom(1);
1204 }
1205 break;
1206 }
1207}
1208
// Chooses how illegal fixed-length vector types are legalized: widen to a
// legal 128-bit vector when the element type has direct lane support.
// NOTE(review): the return-type line (original line 1209) and the default
// return for other types (original line 1221) were elided by the source
// extraction — confirm upstream.
1210WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1211 if (VT.isFixedLengthVector()) {
1212 MVT EltVT = VT.getVectorElementType();
1213 // We have legal vector types with these lane types, so widening the
1214 // vector would let us use some of the lanes directly without having to
1215 // extend or truncate values.
1216 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1217 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1218 return TypeWidenVector;
1219 }
1220
1222}
1223
1224bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1225 const MachineFunction &MF, EVT VT) const {
1226 if (!Subtarget->hasFP16() || !VT.isVector())
1227 return false;
1228
1229 EVT ScalarVT = VT.getScalarType();
1230 if (!ScalarVT.isSimple())
1231 return false;
1232
1233 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1234}
1235
1236bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1237 SDValue Op, const TargetLoweringOpt &TLO) const {
1238 // ISel process runs DAGCombiner after legalization; this step is called
1239 // SelectionDAG optimization phase. This post-legalization combining process
1240 // runs DAGCombiner on each node, and if there was a change to be made,
1241 // re-runs legalization again on it and its user nodes to make sure
1242 // everythiing is in a legalized state.
1243 //
1244 // The legalization calls lowering routines, and we do our custom lowering for
1245 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1246 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1247 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1248 // turns unused vector elements into undefs. But this routine does not work
1249 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1250 // combination can result in a infinite loop, in which undefs are converted to
1251 // zeros in legalization and back to undefs in combining.
1252 //
1253 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1254 // running for build_vectors.
1255 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1256 return false;
1257 return true;
1258}
1259
1260//===----------------------------------------------------------------------===//
1261// WebAssembly Lowering private implementation.
1262//===----------------------------------------------------------------------===//
1263
1264//===----------------------------------------------------------------------===//
1265// Lowering Code
1266//===----------------------------------------------------------------------===//
1267
// Reports an unsupported-feature diagnostic against the current function,
// attributed to the given debug location. Lowering continues afterwards.
// NOTE(review): the local MachineFunction reference `MF` is defined on a line
// elided by the source extraction (original line 1269) — confirm upstream.
1268static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1270 DAG.getContext()->diagnose(
1271 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1272}
1273
1274// Test whether the given calling convention is supported.
// NOTE(review): the function signature line (original line 1275, presumably
// `static bool callingConvSupported(CallingConv::ID CallConv) {`) and one
// disjunct of the return expression (original line 1285) were elided by the
// source extraction — confirm upstream.
1276 // We currently support the language-independent target-independent
1277 // conventions. We don't yet have a way to annotate calls with properties like
1278 // "cold", and we don't have any call-clobbered registers, so these are mostly
1279 // all handled the same.
1280 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1281 CallConv == CallingConv::Cold ||
1282 CallConv == CallingConv::PreserveMost ||
1283 CallConv == CallingConv::PreserveAll ||
1284 CallConv == CallingConv::CXX_FAST_TLS ||
1286 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1288
// Lowers an outgoing call: validates the calling convention, demotes or
// rejects unsupported tail calls, copies byval arguments into stack objects,
// pads swiftcc argument lists, places varargs into a caller-allocated buffer,
// rewrites funcref callees via a table.set, and finally emits a
// WebAssemblyISD::CALL (or RET_CALL for tail calls) node.
// NOTE(review): several declaration/condition lines were elided by the source
// extraction (e.g. the isInConsecutiveRegsLast() condition, local SmallVector
// declarations for ArgLocs/Chains/Ops, and parts of the getTargetGlobalAddress
// and MachineMemOperand argument lists) — confirm against the upstream file.
1289SDValue
1290WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1291 SmallVectorImpl<SDValue> &InVals) const {
1292 SelectionDAG &DAG = CLI.DAG;
1293 SDLoc DL = CLI.DL;
1294 SDValue Chain = CLI.Chain;
1295 SDValue Callee = CLI.Callee;
1296 MachineFunction &MF = DAG.getMachineFunction();
1297 auto Layout = MF.getDataLayout();
1298
1299 CallingConv::ID CallConv = CLI.CallConv;
1300 if (!callingConvSupported(CallConv))
1301 fail(DL, DAG,
1302 "WebAssembly doesn't support language-specific or target-specific "
1303 "calling conventions yet");
1304 if (CLI.IsPatchPoint)
1305 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1306
1307 if (CLI.IsTailCall) {
// Demote the tail call to a normal call; only report an error when the IR
// required a tail call via musttail.
1308 auto NoTail = [&](const char *Msg) {
1309 if (CLI.CB && CLI.CB->isMustTailCall())
1310 fail(DL, DAG, Msg);
1311 CLI.IsTailCall = false;
1312 };
1313
1314 if (!Subtarget->hasTailCall())
1315 NoTail("WebAssembly 'tail-call' feature not enabled");
1316
1317 // Varargs calls cannot be tail calls because the buffer is on the stack
1318 if (CLI.IsVarArg)
1319 NoTail("WebAssembly does not support varargs tail calls")
1320
1321 // Do not tail call unless caller and callee return types match
1322 const Function &F = MF.getFunction();
1323 const TargetMachine &TM = getTargetMachine();
1324 Type *RetTy = F.getReturnType();
1325 SmallVector<MVT, 4> CallerRetTys;
1326 SmallVector<MVT, 4> CalleeRetTys;
1327 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1328 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1329 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1330 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1331 CalleeRetTys.begin());
1332 if (!TypesMatch)
1333 NoTail("WebAssembly tail call requires caller and callee return types to "
1334 "match");
1335
1336 // If pointers to local stack values are passed, we cannot tail call
1337 if (CLI.CB) {
1338 for (auto &Arg : CLI.CB->args()) {
1339 Value *Val = Arg.get();
1340 // Trace the value back through pointer operations
1341 while (true) {
1342 Value *Src = Val->stripPointerCastsAndAliases();
1343 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1344 Src = GEP->getPointerOperand();
1345 if (Val == Src)
1346 break;
1347 Val = Src;
1348 }
1349 if (isa<AllocaInst>(Val)) {
1350 NoTail(
1351 "WebAssembly does not support tail calling with stack arguments");
1352 break;
1353 }
1354 }
1355 }
1356 }
1357
1358 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1359 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1360 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1361
1362 // The generic code may have added an sret argument. If we're lowering an
1363 // invoke function, the ABI requires that the function pointer be the first
1364 // argument, so we may have to swap the arguments.
1365 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1366 Outs[0].Flags.isSRet()) {
1367 std::swap(Outs[0], Outs[1]);
1368 std::swap(OutVals[0], OutVals[1]);
1369 }
1370
1371 bool HasSwiftSelfArg = false;
1372 bool HasSwiftErrorArg = false;
1373 bool HasSwiftAsyncArg = false;
1374 unsigned NumFixedArgs = 0;
1375 for (unsigned I = 0; I < Outs.size(); ++I) {
1376 const ISD::OutputArg &Out = Outs[I];
1377 SDValue &OutVal = OutVals[I];
1378 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1379 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1380 HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
1381 if (Out.Flags.isNest())
1382 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1383 if (Out.Flags.isInAlloca())
1384 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1385 if (Out.Flags.isInConsecutiveRegs())
1386 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1388 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// byval arguments get a fresh stack object; the caller-visible value becomes
// the frame index of the copy.
1389 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1390 auto &MFI = MF.getFrameInfo();
1391 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1393 /*isSS=*/false);
1394 SDValue SizeNode =
1395 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1396 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1397 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1399 /*isVolatile*/ false, /*AlwaysInline=*/false,
1400 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1401 MachinePointerInfo());
1402 OutVal = FINode;
1403 }
1404 // Count the number of fixed args *after* legalization.
1405 NumFixedArgs += !Out.Flags.isVarArg();
1406 }
1407
1408 bool IsVarArg = CLI.IsVarArg;
1409 auto PtrVT = getPointerTy(Layout);
1410
1411 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1412 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1413 // arguments are also added for callee signature. They are necessary to match
1414 // callee and caller signature for indirect call.
1415 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1416 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1417 if (!HasSwiftSelfArg) {
1418 NumFixedArgs++;
1419 ISD::ArgFlagsTy Flags;
1420 Flags.setSwiftSelf();
1421 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1422 CLI.Outs.push_back(Arg);
1423 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1424 CLI.OutVals.push_back(ArgVal);
1425 }
1426 if (!HasSwiftErrorArg) {
1427 NumFixedArgs++;
1428 ISD::ArgFlagsTy Flags;
1429 Flags.setSwiftError();
1430 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1431 CLI.Outs.push_back(Arg);
1432 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1433 CLI.OutVals.push_back(ArgVal);
1434 }
1435 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1436 NumFixedArgs++;
1437 ISD::ArgFlagsTy Flags;
1438 Flags.setSwiftAsync();
1439 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1440 CLI.Outs.push_back(Arg);
1441 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1442 CLI.OutVals.push_back(ArgVal);
1443 }
1444 }
1445
1446 // Analyze operands of the call, assigning locations to each operand.
1448 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1449
1450 if (IsVarArg) {
1451 // Outgoing non-fixed arguments are placed in a buffer. First
1452 // compute their offsets and the total amount of buffer space needed.
1453 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1454 const ISD::OutputArg &Out = Outs[I];
1455 SDValue &Arg = OutVals[I];
1456 EVT VT = Arg.getValueType();
1457 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1458 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1459 Align Alignment =
1460 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1461 unsigned Offset =
1462 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1463 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1464 Offset, VT.getSimpleVT(),
1466 }
1467 }
1468
1469 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1470
1471 SDValue FINode;
1472 if (IsVarArg && NumBytes) {
1473 // For non-fixed arguments, next emit stores to store the argument values
1474 // to the stack buffer at the offsets computed above.
1475 MaybeAlign StackAlign = Layout.getStackAlignment();
1476 assert(StackAlign && "data layout string is missing stack alignment");
1477 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1478 /*isSS=*/false);
1479 unsigned ValNo = 0;
1481 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1482 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1483 "ArgLocs should remain in order and only hold varargs args");
1484 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1485 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1486 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1487 DAG.getConstant(Offset, DL, PtrVT));
1488 Chains.push_back(
1489 DAG.getStore(Chain, DL, Arg, Add,
1491 }
1492 if (!Chains.empty())
1493 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1494 } else if (IsVarArg) {
1495 FINode = DAG.getIntPtrConstant(0, DL);
1496 }
1497
1498 if (Callee->getOpcode() == ISD::GlobalAddress) {
1499 // If the callee is a GlobalAddress node (quite common, every direct call
1500 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1501 // doesn't at MO_GOT which is not needed for direct calls.
1502 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1505 GA->getOffset());
1506 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1507 getPointerTy(DAG.getDataLayout()), Callee);
1508 }
1509
1510 // Compute the operands for the CALLn node.
1512 Ops.push_back(Chain);
1513 Ops.push_back(Callee);
1514
1515 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1516 // isn't reliable.
1517 Ops.append(OutVals.begin(),
1518 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1519 // Add a pointer to the vararg buffer.
1520 if (IsVarArg)
1521 Ops.push_back(FINode);
1522
1523 SmallVector<EVT, 8> InTys;
1524 for (const auto &In : Ins) {
1525 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1526 assert(!In.Flags.isNest() && "nest is not valid for return values");
1527 if (In.Flags.isInAlloca())
1528 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1529 if (In.Flags.isInConsecutiveRegs())
1530 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1531 if (In.Flags.isInConsecutiveRegsLast())
1532 fail(DL, DAG,
1533 "WebAssembly hasn't implemented cons regs last return values");
1534 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1535 // registers.
1536 InTys.push_back(In.VT);
1537 }
1538
1539 // Lastly, if this is a call to a funcref we need to add an instruction
1540 // table.set to the chain and transform the call.
1542 CLI.CB->getCalledOperand()->getType())) {
1543 // In the absence of function references proposal where a funcref call is
1544 // lowered to call_ref, using reference types we generate a table.set to set
1545 // the funcref to a special table used solely for this purpose, followed by
1546 // a call_indirect. Here we just generate the table set, and return the
1547 // SDValue of the table.set so that LowerCall can finalize the lowering by
1548 // generating the call_indirect.
1549 SDValue Chain = Ops[0];
1550
1552 MF.getContext(), Subtarget);
1553 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1554 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1555 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1556 SDValue TableSet = DAG.getMemIntrinsicNode(
1557 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1558 MVT::funcref,
1559 // Machine Mem Operand args
1560 MachinePointerInfo(
1562 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1564
1565 Ops[0] = TableSet; // The new chain is the TableSet itself
1566 }
1567
1568 if (CLI.IsTailCall) {
1569 // ret_calls do not return values to the current frame
1570 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1571 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1572 }
1573
1574 InTys.push_back(MVT::Other);
1575 SDVTList InTyList = DAG.getVTList(InTys);
1576 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1577
// The CALL node's results are the call's return values followed by the chain.
1578 for (size_t I = 0; I < Ins.size(); ++I)
1579 InVals.push_back(Res.getValue(I));
1580
1581 // Return the chain
1582 return Res.getValue(Ins.size());
1583}
1584
1585bool WebAssemblyTargetLowering::CanLowerReturn(
1586 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1587 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1588 const Type *RetTy) const {
1589 // WebAssembly can only handle returning tuples with multivalue enabled
1590 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1591}
1592
// Lowers a function return into a WebAssemblyISD::RETURN node carrying all
// return operands, after validating the argument flags.
// NOTE(review): part of the parameter list (original line 1595, presumably the
// Outs SmallVectorImpl parameter) and the isInConsecutiveRegsLast() condition
// (original line 1616) were elided by the source extraction — confirm
// upstream.
1593SDValue WebAssemblyTargetLowering::LowerReturn(
1594 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1596 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1597 SelectionDAG &DAG) const {
1598 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1599 "MVP WebAssembly can only return up to one value");
1600 if (!callingConvSupported(CallConv))
1601 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1602
1603 SmallVector<SDValue, 4> RetOps(1, Chain);
1604 RetOps.append(OutVals.begin(), OutVals.end());
1605 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1606
1607 // Record the number and types of the return values.
1608 for (const ISD::OutputArg &Out : Outs) {
1609 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1610 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1611 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1612 if (Out.Flags.isInAlloca())
1613 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1614 if (Out.Flags.isInConsecutiveRegs())
1615 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1617 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1618 }
1619
1620 return Chain;
1621}
1622
// Lowers incoming formal arguments: each used argument becomes a
// WebAssemblyISD::ARGUMENT node indexed by position, swiftcc signatures are
// padded with swiftself/swifterror/swiftasync params, and a trailing pointer
// argument is added for the caller-allocated varargs buffer. Argument and
// result types are also recorded in WebAssemblyFunctionInfo.
// NOTE(review): the createVirtualRegister call for the vararg vreg (original
// line 1684) and the Results declaration / computeSignatureVTs call (original
// lines 1695-1696) were elided by the source extraction — confirm upstream.
1623SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1624 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1625 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1626 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1627 if (!callingConvSupported(CallConv))
1628 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1629
1630 MachineFunction &MF = DAG.getMachineFunction();
1631 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1632
1633 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1634 // of the incoming values before they're represented by virtual registers.
1635 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1636
1637 bool HasSwiftErrorArg = false;
1638 bool HasSwiftSelfArg = false;
1639 bool HasSwiftAsyncArg = false;
1640 for (const ISD::InputArg &In : Ins) {
1641 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1642 HasSwiftErrorArg |= In.Flags.isSwiftError();
1643 HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
1644 if (In.Flags.isInAlloca())
1645 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1646 if (In.Flags.isNest())
1647 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1648 if (In.Flags.isInConsecutiveRegs())
1649 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1650 if (In.Flags.isInConsecutiveRegsLast())
1651 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1652 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1653 // registers.
1654 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1655 DAG.getTargetConstant(InVals.size(),
1656 DL, MVT::i32))
1657 : DAG.getUNDEF(In.VT));
1658
1659 // Record the number and types of arguments.
1660 MFI->addParam(In.VT);
1661 }
1662
1663 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1664 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1665 // arguments are also added for callee signature. They are necessary to match
1666 // callee and caller signature for indirect call.
1667 auto PtrVT = getPointerTy(MF.getDataLayout());
1668 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1669 if (!HasSwiftSelfArg) {
1670 MFI->addParam(PtrVT);
1671 }
1672 if (!HasSwiftErrorArg) {
1673 MFI->addParam(PtrVT);
1674 }
1675 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1676 MFI->addParam(PtrVT);
1677 }
1678 }
1679 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1680 // the buffer is passed as an argument.
1681 if (IsVarArg) {
1682 MVT PtrVT = getPointerTy(MF.getDataLayout());
1683 Register VarargVreg =
1685 MFI->setVarargBufferVreg(VarargVreg);
1686 Chain = DAG.getCopyToReg(
1687 Chain, DL, VarargVreg,
1688 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1689 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1690 MFI->addParam(PtrVT);
1691 }
1692
1693 // Record the number and types of arguments and results.
1694 SmallVector<MVT, 4> Params;
1697 MF.getFunction(), DAG.getTarget(), Params, Results);
1698 for (MVT VT : Results)
1699 MFI->addResult(VT);
1700 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1701 // the param logic here with ComputeSignatureVTs
1702 assert(MFI->getParams().size() == Params.size() &&
1703 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1704 Params.begin()));
1705
1706 return Chain;
1707}
1708
// Replaces results for custom-lowered nodes with illegal result types:
// declines sign/vector extends that turned out illegal, demotes v4f32->v4f16
// FP_ROUND via DEMOTE_ZERO, and splits i128 ADD/SUB through Replace128Op.
// NOTE(review): the parameter list (original line 1710) and the case labels
// for SIGN_EXTEND_INREG / the EXTEND_VECTOR_INREG opcodes (original lines
// 1712, 1718-1720) plus the llvm_unreachable call line (1738) were elided by
// the source extraction — confirm upstream.
1709void WebAssemblyTargetLowering::ReplaceNodeResults(
1711 switch (N->getOpcode()) {
1713 // Do not add any results, signifying that N should not be custom lowered
1714 // after all. This happens because simd128 turns on custom lowering for
1715 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1716 // illegal type.
1717 break;
1721 // Do not add any results, signifying that N should not be custom lowered.
1722 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1723 break;
1724 case ISD::FP_ROUND: {
1725 EVT VT = N->getValueType(0);
1726 SDValue Src = N->getOperand(0);
1727 if (VT == MVT::v4f16 && Src.getValueType() == MVT::v4f32) {
1728 Results.push_back(
1729 DAG.getNode(WebAssemblyISD::DEMOTE_ZERO, SDLoc(N), MVT::v8f16, Src));
1730 }
1731 break;
1732 }
1733 case ISD::ADD:
1734 case ISD::SUB:
1735 Results.push_back(Replace128Op(N, DAG));
1736 break;
1737 default:
1739 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1740 }
1741}
1742
1743//===----------------------------------------------------------------------===//
1744// Custom lowering hooks.
1745//===----------------------------------------------------------------------===//
1746
// Central dispatch for all custom-lowered operations: routes each opcode to
// its dedicated Lower* routine, unrolls vector CTPOP/CTLZ/CTTZ, and rejects
// the few operations WebAssembly cannot express.
// NOTE(review): several case labels were elided by the source extraction
// (original lines 1758, 1760, 1778-1785, 1787-1789, 1793, 1801-1802 — e.g.
// the GlobalTLSAddress/ExternalSymbol, EXTRACT/INSERT_VECTOR_ELT, intrinsic,
// SIGN_EXTEND_INREG, *_EXTEND_VECTOR_INREG, VECTOR_SHUFFLE and FP_TO_*INT_SAT
// labels) — confirm upstream.
1747SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1748 SelectionDAG &DAG) const {
1749 SDLoc DL(Op);
1750 switch (Op.getOpcode()) {
1751 default:
1752 llvm_unreachable("unimplemented operation lowering");
1753 return SDValue();
1754 case ISD::FrameIndex:
1755 return LowerFrameIndex(Op, DAG);
1756 case ISD::GlobalAddress:
1757 return LowerGlobalAddress(Op, DAG);
1759 return LowerGlobalTLSAddress(Op, DAG);
1761 return LowerExternalSymbol(Op, DAG);
1762 case ISD::JumpTable:
1763 return LowerJumpTable(Op, DAG);
1764 case ISD::BR_JT:
1765 return LowerBR_JT(Op, DAG);
1766 case ISD::VASTART:
1767 return LowerVASTART(Op, DAG);
1768 case ISD::BlockAddress:
1769 case ISD::BRIND:
1770 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1771 return SDValue();
1772 case ISD::RETURNADDR:
1773 return LowerRETURNADDR(Op, DAG);
1774 case ISD::FRAMEADDR:
1775 return LowerFRAMEADDR(Op, DAG);
1776 case ISD::CopyToReg:
1777 return LowerCopyToReg(Op, DAG);
1780 return LowerAccessVectorElement(Op, DAG);
1784 return LowerIntrinsic(Op, DAG);
1786 return LowerSIGN_EXTEND_INREG(Op, DAG);
1790 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1791 case ISD::BUILD_VECTOR:
1792 return LowerBUILD_VECTOR(Op, DAG);
1794 return LowerVECTOR_SHUFFLE(Op, DAG);
1795 case ISD::SETCC:
1796 return LowerSETCC(Op, DAG);
1797 case ISD::SHL:
1798 case ISD::SRA:
1799 case ISD::SRL:
1800 return LowerShift(Op, DAG);
1803 return LowerFP_TO_INT_SAT(Op, DAG);
1804 case ISD::FMINNUM:
1805 case ISD::FMINIMUMNUM:
1806 return LowerFMIN(Op, DAG);
1807 case ISD::FMAXNUM:
1808 case ISD::FMAXIMUMNUM:
1809 return LowerFMAX(Op, DAG);
1810 case ISD::LOAD:
1811 return LowerLoad(Op, DAG);
1812 case ISD::STORE:
1813 return LowerStore(Op, DAG);
1814 case ISD::CTPOP:
1815 case ISD::CTLZ:
1816 case ISD::CTTZ:
1817 return DAG.UnrollVectorOp(Op.getNode());
1818 case ISD::CLEAR_CACHE:
1819 report_fatal_error("llvm.clear_cache is not supported on wasm");
1820 case ISD::SMUL_LOHI:
1821 case ISD::UMUL_LOHI:
1822 return LowerMUL_LOHI(Op, DAG);
1823 case ISD::UADDO:
1824 return LowerUADDO(Op, DAG);
1825 }
1826}
1827
1831
1832 return false;
1833}
1834
// If Op refers to a frame index that the current function models as a Wasm
// local, returns that local's index; otherwise returns std::nullopt.
// NOTE(review): the dyn_cast producing `FI` (original line 1837) and the final
// return expression (original line 1842) were elided by the source
// extraction — confirm upstream.
1835static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1836 SelectionDAG &DAG) {
1838 if (!FI)
1839 return std::nullopt;
1840
1841 auto &MF = DAG.getMachineFunction();
1843}
1844
// Custom store lowering: stores to Wasm globals become GLOBAL_SET memory
// intrinsic nodes, stores to Wasm locals become LOCAL_SET nodes, and any
// other store into the wasm_var address space is rejected.
// NOTE(review): the condition guarding the global case (original line 1853,
// presumably an IsWebAssemblyGlobal check) and the condition before the
// report_fatal_error (original lines 1875-1876) were elided by the source
// extraction — confirm upstream.
1845SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1846 SelectionDAG &DAG) const {
1847 SDLoc DL(Op);
1848 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1849 const SDValue &Value = SN->getValue();
1850 const SDValue &Base = SN->getBasePtr();
1851 const SDValue &Offset = SN->getOffset();
1852
1854 if (!Offset->isUndef())
1855 report_fatal_error("unexpected offset when storing to webassembly global",
1856 false);
1857
1858 SDVTList Tys = DAG.getVTList(MVT::Other);
1859 SDValue Ops[] = {SN->getChain(), Value, Base};
1860 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1861 SN->getMemoryVT(), SN->getMemOperand());
1862 }
1863
1864 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1865 if (!Offset->isUndef())
1866 report_fatal_error("unexpected offset when storing to webassembly local",
1867 false);
1868
1869 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1870 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1871 SDValue Ops[] = {SN->getChain(), Idx, Value};
1872 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1873 }
1874
1877 "Encountered an unlowerable store to the wasm_var address space",
1878 false);
1879
1880 return Op;
1881}
1882
// Custom load lowering: loads from Wasm globals become GLOBAL_GET memory
// intrinsic nodes, loads from Wasm locals become LOCAL_GET nodes, and any
// other load from the wasm_var address space is rejected.
// NOTE(review): the global-case guard (original line 1890), the
// report_fatal_error call openers (original lines 1892/1903) and the final
// wasm_var check (original lines 1912-1913) were elided by the source
// extraction — confirm upstream.
1883SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1884 SelectionDAG &DAG) const {
1885 SDLoc DL(Op);
1886 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1887 const SDValue &Base = LN->getBasePtr();
1888 const SDValue &Offset = LN->getOffset();
1889
1891 if (!Offset->isUndef())
1893 "unexpected offset when loading from webassembly global", false);
1894
1895 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1896 SDValue Ops[] = {LN->getChain(), Base};
1897 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1898 LN->getMemoryVT(), LN->getMemOperand());
1899 }
1900
1901 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1902 if (!Offset->isUndef())
1904 "unexpected offset when loading from webassembly local", false);
1905
1906 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1907 EVT LocalVT = LN->getValueType(0);
1908 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1909 {LN->getChain(), Idx});
1910 }
1911
1914 "Encountered an unlowerable load from the wasm_var address space",
1915 false);
1916
1917 return Op;
1918}
1919
1920SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1921 SelectionDAG &DAG) const {
1922 assert(Subtarget->hasWideArithmetic());
1923 assert(Op.getValueType() == MVT::i64);
1924 SDLoc DL(Op);
1925 unsigned Opcode;
1926 switch (Op.getOpcode()) {
1927 case ISD::UMUL_LOHI:
1928 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1929 break;
1930 case ISD::SMUL_LOHI:
1931 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1932 break;
1933 default:
1934 llvm_unreachable("unexpected opcode");
1935 }
1936 SDValue LHS = Op.getOperand(0);
1937 SDValue RHS = Op.getOperand(1);
1938 SDValue Lo =
1939 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1940 SDValue Hi(Lo.getNode(), 1);
1941 SDValue Ops[] = {Lo, Hi};
1942 return DAG.getMergeValues(Ops, DL);
1943}
1944
1945// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1946//
1947// This enables generating a single wasm instruction for this operation where
1948// the upper half of both operands are constant zeros. The upper half of the
1949// result is then whether the overflow happened.
1950SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1951 SelectionDAG &DAG) const {
1952 assert(Subtarget->hasWideArithmetic());
1953 assert(Op.getValueType() == MVT::i64);
1954 assert(Op.getOpcode() == ISD::UADDO);
1955 SDLoc DL(Op);
1956 SDValue LHS = Op.getOperand(0);
1957 SDValue RHS = Op.getOperand(1);
1958 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1959 SDValue Result =
1960 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1961 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1962 SDValue CarryI64(Result.getNode(), 1);
1963 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1964 SDValue Ops[] = {Result, CarryI32};
1965 return DAG.getMergeValues(Ops, DL);
1966}
1967
1968SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1969 SelectionDAG &DAG) const {
1970 assert(Subtarget->hasWideArithmetic());
1971 assert(N->getValueType(0) == MVT::i128);
1972 SDLoc DL(N);
1973 unsigned Opcode;
1974 switch (N->getOpcode()) {
1975 case ISD::ADD:
1976 Opcode = WebAssemblyISD::I64_ADD128;
1977 break;
1978 case ISD::SUB:
1979 Opcode = WebAssemblyISD::I64_SUB128;
1980 break;
1981 default:
1982 llvm_unreachable("unexpected opcode");
1983 }
1984 SDValue LHS = N->getOperand(0);
1985 SDValue RHS = N->getOperand(1);
1986
1987 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1988 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1989 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1990 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1991 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1992 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1993 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1994 LHS_0, LHS_1, RHS_0, RHS_1);
1995 SDValue Result_HI(Result_LO.getNode(), 1);
1996 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1997}
1998
1999SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
2000 SelectionDAG &DAG) const {
2001 SDValue Src = Op.getOperand(2);
2002 if (isa<FrameIndexSDNode>(Src.getNode())) {
2003 // CopyToReg nodes don't support FrameIndex operands. Other targets select
2004 // the FI to some LEA-like instruction, but since we don't have that, we
2005 // need to insert some kind of instruction that can take an FI operand and
2006 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
2007 // local.copy between Op and its FI operand.
2008 SDValue Chain = Op.getOperand(0);
2009 SDLoc DL(Op);
2010 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
2011 EVT VT = Src.getValueType();
2012 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
2013 : WebAssembly::COPY_I64,
2014 DL, VT, Src),
2015 0);
2016 return Op.getNode()->getNumValues() == 1
2017 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
2018 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
2019 Op.getNumOperands() == 4 ? Op.getOperand(3)
2020 : SDValue());
2021 }
2022 return SDValue();
2023}
2024
2025SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2026 SelectionDAG &DAG) const {
2027 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
2028 return DAG.getTargetFrameIndex(FI, Op.getValueType());
2029}
2030
2031SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2032 SelectionDAG &DAG) const {
2033 SDLoc DL(Op);
2034
2035 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2036 fail(DL, DAG,
2037 "Non-Emscripten WebAssembly hasn't implemented "
2038 "__builtin_return_address");
2039 return SDValue();
2040 }
2041
2042 unsigned Depth = Op.getConstantOperandVal(0);
2043 MakeLibCallOptions CallOptions;
2044 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2045 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2046 .first;
2047}
2048
SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Lower llvm.frameaddress. Only depth 0 is handled here.
  // Non-zero depths are not supported by WebAssembly currently. Use the
  // legalizer's default expansion, which is to return 0 (what this function is
  // documented to do).
  if (Op.getConstantOperandVal(0) > 0)
    return SDValue();

  // NOTE(review): a line appears elided from this excerpt here.
  EVT VT = Op.getValueType();
  // Copy the target's frame-pointer register into a value of the requested
  // type.
  Register FP =
      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
}
2063
// Lowers a GlobalAddress referring to a thread-local variable. DSO-local
// variables are addressed as an offset from the __tls_base global.
SDValue
WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);

  MachineFunction &MF = DAG.getMachineFunction();
  // TLS support here requires the bulk-memory feature.
  if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
    report_fatal_error("cannot use thread-local storage without bulk memory",
                       false);

  const GlobalValue *GV = GA->getGlobal();

  // Currently only Emscripten supports dynamic linking with threads. Therefore,
  // on other targets, if we have thread-local storage, only the local-exec
  // model is possible.
  auto model = Subtarget->getTargetTriple().isOSEmscripten()
                   ? GV->getThreadLocalMode()
  // NOTE(review): the ':' arm of this conditional and the checks under
  // "Unsupported TLS modes" appear elided from this excerpt.

  // Unsupported TLS modes

  if (model == GlobalValue::LocalExecTLSModel ||
      getTargetMachine().shouldAssumeDSOLocal(GV))) {
    // For DSO-local TLS variables we use offset from __tls_base

    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    // Pick the global.get of the width matching the target pointer size.
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *BaseName = MF.createExternalSymbolName("__tls_base");

    // NOTE(review): the declaration initialized by this continuation appears
    // elided from this excerpt.
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(BaseName, PtrVT)),
        0);

    // The symbol's offset relative to __tls_base, wrapped so ISel treats it
    // as a relocatable immediate.
    SDValue TLSOffset = DAG.getTargetGlobalAddress(
        GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
    SDValue SymOffset =
        DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);

    // Final address = __tls_base + offset.
    return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
  }


  // Fallback: emit a wrapped target global address.
  EVT VT = Op.getValueType();
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(),
}
2120
// Lowers a (non-TLS) GlobalAddress. In PIC mode, DSO-local symbols are
// addressed relative to __memory_base (data) or __table_base (functions);
// otherwise a plain wrapped target global address is emitted.
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  const auto *GA = cast<GlobalAddressSDNode>(Op);
  EVT VT = Op.getValueType();
  assert(GA->getTargetFlags() == 0 &&
         "Unexpected target flags on generic GlobalAddressSDNode");
  // NOTE(review): the address-space check guarding this diagnostic appears
  // elided from this excerpt.
    fail(DL, DAG, "Invalid address space for WebAssembly target");

  unsigned OperandFlags = 0;
  const GlobalValue *GV = GA->getGlobal();
  // Since WebAssembly tables cannot yet be shared across modules, we don't
  // need special treatment for tables in PIC mode.
  if (isPositionIndependent() &&
  // NOTE(review): the remainder of this condition appears elided from this
  // excerpt.
    if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
      MachineFunction &MF = DAG.getMachineFunction();
      MVT PtrVT = getPointerTy(MF.getDataLayout());
      // Functions live in the table; data lives in linear memory.
      const char *BaseName;
      if (GV->getValueType()->isFunctionTy()) {
        BaseName = MF.createExternalSymbolName("__table_base");
      } else {
        BaseName = MF.createExternalSymbolName("__memory_base");
      }
      // NOTE(review): the declaration initialized by this continuation
      // appears elided from this excerpt.
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));

      // The symbol's offset relative to the chosen base.
      SDValue SymAddr = DAG.getNode(
          WebAssemblyISD::WrapperREL, DL, VT,
          DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
                                     OperandFlags));

      return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
    }
  }

  // Non-PIC (or non-DSO-local) case: plain wrapped target global address.
  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
                     DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
                                                GA->getOffset(), OperandFlags));
}
2166
2167SDValue
2168WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2169 SelectionDAG &DAG) const {
2170 SDLoc DL(Op);
2171 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2172 EVT VT = Op.getValueType();
2173 assert(ES->getTargetFlags() == 0 &&
2174 "Unexpected target flags on generic ExternalSymbolSDNode");
2175 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2176 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2177}
2178
2179SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2180 SelectionDAG &DAG) const {
2181 // There's no need for a Wrapper node because we always incorporate a jump
2182 // table operand into a BR_TABLE instruction, rather than ever
2183 // materializing it in a register.
2184 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2185 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2186 JT->getTargetFlags());
2187}
2188
// Lowers BR_JT to a BR_TABLE node whose successor operands come straight from
// the machine jump-table entry.
SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(0);
  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  SDValue Index = Op.getOperand(2);
  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");

  // NOTE(review): the declaration of Ops appears elided from this excerpt.
  Ops.push_back(Chain);
  Ops.push_back(Index);

  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;

  // Add an operand for each case.
  for (auto *MBB : MBBs)
    Ops.push_back(DAG.getBasicBlock(MBB));

  // Add the first MBB as a dummy default target for now. This will be replaced
  // with the proper default target (and the preceding range check eliminated)
  // if possible by WebAssemblyFixBrTableDefaults.
  Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
}
2214
2215SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2216 SelectionDAG &DAG) const {
2217 SDLoc DL(Op);
2218 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2219
2220 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2221 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2222
2223 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2224 MFI->getVarargBufferVreg(), PtrVT);
2225 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2226 MachinePointerInfo(SV));
2227}
2228
// Custom-lowers the handful of intrinsics that need WebAssembly target nodes;
// all others return SDValue() to get default handling.
SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned IntNo;
  // The intrinsic ID's operand position depends on whether the node carries a
  // chain. NOTE(review): the case labels of this switch appear elided from
  // this excerpt.
  switch (Op.getOpcode()) {
    IntNo = Op.getConstantOperandVal(1);
    break;
    IntNo = Op.getConstantOperandVal(0);
    break;
  default:
    llvm_unreachable("Invalid intrinsic");
  }
  SDLoc DL(Op);

  switch (IntNo) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.

  case Intrinsic::wasm_lsda: {
    // Address of this function's LSDA symbol (GCC_except_table<N>), adjusted
    // relative to __memory_base when position independent.
    auto PtrVT = getPointerTy(MF.getDataLayout());
    const char *SymName = MF.createExternalSymbolName(
        "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
    if (isPositionIndependent()) {
      // NOTE(review): the declarations initialized by these continuations
      // appear elided from this excerpt.
          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
      const char *BaseName = MF.createExternalSymbolName("__memory_base");
          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
                      DAG.getTargetExternalSymbol(BaseName, PtrVT));
      SDValue SymAddr =
          DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
      return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
    }
    SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
    return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
  }

  case Intrinsic::wasm_shuffle: {
    // Drop in-chain and replace undefs, but otherwise pass through unchanged
    SDValue Ops[18];
    size_t OpIdx = 0;
    Ops[OpIdx++] = Op.getOperand(1);
    Ops[OpIdx++] = Op.getOperand(2);
    while (OpIdx < 18) {
      const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
      // Undef or out-of-range mask entries are canonicalized to constant 0.
      if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
        bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
        Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
      } else {
        Ops[OpIdx++] = MaskIdx;
      }
    }
    return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  }

  case Intrinsic::thread_pointer: {
    // The thread pointer is the current value of the __tls_base global.
    MVT PtrVT = getPointerTy(DAG.getDataLayout());
    auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
                                       : WebAssembly::GLOBAL_GET_I32;
    const char *TlsBase = MF.createExternalSymbolName("__tls_base");
    return SDValue(
        DAG.getMachineNode(GlobalGet, DL, PtrVT,
                           DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
        0);
  }
  }
}
2299
SDValue
WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // If sign extension operations are disabled, allow sext_inreg only if operand
  // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  // extension operations, but allowing sext_inreg in this context lets us have
  // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  // everywhere would be simpler in this file, but would necessitate large and
  // brittle patterns to undo the expansion and select extract_lane_s
  // instructions.
  assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  const SDValue &Extract = Op.getOperand(0);
  MVT VecT = Extract.getOperand(0).getSimpleValueType();
  // Lanes wider than 32 bits are not handled here.
  if (VecT.getVectorElementType().getSizeInBits() > 32)
    return SDValue();
  // The lane type being sign-extended from, and the 128-bit vector type made
  // of such lanes.
  MVT ExtractedLaneT =
      cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  MVT ExtractedVecT =
      MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  if (ExtractedVecT == VecT)
    return Op;

  // Bitcast vector to appropriate type to ensure ISel pattern coverage
  const SDNode *Index = Extract.getOperand(1).getNode();
  if (!isa<ConstantSDNode>(Index))
    return SDValue();
  // Scale the lane index to address the same bytes in the narrower-lane view.
  unsigned IndexVal = Index->getAsZExtVal();
  unsigned Scale =
      ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  assert(Scale > 1);
  SDValue NewIndex =
      DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  // NOTE(review): the opcode/type arguments of this getNode call appear
  // elided from this excerpt.
  SDValue NewExtract = DAG.getNode(
      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
                     Op.getOperand(1));
}
2342
2343static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2344 SelectionDAG &DAG) {
2345 SDValue Source = peekThroughBitcasts(Op);
2346 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2347 return SDValue();
2348
2349 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2350 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2351 "expected extend_low");
2352 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2353
2354 ArrayRef<int> Mask = Shuffle->getMask();
2355 // Look for a shuffle which moves from the high half to the low half.
2356 size_t FirstIdx = Mask.size() / 2;
2357 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2358 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2359 return SDValue();
2360 }
2361 }
2362
2363 SDLoc DL(Op);
2364 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2365 ? WebAssemblyISD::EXTEND_HIGH_S
2366 : WebAssemblyISD::EXTEND_HIGH_U;
2367 SDValue ShuffleSrc = Shuffle->getOperand(0);
2368 if (Op.getOpcode() == ISD::BITCAST)
2369 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2370
2371 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2372}
2373
// Lowers *_EXTEND_VECTOR_INREG by chaining extend_low nodes (or a single
// extend_high when the source is a suitable high-half shuffle) until the
// requested element width is reached.
SDValue
WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();

  // i1 and i64 element types are not handled here.
  if (SrcVT.getVectorElementType() == MVT::i1 ||
      SrcVT.getVectorElementType() == MVT::i64)
    return SDValue();

  assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
         "Unexpected extension factor.");
  unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();

  if (Scale != 2 && Scale != 4 && Scale != 8)
    return SDValue();

  unsigned Ext;
  // NOTE(review): the case labels of this switch appear elided from this
  // excerpt.
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unexpected opcode");
    Ext = WebAssemblyISD::EXTEND_LOW_U;
    break;
    Ext = WebAssemblyISD::EXTEND_LOW_S;
    break;
  }

  if (Scale == 2) {
    // See if we can use EXTEND_HIGH.
    if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
      return ExtendHigh;
  }

  // Widen by a factor of two per step until the requested type is reached.
  SDValue Ret = Src;
  while (Scale != 1) {
    // NOTE(review): part of this getNode call appears elided from this
    // excerpt.
    Ret = DAG.getNode(Ext, DL,
                      Ret.getValueType()
                      Ret);
    Scale /= 2;
  }
  assert(Ret.getValueType() == VT);
  return Ret;
}
2424
  // (Function signature elided in this excerpt; this is the body of
  // LowerConvertLow.) Tries to match a BUILD_VECTOR of per-lane int->fp
  // conversions or fp extensions against the convert_low / promote_low
  // target nodes.
  SDLoc DL(Op);
  if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
    return SDValue();

  // Matches a lane that is a conversion/extension of an extracted element,
  // recording the target opcode, the source vector, and the extracted index.
  auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
                             unsigned &Index) -> bool {
    switch (Op.getOpcode()) {
    case ISD::SINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_S;
      break;
    case ISD::UINT_TO_FP:
      Opcode = WebAssemblyISD::CONVERT_LOW_U;
      break;
    case ISD::FP_EXTEND:
    case ISD::FP16_TO_FP:
      Opcode = WebAssemblyISD::PROMOTE_LOW;
      break;
    default:
      return false;
    }

    auto ExtractVector = Op.getOperand(0);
    if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    // Only constant extraction indices can be matched.
    if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
      return false;

    SrcVec = ExtractVector.getOperand(0);
    Index = ExtractVector.getConstantOperandVal(1);
    return true;
  };

  unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
  unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
  unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
  SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;

  if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec,
                        FirstIndex) ||
      !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec,
                        SecondIndex))
    return SDValue();

  // If we're converting to v4f32, check the third and fourth lanes, too.
  if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode,
                                          ThirdSrcVec, ThirdIndex) ||
                        !GetConvertedLane(Op.getOperand(3), FourthOpcode,
                                          FourthSrcVec, FourthIndex)))
    return SDValue();

  // All lanes must use the same conversion opcode.
  if (FirstOpcode != SecondOpcode)
    return SDValue();

  // TODO Add an optimization similar to the v2f64 below for shuffling the
  // vectors when the lanes are in the wrong order or come from different src
  // vectors.
  if (NumLanes == 4 &&
      (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
       FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
       FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
       ThirdIndex != 2 || FourthIndex != 3))
    return SDValue();

  // The source vector type implied by the chosen opcode must match.
  MVT ExpectedSrcVT;
  switch (FirstOpcode) {
  case WebAssemblyISD::CONVERT_LOW_S:
  case WebAssemblyISD::CONVERT_LOW_U:
    ExpectedSrcVT = MVT::v4i32;
    break;
  case WebAssemblyISD::PROMOTE_LOW:
    ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
    break;
  }
  if (FirstSrcVec.getValueType() != ExpectedSrcVT)
    return SDValue();

  auto Src = FirstSrcVec;
  if (NumLanes == 2 &&
      (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
    // Shuffle the source vector so that the converted lanes are the low lanes.
    Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
                               {static_cast<int>(FirstIndex),
                                static_cast<int>(SecondIndex) + 4, -1, -1});
  }
  return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
                     Src);
}
2514
// Lowers a BUILD_VECTOR by choosing the strategy (swizzle, shuffle, constant
// vector, or splat) that initializes the most lanes at once, then patching
// any remaining lanes with replace_lane.
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  if (VT == MVT::v8f16) {
    // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
    // FP16 type, so cast them to I16s.
    MVT IVT = VT.changeVectorElementType(MVT::i16);
    // NOTE(review): the declaration of NewOps appears elided from this
    // excerpt.
    for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
      NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
    SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
    return DAG.getBitcast(VT, Res);
  }

  if (auto ConvertLow = LowerConvertLow(Op, DAG))
    return ConvertLow;

  SDLoc DL(Op);
  const EVT VecT = Op.getValueType();
  const EVT LaneT = Op.getOperand(0).getValueType();
  const size_t Lanes = Op.getNumOperands();
  // Swizzle only applies to i8x16 vectors.
  bool CanSwizzle = VecT == MVT::v16i8;

  // BUILD_VECTORs are lowered to the instruction that initializes the highest
  // possible number of lanes at once followed by a sequence of replace_lane
  // instructions to individually initialize any remaining lanes.

  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  // swizzled lanes should be given greater weight.

  // TODO: Investigate looping rather than always extracting/replacing specific
  // lanes to fill gaps.

  auto IsConstant = [](const SDValue &V) {
    return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  };

  // Returns the source vector and index vector pair if they exist. Checks for:
  // (extract_vector_elt
  // $src,
  // (sign_extend_inreg (extract_vector_elt $indices, $i))
  // )
  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
    auto Bail = std::make_pair(SDValue(), SDValue());
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleSrc = Lane->getOperand(0);
    const SDValue &IndexExt = Lane->getOperand(1);
    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
      return Bail;
    const SDValue &Index = IndexExt->getOperand(0);
    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return Bail;
    const SDValue &SwizzleIndices = Index->getOperand(0);
    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
        SwizzleIndices.getValueType() != MVT::v16i8 ||
        Index->getOperand(1)->getOpcode() != ISD::Constant ||
        Index->getConstantOperandVal(1) != I)
      return Bail;
    return std::make_pair(SwizzleSrc, SwizzleIndices);
  };

  // If the lane is extracted from another vector at a constant index, return
  // that vector. The source vector must not have more lanes than the dest
  // because the shufflevector indices are in terms of the destination lanes and
  // would not be able to address the smaller individual source lanes.
  auto GetShuffleSrc = [&](const SDValue &Lane) {
    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
      return SDValue();
    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
        VecT.getVectorNumElements())
      return SDValue();
    return Lane->getOperand(0);
  };

  // Tallies of how many lanes each candidate strategy could initialize.
  using ValueEntry = std::pair<SDValue, size_t>;
  SmallVector<ValueEntry, 16> SplatValueCounts;

  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  SmallVector<SwizzleEntry, 16> SwizzleCounts;

  using ShuffleEntry = std::pair<SDValue, size_t>;
  SmallVector<ShuffleEntry, 16> ShuffleCounts;

  auto AddCount = [](auto &Counts, const auto &Val) {
    auto CountIt =
        llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
    if (CountIt == Counts.end()) {
      Counts.emplace_back(Val, 1);
    } else {
      CountIt->second++;
    }
  };

  auto GetMostCommon = [](auto &Counts) {
    auto CommonIt = llvm::max_element(Counts, llvm::less_second());
    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
    return *CommonIt;
  };

  size_t NumConstantLanes = 0;

  // Count eligible lanes for each type of vector creation op
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (Lane.isUndef())
      continue;

    AddCount(SplatValueCounts, Lane);

    if (IsConstant(Lane))
      NumConstantLanes++;
    if (auto ShuffleSrc = GetShuffleSrc(Lane))
      AddCount(ShuffleCounts, ShuffleSrc);
    if (CanSwizzle) {
      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
      if (SwizzleSrcs.first)
        AddCount(SwizzleCounts, SwizzleSrcs);
    }
  }

  SDValue SplatValue;
  size_t NumSplatLanes;
  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);

  SDValue SwizzleSrc;
  SDValue SwizzleIndices;
  size_t NumSwizzleLanes = 0;
  if (SwizzleCounts.size())
    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);

  // Shuffles can draw from up to two vectors, so find the two most common
  // sources.
  SDValue ShuffleSrc1, ShuffleSrc2;
  size_t NumShuffleLanes = 0;
  if (ShuffleCounts.size()) {
    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
    llvm::erase_if(ShuffleCounts,
                   [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
  }
  if (ShuffleCounts.size()) {
    size_t AdditionalShuffleLanes;
    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
        GetMostCommon(ShuffleCounts);
    NumShuffleLanes += AdditionalShuffleLanes;
  }

  // Predicate returning true if the lane is properly initialized by the
  // original instruction
  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  // NOTE(review): the declaration of Result appears elided from this excerpt.
  // Prefer swizzles over shuffles over vector consts over splats
  if (NumSwizzleLanes >= NumShuffleLanes &&
      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
    Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                         SwizzleIndices);
    auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
    IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
      return Swizzled == GetSwizzleSrcs(I, Lane);
    };
  } else if (NumShuffleLanes >= NumConstantLanes &&
             NumShuffleLanes >= NumSplatLanes) {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
    size_t DestLaneCount = VecT.getVectorNumElements();
    size_t Scale1 = 1;
    size_t Scale2 = 1;
    SDValue Src1 = ShuffleSrc1;
    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
    // Sources with wider lanes are bitcast to the destination type and their
    // indices scaled accordingly.
    if (Src1.getValueType() != VecT) {
      // NOTE(review): the right-hand side of this initialization appears
      // elided from this excerpt.
      size_t LaneSize =
      assert(LaneSize > DestLaneSize);
      Scale1 = LaneSize / DestLaneSize;
      Src1 = DAG.getBitcast(VecT, Src1);
    }
    if (Src2.getValueType() != VecT) {
      // NOTE(review): the right-hand side of this initialization appears
      // elided from this excerpt.
      size_t LaneSize =
      assert(LaneSize > DestLaneSize);
      Scale2 = LaneSize / DestLaneSize;
      Src2 = DAG.getBitcast(VecT, Src2);
    }

    int Mask[16];
    assert(DestLaneCount <= 16);
    for (size_t I = 0; I < DestLaneCount; ++I) {
      const SDValue &Lane = Op->getOperand(I);
      SDValue Src = GetShuffleSrc(Lane);
      if (Src == ShuffleSrc1) {
        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
      } else if (Src && Src == ShuffleSrc2) {
        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
      } else {
        Mask[I] = -1;
      }
    }
    ArrayRef<int> MaskRef(Mask, DestLaneCount);
    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
      auto Src = GetShuffleSrc(Lane);
      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
    };
  } else if (NumConstantLanes >= NumSplatLanes) {
    SmallVector<SDValue, 16> ConstLanes;
    for (const SDValue &Lane : Op->op_values()) {
      if (IsConstant(Lane)) {
        // Values may need to be fixed so that they will sign extend to be
        // within the expected range during ISel. Check whether the value is in
        // bounds based on the lane bit width and if it is out of bounds, lop
        // off the extra bits.
        uint64_t LaneBits = 128 / Lanes;
        if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
          ConstLanes.push_back(DAG.getConstant(
              Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
              SDLoc(Lane), LaneT));
        } else {
          ConstLanes.push_back(Lane);
        }
      } else if (LaneT.isFloatingPoint()) {
        ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
      } else {
        ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
      }
    }
    Result = DAG.getBuildVector(VecT, DL, ConstLanes);
    IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
      return IsConstant(Lane);
    };
  } else {
    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
    if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
        (DestLaneSize == 32 || DestLaneSize == 64)) {
      // Could be selected to load_zero.
      Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
    } else {
      // Use a splat (which might be selected as a load splat)
      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
    }
    IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
      return Lane == SplatValue;
    };
  }

  assert(Result);
  assert(IsLaneConstructed);

  // Add replace_lane instructions for any unhandled values
  for (size_t I = 0; I < Lanes; ++I) {
    const SDValue &Lane = Op->getOperand(I);
    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                           DAG.getConstant(I, DL, MVT::i32));
  }

  return Result;
}
2774
2775SDValue
2776WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2777 SelectionDAG &DAG) const {
2778 SDLoc DL(Op);
2779 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2780 MVT VecType = Op.getOperand(0).getSimpleValueType();
2781 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2782 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2783
2784 // Space for two vector args and sixteen mask indices
2785 SDValue Ops[18];
2786 size_t OpIdx = 0;
2787 Ops[OpIdx++] = Op.getOperand(0);
2788 Ops[OpIdx++] = Op.getOperand(1);
2789
2790 // Expand mask indices to byte indices and materialize them as operands
2791 for (int M : Mask) {
2792 for (size_t J = 0; J < LaneBytes; ++J) {
2793 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2794 // whole lane of vector input, to allow further reduction at VM. E.g.
2795 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2796 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2797 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2798 }
2799 }
2800
2801 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2802}
2803
SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // The legalizer does not know how to expand the unsupported comparison modes
  // of i64x2 vectors, so we manually unroll them here.
  assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  // NOTE(review): the declarations of LHS and RHS appear elided from this
  // excerpt.
  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  const SDValue &CC = Op->getOperand(2);
  // Compare one lane, producing all-ones on true and all-zeros on false to
  // match vector-compare semantics.
  auto MakeLane = [&](unsigned I) {
    return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
                       DAG.getConstant(uint64_t(-1), DL, MVT::i64),
                       DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  };
  return DAG.getBuildVector(Op->getValueType(0), DL,
                            {MakeLane(0), MakeLane(1)});
}
2822
2823SDValue
2824WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2825 SelectionDAG &DAG) const {
2826 // Allow constant lane indices, expand variable lane indices
2827 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2828 if (isa<ConstantSDNode>(IdxNode)) {
2829 // Ensure the index type is i32 to match the tablegen patterns
2830 uint64_t Idx = IdxNode->getAsZExtVal();
2831 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2832 Ops[Op.getNumOperands() - 1] =
2833 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2834 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2835 }
2836 // Perform default expansion
2837 return SDValue();
2838}
2839
2841 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2842 // 32-bit and 64-bit unrolled shifts will have proper semantics
2843 if (LaneT.bitsGE(MVT::i32))
2844 return DAG.UnrollVectorOp(Op.getNode());
2845 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2846 SDLoc DL(Op);
2847 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2848 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2849 unsigned ShiftOpcode = Op.getOpcode();
2850 SmallVector<SDValue, 16> ShiftedElements;
2851 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2852 SmallVector<SDValue, 16> ShiftElements;
2853 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2854 SmallVector<SDValue, 16> UnrolledOps;
2855 for (size_t i = 0; i < NumLanes; ++i) {
2856 SDValue MaskedShiftValue =
2857 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2858 SDValue ShiftedValue = ShiftedElements[i];
2859 if (ShiftOpcode == ISD::SRA)
2860 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2861 ShiftedValue, DAG.getValueType(LaneT));
2862 UnrolledOps.push_back(
2863 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2864 }
2865 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2866}
2867
// Lower a vector shift either to a single wasm shift (which takes a scalar
// i32 amount applied to all lanes) or, when the shift amount is not a splat,
// to a lane-by-lane unrolled form.
SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  // Only manually lower vector shifts
  assert(Op.getSimpleValueType().isVector());

  uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
  auto ShiftVal = Op.getOperand(1);

  // Try to skip bitmask operation since it is implied inside shift instruction
  // (wasm shifts take the amount modulo the lane width, so an explicit
  // `and amt, LaneBits-1` is redundant and can be looked through).
  auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
    if (MaskOp.getOpcode() != ISD::AND)
      return MaskOp;
    SDValue LHS = MaskOp.getOperand(0);
    SDValue RHS = MaskOp.getOperand(1);
    if (MaskOp.getValueType().isVector()) {
      APInt MaskVal;
      // Canonicalize so a constant-splat mask, if any, is on the RHS.
      if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
        std::swap(LHS, RHS);

      if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
          MaskVal == MaskBits)
        MaskOp = LHS;
    } else {
      // Scalar case: same canonicalization with a plain constant.
      if (!isa<ConstantSDNode>(RHS.getNode()))
        std::swap(LHS, RHS);

      auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
      if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
        MaskOp = LHS;
    }

    return MaskOp;
  };

  // Skip vector and operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  // A wasm shift needs one scalar amount for all lanes; bail out to the
  // unrolled lowering if the amount vector is not a splat.
  ShiftVal = DAG.getSplatValue(ShiftVal);
  if (!ShiftVal)
    return unrollVectorShift(Op, DAG);

  // Skip scalar and operation
  ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
  // Use anyext because none of the high bits can affect the shift
  ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);

  unsigned Opcode;
  switch (Op.getOpcode()) {
  case ISD::SHL:
    Opcode = WebAssemblyISD::VEC_SHL;
    break;
  case ISD::SRA:
    Opcode = WebAssemblyISD::VEC_SHR_S;
    break;
  case ISD::SRL:
    Opcode = WebAssemblyISD::VEC_SHR_U;
    break;
  default:
    llvm_unreachable("unexpected opcode");
  }

  return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
}
2931
2932SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2933 SelectionDAG &DAG) const {
2934 EVT ResT = Op.getValueType();
2935 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2936
2937 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2938 (SatVT == MVT::i32 || SatVT == MVT::i64))
2939 return Op;
2940
2941 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2942 return Op;
2943
2944 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2945 return Op;
2946
2947 return SDValue();
2948}
2949
2951 return (Op->getFlags().hasNoNaNs() ||
2952 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2953 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2954 (Op->getFlags().hasNoSignedZeros() ||
2955 DAG.isKnownNeverLogicalZero(Op->getOperand(0)) ||
2956 DAG.isKnownNeverLogicalZero(Op->getOperand(1)));
2957}
2958
2959SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2960 SelectionDAG &DAG) const {
2961 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2962 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2963 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2964 }
2965 return SDValue();
2966}
2967
2968SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2969 SelectionDAG &DAG) const {
2970 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2971 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2972 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2973 }
2974 return SDValue();
2975}
2976
2977//===----------------------------------------------------------------------===//
2978// Custom DAG combine hooks
2979//===----------------------------------------------------------------------===//
2980static SDValue
2982 auto &DAG = DCI.DAG;
2983 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2984
2985 // Hoist vector bitcasts that don't change the number of lanes out of unary
2986 // shuffles, where they are less likely to get in the way of other combines.
2987 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2988 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2989 SDValue Bitcast = N->getOperand(0);
2990 if (Bitcast.getOpcode() != ISD::BITCAST)
2991 return SDValue();
2992 if (!N->getOperand(1).isUndef())
2993 return SDValue();
2994 SDValue CastOp = Bitcast.getOperand(0);
2995 EVT SrcType = CastOp.getValueType();
2996 EVT DstType = Bitcast.getValueType();
2997 if (!SrcType.is128BitVector() ||
2998 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2999 return SDValue();
3000 SDValue NewShuffle = DAG.getVectorShuffle(
3001 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
3002 return DAG.getBitcast(DstType, NewShuffle);
3003}
3004
3005/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
3006/// split up into scalar instructions during legalization, and the vector
3007/// extending instructions are selected in performVectorExtendCombine below.
3008static SDValue
3011 auto &DAG = DCI.DAG;
3012 assert(N->getOpcode() == ISD::UINT_TO_FP ||
3013 N->getOpcode() == ISD::SINT_TO_FP);
3014
3015 EVT InVT = N->getOperand(0)->getValueType(0);
3016 EVT ResVT = N->getValueType(0);
3017 MVT ExtVT;
3018 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
3019 ExtVT = MVT::v4i32;
3020 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3021 ExtVT = MVT::v2i32;
3022 else
3023 return SDValue();
3024
3025 unsigned Op =
3027 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
3028 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
3029}
3030
3031static SDValue
3034 auto &DAG = DCI.DAG;
3035
3036 SDNodeFlags Flags = N->getFlags();
3037 SDValue Op0 = N->getOperand(0);
3038 EVT VT = N->getValueType(0);
3039
3040 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3041 // Depending on the target (runtime) backend, this might be performance
3042 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3043 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
3044 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
3045 }
3046
3047 return SDValue();
3048}
3049
3050static SDValue
3052 auto &DAG = DCI.DAG;
3053 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3054 N->getOpcode() == ISD::ZERO_EXTEND);
3055
3056 EVT ResVT = N->getValueType(0);
3057 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3058 SDLoc DL(N);
3059
3060 if (ResVT == MVT::v16i32 && N->getOperand(0)->getValueType(0) == MVT::v16i8) {
3061 // Use a tree of extend low/high to split and extend the input in two
3062 // layers to avoid doing several shuffles and even more extends.
3063 unsigned LowOp =
3064 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3065 unsigned HighOp =
3066 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3067 SDValue Input = N->getOperand(0);
3068 SDValue LowHalf = DAG.getNode(LowOp, DL, MVT::v8i16, Input);
3069 SDValue HighHalf = DAG.getNode(HighOp, DL, MVT::v8i16, Input);
3070 SDValue Subvectors[] = {
3071 DAG.getNode(LowOp, DL, MVT::v4i32, LowHalf),
3072 DAG.getNode(HighOp, DL, MVT::v4i32, LowHalf),
3073 DAG.getNode(LowOp, DL, MVT::v4i32, HighHalf),
3074 DAG.getNode(HighOp, DL, MVT::v4i32, HighHalf),
3075 };
3076 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Subvectors);
3077 }
3078
3079 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3080 // possible before the extract_subvector can be expanded.
3081 auto Extract = N->getOperand(0);
3082 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3083 return SDValue();
3084 auto Source = Extract.getOperand(0);
3085 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3086 if (IndexNode == nullptr)
3087 return SDValue();
3088 auto Index = IndexNode->getZExtValue();
3089
3090 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3091 // extracted subvector is the low or high half of its source.
3092 if (ResVT == MVT::v8i16) {
3093 if (Extract.getValueType() != MVT::v8i8 ||
3094 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3095 return SDValue();
3096 } else if (ResVT == MVT::v4i32) {
3097 if (Extract.getValueType() != MVT::v4i16 ||
3098 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3099 return SDValue();
3100 } else if (ResVT == MVT::v2i64) {
3101 if (Extract.getValueType() != MVT::v2i32 ||
3102 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3103 return SDValue();
3104 } else {
3105 return SDValue();
3106 }
3107
3108 bool IsLow = Index == 0;
3109
3110 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3111 : WebAssemblyISD::EXTEND_HIGH_S)
3112 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3113 : WebAssemblyISD::EXTEND_HIGH_U);
3114
3115 return DAG.getNode(Op, DL, ResVT, Source);
3116}
3117
3118static SDValue
3120 auto &DAG = DCI.DAG;
3121
3122 auto GetWasmConversionOp = [](unsigned Op) {
3123 switch (Op) {
3125 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3127 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3128 case ISD::FP_ROUND:
3129 return WebAssemblyISD::DEMOTE_ZERO;
3130 }
3131 llvm_unreachable("unexpected op");
3132 };
3133
3134 auto IsZeroSplat = [](SDValue SplatVal) {
3135 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3136 APInt SplatValue, SplatUndef;
3137 unsigned SplatBitSize;
3138 bool HasAnyUndefs;
3139 // Endianness doesn't matter in this context because we are looking for
3140 // an all-zero value.
3141 return Splat &&
3142 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3143 HasAnyUndefs) &&
3144 SplatValue == 0;
3145 };
3146
3147 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3148 // Combine this:
3149 //
3150 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3151 //
3152 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3153 //
3154 // Or this:
3155 //
3156 // (concat_vectors ({v2f32, v4f16} (fp_round ({v2f64, v4f32} $x))),
3157 // ({v2f32, v4f16} (splat 0)))
3158 //
3159 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3160 EVT ResVT;
3161 EVT ExpectedConversionType;
3162 auto Conversion = N->getOperand(0);
3163 auto ConversionOp = Conversion.getOpcode();
3164 switch (ConversionOp) {
3167 ResVT = MVT::v4i32;
3168 ExpectedConversionType = MVT::v2i32;
3169 break;
3170 case ISD::FP_ROUND:
3171 if (Conversion.getValueType() == MVT::v2f32) {
3172 ResVT = MVT::v4f32;
3173 ExpectedConversionType = MVT::v2f32;
3174 } else if (Conversion.getValueType() == MVT::v4f16) {
3175 ResVT = MVT::v8f16;
3176 ExpectedConversionType = MVT::v4f16;
3177 } else {
3178 return SDValue();
3179 }
3180 break;
3181 default:
3182 return SDValue();
3183 }
3184
3185 if (N->getValueType(0) != ResVT)
3186 return SDValue();
3187
3188 if (Conversion.getValueType() != ExpectedConversionType)
3189 return SDValue();
3190
3191 auto Source = Conversion.getOperand(0);
3192 if (!((Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4f32) ||
3193 (Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4i32) ||
3194 (Source.getValueType() == MVT::v4f32 && ResVT == MVT::v8f16)))
3195 return SDValue();
3196
3197 if (!IsZeroSplat(N->getOperand(1)) ||
3198 N->getOperand(1).getValueType() != ExpectedConversionType)
3199 return SDValue();
3200
3201 unsigned Op = GetWasmConversionOp(ConversionOp);
3202 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3203 }
3204
3205 // Combine this:
3206 //
3207 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3208 //
3209 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3210 //
3211 // Or this:
3212 //
3213 // ({v4f32, v8f16} (fp_round (concat_vectors $x,
3214 // ({v2f64, v4f32} (splat 0)))))
3215 //
3216 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3217 EVT ResVT;
3218 auto ConversionOp = N->getOpcode();
3219 switch (ConversionOp) {
3222 ResVT = MVT::v4i32;
3223 break;
3224 case ISD::FP_ROUND:
3225 ResVT = N->getValueType(0);
3226 break;
3227 default:
3228 llvm_unreachable("unexpected op");
3229 }
3230
3231 if (N->getValueType(0) != ResVT)
3232 return SDValue();
3233
3234 auto Concat = N->getOperand(0);
3235 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3236 return SDValue();
3237 EVT ConcatVT = Concat.getValueType();
3238 EVT SourceVT = Concat.getOperand(0).getValueType();
3239
3240 if (!IsZeroSplat(Concat.getOperand(1)))
3241 return SDValue();
3242
3243 if (ConversionOp == ISD::FP_ROUND) {
3244 bool IsF64ToF32 =
3245 ConcatVT == MVT::v4f64 && SourceVT == MVT::v2f64 && ResVT == MVT::v4f32;
3246 bool IsF32ToF16 =
3247 ConcatVT == MVT::v8f32 && SourceVT == MVT::v4f32 && ResVT == MVT::v8f16;
3248 if (!(IsF64ToF32 || IsF32ToF16))
3249 return SDValue();
3250 } else {
3251 if (ConcatVT != MVT::v4f64 || SourceVT != MVT::v2f64 || ResVT != MVT::v4i32)
3252 return SDValue();
3253 }
3254
3255 unsigned Op = GetWasmConversionOp(ConversionOp);
3256 return DAG.getNode(Op, SDLoc(N), ResVT, Concat.getOperand(0));
3257}
3258
3259// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3260static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3261 const SDLoc &DL, unsigned VectorWidth) {
3262 EVT VT = Vec.getValueType();
3263 EVT ElVT = VT.getVectorElementType();
3264 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3265 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3266 VT.getVectorNumElements() / Factor);
3267
3268 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3269 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3270 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3271
3272 // This is the index of the first element of the VectorWidth-bit chunk
3273 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3274 IdxVal &= ~(ElemsPerChunk - 1);
3275
3276 // If the input is a buildvector just emit a smaller one.
3277 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3278 return DAG.getBuildVector(ResultVT, DL,
3279 Vec->ops().slice(IdxVal, ElemsPerChunk));
3280
3281 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3282 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3283}
3284
3285// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3286// is the expected destination value type after recursion. In is the initial
3287// input. Note that the input should have enough leading zero bits to prevent
3288// NARROW_U from saturating results.
3290 SelectionDAG &DAG) {
3291 EVT SrcVT = In.getValueType();
3292
3293 // No truncation required, we might get here due to recursive calls.
3294 if (SrcVT == DstVT)
3295 return In;
3296
3297 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3298 unsigned NumElems = SrcVT.getVectorNumElements();
3299 if (!isPowerOf2_32(NumElems))
3300 return SDValue();
3301 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3302 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3303
3304 LLVMContext &Ctx = *DAG.getContext();
3305 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3306
3307 // Narrow to the largest type possible:
3308 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3309 EVT InVT = MVT::i16, OutVT = MVT::i8;
3310 if (SrcVT.getScalarSizeInBits() > 16) {
3311 InVT = MVT::i32;
3312 OutVT = MVT::i16;
3313 }
3314 unsigned SubSizeInBits = SrcSizeInBits / 2;
3315 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3316 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3317
3318 // Split lower/upper subvectors.
3319 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3320 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3321
3322 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3323 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3324 Lo = DAG.getBitcast(InVT, Lo);
3325 Hi = DAG.getBitcast(InVT, Hi);
3326 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3327 return DAG.getBitcast(DstVT, Res);
3328 }
3329
3330 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3331 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3332 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3333 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3334
3335 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3336 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3337 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3338}
3339
3342 auto &DAG = DCI.DAG;
3343
3344 SDValue In = N->getOperand(0);
3345 EVT InVT = In.getValueType();
3346 if (!InVT.isSimple())
3347 return SDValue();
3348
3349 EVT OutVT = N->getValueType(0);
3350 if (!OutVT.isVector())
3351 return SDValue();
3352
3353 EVT OutSVT = OutVT.getVectorElementType();
3354 EVT InSVT = InVT.getVectorElementType();
3355 // Currently only cover truncate to v16i8 or v8i16.
3356 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3357 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3358 return SDValue();
3359
3360 SDLoc DL(N);
3362 OutVT.getScalarSizeInBits());
3363 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3364 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3365}
3366
3369 using namespace llvm::SDPatternMatch;
3370 auto &DAG = DCI.DAG;
3371 SDLoc DL(N);
3372 SDValue Src = N->getOperand(0);
3373 EVT VT = N->getValueType(0);
3374 EVT SrcVT = Src.getValueType();
3375
3376 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3377 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3378 return SDValue();
3379
3380 unsigned NumElts = SrcVT.getVectorNumElements();
3381 EVT Width = MVT::getIntegerVT(128 / NumElts);
3382
3383 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3384 // ==> bitmask
3385 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3386 return DAG.getZExtOrTrunc(
3387 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3388 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3389 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3390 SrcVT.changeVectorElementType(
3391 *DAG.getContext(), Width))}),
3392 DL, VT);
3393 }
3394
3395 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3396 if (NumElts == 32 || NumElts == 64) {
3397 SDValue Concat, SetCCVector;
3398 ISD::CondCode SetCond;
3399
3400 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3401 m_CondCode(SetCond)))))
3402 return SDValue();
3403 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3404 return SDValue();
3405
3406 // Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3407 // Derive the per-chunk mask/integer types from the actual operand type
3408 // instead of hardcoding v16i1 / i16 for every chunk.
3409 EVT ConcatOperandVT = Concat.getOperand(0).getValueType();
3410 unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3411
3412 EVT ConcatOperandMaskVT =
3413 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3414 ElementCount::getFixed(ConcatOperandNumElts));
3415 EVT ConcatOperandBitmaskVT =
3416 EVT::getIntegerVT(*DAG.getContext(), ConcatOperandNumElts);
3417 EVT ReturnVT = N->getValueType(0);
3418 SDValue ReconstructedBitmask = DAG.getConstant(0, DL, ReturnVT);
3419 // Example:
3420 // v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3421 // -> v8i1 + v8i1 + v8i1 + v8i1
3422 // -> i8 + i8 + i8 + i8
3423 // -> reconstructed i32 bitmask
3424 for (size_t I = 0; I < Concat->ops().size(); ++I) {
3425 SDValue ConcatOperand = Concat.getOperand(I);
3426 assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3427 "concat_vectors operands must have the same type");
3428
3429 SDValue SetCCVectorOperand =
3430 extractSubVector(SetCCVector, I * ConcatOperandNumElts, DAG, DL, 128);
3431 if (!SetCCVectorOperand ||
3432 SetCCVectorOperand.getValueType() != ConcatOperandVT)
3433 return SDValue();
3434
3435 // Build the per-chunk mask using the correct chunk type:
3436 // v16i8 -> v16i1 -> i16
3437 // v8i16 -> v8i1 -> i8
3438 // v4i32 -> v4i1 -> i4
3439 // v2i64 -> v2i1 -> i2
3440 SDValue ConcatOperandMask = DAG.getSetCC(
3441 DL, ConcatOperandMaskVT, ConcatOperand, SetCCVectorOperand, SetCond);
3442 SDValue ConcatOperandBitmask =
3443 DAG.getBitcast(ConcatOperandBitmaskVT, ConcatOperandMask);
3444 SDValue ExtendedConcatOperandBitmask =
3445 DAG.getZExtOrTrunc(ConcatOperandBitmask, DL, ReturnVT);
3446
3447 // Shift the previously reconstructed bits to make room for this chunk.
3448 if (I != 0) {
3449 ReconstructedBitmask = DAG.getNode(
3450 ISD::SHL, DL, ReturnVT, ReconstructedBitmask,
3451 DAG.getShiftAmountConstant(ConcatOperandNumElts, ReturnVT, DL));
3452 }
3453
3454 // Merge disjoint partial bitmasks with OR.
3455 ReconstructedBitmask =
3456 DAG.getNode(ISD::OR, DL, ReturnVT, ReconstructedBitmask,
3457 ExtendedConcatOperandBitmask);
3458 }
3459
3460 return ReconstructedBitmask;
3461 }
3462
3463 return SDValue();
3464}
3465
3467 // bitmask (setcc <X>, 0, setlt) => bitmask X
3468 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3469 using namespace llvm::SDPatternMatch;
3470
3471 if (N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask)
3472 return SDValue();
3473
3474 SDValue LHS;
3475 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3477 return SDValue();
3478
3479 SDLoc DL(N);
3480 return DAG.getNode(
3481 ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
3482 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
3483}
3484
3486 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3487 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3488 // any_true (setcc <X>, 0, ne) => (any_true X)
3489 // all_true (setcc <X>, 0, ne) => (all_true X)
3490 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3491 using namespace llvm::SDPatternMatch;
3492
3493 SDValue LHS;
3494 if (N->getNumOperands() < 2 ||
3495 !sd_match(N->getOperand(1),
3497 return SDValue();
3498 EVT LT = LHS.getValueType();
3499 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3500 return SDValue();
3501
3502 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3503 ISD::CondCode SetType,
3504 Intrinsic::WASMIntrinsics InPost) {
3505 if (N->getConstantOperandVal(0) != InPre)
3506 return SDValue();
3507
3508 SDValue LHS;
3509 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3510 m_SpecificCondCode(SetType))))
3511 return SDValue();
3512
3513 SDLoc DL(N);
3514 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3515 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3516 if (SetType == ISD::SETEQ)
3517 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3518 DAG.getConstant(1, DL, MVT::i32));
3519 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3520 };
3521
3522 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3523 Intrinsic::wasm_alltrue))
3524 return AnyTrueEQ;
3525 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3526 Intrinsic::wasm_anytrue))
3527 return AllTrueEQ;
3528 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3529 Intrinsic::wasm_anytrue))
3530 return AnyTrueNE;
3531 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3532 Intrinsic::wasm_alltrue))
3533 return AllTrueNE;
3534
3535 return SDValue();
3536}
3537
3538template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3539 Intrinsic::ID Intrin>
3541 SDValue LHS = N->getOperand(0);
3542 SDValue RHS = N->getOperand(1);
3543 SDValue Cond = N->getOperand(2);
3544 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3545 return SDValue();
3546
3547 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3548 return SDValue();
3549
3550 SDLoc DL(N);
3551 SDValue Ret =
3552 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3553 {DAG.getConstant(Intrin, DL, MVT::i32),
3554 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)});
3555 if (RequiresNegate)
3556 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3557 DAG.getConstant(1, DL, MVT::i32));
3558 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3559}
3560
3561/// Try to convert a i128 comparison to a v16i8 comparison before type
3562/// legalization splits it up into chunks
3563static SDValue
3565 const WebAssemblySubtarget *Subtarget) {
3566
3567 SDLoc DL(N);
3568 SDValue X = N->getOperand(0);
3569 SDValue Y = N->getOperand(1);
3570 EVT VT = N->getValueType(0);
3571 EVT OpVT = X.getValueType();
3572
3573 SelectionDAG &DAG = DCI.DAG;
3575 Attribute::NoImplicitFloat))
3576 return SDValue();
3577
3578 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3579 // We're looking for an oversized integer equality comparison with SIMD
3580 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3581 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3582 return SDValue();
3583
3584 // Don't perform this combine if constructing the vector will be expensive.
3585 auto IsVectorBitCastCheap = [](SDValue X) {
3587 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3588 };
3589
3590 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3591 return SDValue();
3592
3593 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3594 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3595 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3596
3597 SDValue Intr =
3598 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3599 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3600 : Intrinsic::wasm_anytrue,
3601 DL, MVT::i32),
3602 Cmp});
3603
3604 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3605 ISD::SETNE);
3606}
3607
3610 const WebAssemblySubtarget *Subtarget) {
3611 if (!DCI.isBeforeLegalize())
3612 return SDValue();
3613
3614 EVT VT = N->getValueType(0);
3615 if (!VT.isScalarInteger())
3616 return SDValue();
3617
3618 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3619 return V;
3620
3621 SDValue LHS = N->getOperand(0);
3622 if (LHS->getOpcode() != ISD::BITCAST)
3623 return SDValue();
3624
3625 EVT FromVT = LHS->getOperand(0).getValueType();
3626 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3627 return SDValue();
3628
3629 unsigned NumElts = FromVT.getVectorNumElements();
3630 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3631 return SDValue();
3632
3633 if (!cast<ConstantSDNode>(N->getOperand(1)))
3634 return SDValue();
3635
3636 auto &DAG = DCI.DAG;
3637 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3638 MVT::getIntegerVT(128 / NumElts));
3639 // setcc (iN (bitcast (vNi1 X))), 0, ne
3640 // ==> any_true (vNi1 X)
3642 N, VecVT, DAG)) {
3643 return Match;
3644 }
3645 // setcc (iN (bitcast (vNi1 X))), 0, eq
3646 // ==> xor (any_true (vNi1 X)), -1
3648 N, VecVT, DAG)) {
3649 return Match;
3650 }
3651 // setcc (iN (bitcast (vNi1 X))), -1, eq
3652 // ==> all_true (vNi1 X)
3654 N, VecVT, DAG)) {
3655 return Match;
3656 }
3657 // setcc (iN (bitcast (vNi1 X))), -1, ne
3658 // ==> xor (all_true (vNi1 X)), -1
3660 N, VecVT, DAG)) {
3661 return Match;
3662 }
3663 return SDValue();
3664}
3665
3667 EVT VT = N->getValueType(0);
3668 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3669 return SDValue();
3670
3671 // Mul with extending inputs.
3672 SDValue LHS = N->getOperand(0);
3673 SDValue RHS = N->getOperand(1);
3674 if (LHS.getOpcode() != RHS.getOpcode())
3675 return SDValue();
3676
3677 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3678 LHS.getOpcode() != ISD::ZERO_EXTEND)
3679 return SDValue();
3680
3681 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3682 return SDValue();
3683
3684 EVT FromVT = LHS->getOperand(0).getValueType();
3685 EVT EltTy = FromVT.getVectorElementType();
3686 if (EltTy != MVT::i8)
3687 return SDValue();
3688
3689 // For an input DAG that looks like this
3690 // %a = input_type
3691 // %b = input_type
3692 // %lhs = extend %a to output_type
3693 // %rhs = extend %b to output_type
3694 // %mul = mul %lhs, %rhs
3695
3696 // input_type | output_type | instructions
3697 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3698 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3699 // | | %low_low = i32x4.ext_low_i16x8_ %low
3700 // | | %low_high = i32x4.ext_high_i16x8_ %low
3701 // | | %high_low = i32x4.ext_low_i16x8_ %high
3702 // | | %high_high = i32x4.ext_high_i16x8_ %high
3703 // | | %res = concat_vector(...)
3704 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3705 // | | %low_low = i32x4.ext_low_i16x8_ %low
3706 // | | %low_high = i32x4.ext_high_i16x8_ %low
3707 // | | %res = concat_vector(%low_low, %low_high)
3708
3709 SDLoc DL(N);
3710 unsigned NumElts = VT.getVectorNumElements();
3711 SDValue ExtendInLHS = LHS->getOperand(0);
3712 SDValue ExtendInRHS = RHS->getOperand(0);
3713 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3714 unsigned ExtendLowOpc =
3715 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3716 unsigned ExtendHighOpc =
3717 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3718
3719 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3720 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3721 };
3722 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3723 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3724 };
3725
3726 if (NumElts == 16) {
3727 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3728 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3729 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3730 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3731 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3732 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3733 SDValue SubVectors[] = {
3734 GetExtendLow(MVT::v4i32, MulLow),
3735 GetExtendHigh(MVT::v4i32, MulLow),
3736 GetExtendLow(MVT::v4i32, MulHigh),
3737 GetExtendHigh(MVT::v4i32, MulHigh),
3738 };
3739 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3740 } else {
3741 assert(NumElts == 8);
3742 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3743 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3744 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3745 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3746 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3747 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3748 }
3749 return SDValue();
3750}
3751
3754 assert(N->getOpcode() == ISD::MUL);
3755 EVT VT = N->getValueType(0);
3756 if (!VT.isVector())
3757 return SDValue();
3758
3759 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3760 return Res;
3761
3762 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3763 // extend them to v8i16.
3764 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3765 return SDValue();
3766
3767 SDLoc DL(N);
3768 SelectionDAG &DAG = DCI.DAG;
3769 SDValue LHS = N->getOperand(0);
3770 SDValue RHS = N->getOperand(1);
3771 EVT MulVT = MVT::v8i16;
3772
3773 if (VT == MVT::v8i8) {
3774 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3775 DAG.getUNDEF(MVT::v8i8));
3776 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3777 DAG.getUNDEF(MVT::v8i8));
3778 SDValue LowLHS =
3779 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3780 SDValue LowRHS =
3781 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3782 SDValue MulLow = DAG.getBitcast(
3783 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3784 // Take the low byte of each lane.
3785 SDValue Shuffle = DAG.getVectorShuffle(
3786 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3787 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3788 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3789 } else {
3790 assert(VT == MVT::v16i8 && "Expected v16i8");
3791 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3792 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3793 SDValue HighLHS =
3794 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3795 SDValue HighRHS =
3796 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3797
3798 SDValue MulLow =
3799 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3800 SDValue MulHigh =
3801 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3802
3803 // Take the low byte of each lane.
3804 return DAG.getVectorShuffle(
3805 VT, DL, MulLow, MulHigh,
3806 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3807 }
3808}
3809
3810SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3811 SelectionDAG &DAG) {
3812 SDLoc DL(In);
3813 LLVMContext &Ctx = *DAG.getContext();
3814 EVT InVT = In.getValueType();
3815 unsigned NumElems = InVT.getVectorNumElements() * 2;
3816 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3817 SDValue Concat =
3818 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3819 if (NumElems < RequiredNumElems) {
3820 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3821 }
3822 return Concat;
3823}
3824
3826 EVT OutVT = N->getValueType(0);
3827 if (!OutVT.isVector())
3828 return SDValue();
3829
3830 EVT OutElTy = OutVT.getVectorElementType();
3831 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3832 return SDValue();
3833
3834 unsigned NumElems = OutVT.getVectorNumElements();
3835 if (!isPowerOf2_32(NumElems))
3836 return SDValue();
3837
3838 EVT FPVT = N->getOperand(0)->getValueType(0);
3839 if (FPVT.getVectorElementType() != MVT::f32)
3840 return SDValue();
3841
3842 SDLoc DL(N);
3843
3844 // First, convert to i32.
3845 LLVMContext &Ctx = *DAG.getContext();
3846 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3847 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3849 OutVT.getScalarSizeInBits());
3850 // Mask out the top MSBs.
3851 SDValue Masked =
3852 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3853
3854 if (OutVT.getSizeInBits() < 128) {
3855 // Create a wide enough vector that we can use narrow.
3856 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3857 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3858 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3859 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3860 return DAG.getBitcast(
3861 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3862 } else {
3863 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3864 }
3865 return SDValue();
3866}
3867
3868// Wide vector shift operations such as v8i32 with sign-extended
3869// operands cause Type Legalizer crashes because the target-specific
3870// extension nodes cannot be directly mapped to the 256-bit size.
3871//
3872// To resolve the crash and optimize performance, we intercept the
3873// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3874// into multipliers and manually split the vector into two v4i32 halves.
3875//
3876// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3877// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3878// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3879// t4: v8i32 = concat_vectors t2, t3
3882 SelectionDAG &DAG = DCI.DAG;
3883 assert(N->getOpcode() == ISD::SHL);
3884 EVT VT = N->getValueType(0);
3885 if (VT != MVT::v8i32)
3886 return SDValue();
3887
3888 SDValue LHS = N->getOperand(0);
3889 SDValue RHS = N->getOperand(1);
3890 unsigned ExtOpc = LHS.getOpcode();
3891 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3892 return SDValue();
3893
3894 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3895 return SDValue();
3896
3897 SDLoc DL(N);
3898 SDValue ExtendIn = LHS.getOperand(0);
3899 EVT FromVT = ExtendIn.getValueType();
3900 if (FromVT != MVT::v8i16)
3901 return SDValue();
3902
3903 unsigned NumElts = VT.getVectorNumElements();
3904 unsigned BitWidth = FromVT.getScalarSizeInBits();
3905 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
3906 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3907 SmallVector<SDValue, 16> MulConsts;
3908 for (unsigned I = 0; I < NumElts; ++I) {
3909 auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(I));
3910 if (!C)
3911 return SDValue();
3912
3913 const APInt &ShiftAmt = C->getAPIntValue();
3914 if (ShiftAmt.uge(MaxValidShift))
3915 return SDValue();
3916
3917 APInt MulAmt = APInt::getOneBitSet(BitWidth, ShiftAmt.getZExtValue());
3918 MulConsts.push_back(DAG.getConstant(MulAmt, DL, FromVT.getScalarType(),
3919 /*isTarget=*/false, /*isOpaque=*/true));
3920 }
3921
3922 SDValue NarrowConst = DAG.getBuildVector(FromVT, DL, MulConsts);
3923 unsigned ExtLowOpc =
3924 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3925 unsigned ExtHighOpc =
3926 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3927
3928 EVT HalfVT = MVT::v4i32;
3929 SDValue LHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, ExtendIn);
3930 SDValue LHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, ExtendIn);
3931 SDValue RHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, NarrowConst);
3932 SDValue RHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, NarrowConst);
3933 SDValue MulLo = DAG.getNode(ISD::MUL, DL, HalfVT, LHSLo, RHSLo);
3934 SDValue MulHi = DAG.getNode(ISD::MUL, DL, HalfVT, LHSHi, RHSHi);
3935 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MulLo, MulHi);
3936}
3937
3938SDValue
3939WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3940 DAGCombinerInfo &DCI) const {
3941 switch (N->getOpcode()) {
3942 default:
3943 return SDValue();
3944 case ISD::BITCAST:
3945 return performBitcastCombine(N, DCI);
3946 case ISD::SETCC:
3947 return performSETCCCombine(N, DCI, Subtarget);
3949 return performVECTOR_SHUFFLECombine(N, DCI);
3950 case ISD::SIGN_EXTEND:
3951 case ISD::ZERO_EXTEND:
3952 return performVectorExtendCombine(N, DCI);
3953 case ISD::UINT_TO_FP:
3954 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3955 return ExtCombine;
3956 return performVectorNonNegToFPCombine(N, DCI);
3957 case ISD::SINT_TO_FP:
3958 return performVectorExtendToFPCombine(N, DCI);
3961 case ISD::FP_ROUND:
3963 return performVectorTruncZeroCombine(N, DCI);
3964 case ISD::FP_TO_SINT:
3965 case ISD::FP_TO_UINT:
3966 return performConvertFPCombine(N, DCI.DAG);
3967 case ISD::TRUNCATE:
3968 return performTruncateCombine(N, DCI);
3970 if (SDValue V = performBitmaskCombine(N, DCI.DAG))
3971 return V;
3972 return performAnyAllCombine(N, DCI.DAG);
3973 }
3974 case ISD::MUL:
3975 return performMulCombine(N, DCI);
3976 case ISD::SHL:
3977 return performShiftCombine(N, DCI);
3978 }
3979}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool callingConvSupported(CallingConv::ID CallConv)
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue performShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Addres, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
auto m_Value()
Match an arbitrary value and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2087
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:460
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.