1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
22#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
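// Vector types supported by the 128-bit LSX and the 256-bit LASX extensions;
// register classes for them are only added when the corresponding subtarget
// feature is enabled.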
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103 // we get to know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
139
142 }
143
144 // Set operations for LA32 only.
145
146 if (!Subtarget.is64Bit()) {
152
153 // Set libcalls.
154 setLibcallName(RTLIB::MUL_I128, nullptr);
155 // The MULO libcall is not part of libgcc, only compiler-rt.
156 setLibcallName(RTLIB::MULO_I64, nullptr);
157 }
158
159 // The MULO libcall is not part of libgcc, only compiler-rt.
160 setLibcallName(RTLIB::MULO_I128, nullptr);
161
163
164 static const ISD::CondCode FPCCToExpand[] = {
167
168 // Set operations for 'F' feature.
169
170 if (Subtarget.hasBasicF()) {
171 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172
186
187 if (Subtarget.is64Bit())
189
190 if (!Subtarget.hasBasicD()) {
192 if (Subtarget.is64Bit()) {
195 }
196 }
197 }
198
199 // Set operations for 'D' feature.
200
201 if (Subtarget.hasBasicD()) {
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205
219
220 if (Subtarget.is64Bit())
222 }
223
224 // Set operations for 'LSX' feature.
225
226 if (Subtarget.hasExtLSX()) {
228 // Expand all truncating stores and extending loads.
229 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230 setTruncStoreAction(VT, InnerVT, Expand);
233 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234 }
235 // By default everything must be expanded. Then we will selectively turn
236 // on ones that can be effectively codegen'd.
237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
239 }
240
241 for (MVT VT : LSXVTs) {
245
249 }
250 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
254 Legal);
256 VT, Legal);
260 }
261 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
265 }
266 }
267
268 // Set operations for 'LASX' feature.
269
270 if (Subtarget.hasExtLASX()) {
271 for (MVT VT : LASXVTs) {
275
279 }
280 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
284 Legal);
286 VT, Legal);
290 }
291 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
295 }
296 }
297
298 // Set DAG combine for LA32 and LA64.
299
303
304 // Set DAG combine for 'LSX' feature.
305
306 if (Subtarget.hasExtLSX())
308
309 // Compute derived properties from the register classes.
311
313
315
317
319
320 // Function alignments.
322 // Set preferred alignments.
326}
327
328bool LoongArchTargetLowering::isOffsetFoldingLegal(
329 const GlobalAddressSDNode *GA) const {
330 // In order to maximise the opportunity for common subexpression elimination,
331 // keep a separate ADD node for the global address offset instead of folding
332 // it in the global address node. Later peephole optimisations may choose to
333 // fold it back in when profitable.
334 return false;
335}
336
337SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
338 SelectionDAG &DAG) const {
339 switch (Op.getOpcode()) {
340 case ISD::ATOMIC_FENCE:
341 return lowerATOMIC_FENCE(Op, DAG);
342 case ISD::EH_DWARF_CFA:
343 return lowerEH_DWARF_CFA(Op, DAG);
344 case ISD::GlobalAddress:
345 return lowerGlobalAddress(Op, DAG);
346 case ISD::GlobalTLSAddress:
347 return lowerGlobalTLSAddress(Op, DAG);
348 case ISD::INTRINSIC_WO_CHAIN:
349 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
350 case ISD::INTRINSIC_W_CHAIN:
351 return lowerINTRINSIC_W_CHAIN(Op, DAG);
352 case ISD::INTRINSIC_VOID:
353 return lowerINTRINSIC_VOID(Op, DAG);
354 case ISD::BlockAddress:
355 return lowerBlockAddress(Op, DAG);
356 case ISD::JumpTable:
357 return lowerJumpTable(Op, DAG);
358 case ISD::SHL_PARTS:
359 return lowerShiftLeftParts(Op, DAG);
360 case ISD::SRA_PARTS:
361 return lowerShiftRightParts(Op, DAG, true);
362 case ISD::SRL_PARTS:
363 return lowerShiftRightParts(Op, DAG, false);
364 case ISD::ConstantPool:
365 return lowerConstantPool(Op, DAG);
366 case ISD::FP_TO_SINT:
367 return lowerFP_TO_SINT(Op, DAG);
368 case ISD::BITCAST:
369 return lowerBITCAST(Op, DAG);
370 case ISD::UINT_TO_FP:
371 return lowerUINT_TO_FP(Op, DAG);
372 case ISD::SINT_TO_FP:
373 return lowerSINT_TO_FP(Op, DAG);
374 case ISD::VASTART:
375 return lowerVASTART(Op, DAG);
376 case ISD::FRAMEADDR:
377 return lowerFRAMEADDR(Op, DAG);
378 case ISD::RETURNADDR:
379 return lowerRETURNADDR(Op, DAG);
380 case ISD::WRITE_REGISTER:
381 return lowerWRITE_REGISTER(Op, DAG);
382 case ISD::INSERT_VECTOR_ELT:
383 return lowerINSERT_VECTOR_ELT(Op, DAG);
384 case ISD::BUILD_VECTOR:
385 return lowerBUILD_VECTOR(Op, DAG);
386 case ISD::VECTOR_SHUFFLE:
387 return lowerVECTOR_SHUFFLE(Op, DAG);
388 }
389 return SDValue();
390}
391
392SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
393 SelectionDAG &DAG) const {
394 // TODO: custom shuffle.
395 return SDValue();
396}
397
398static bool isConstantOrUndef(const SDValue Op) {
399 if (Op->isUndef())
400 return true;
401 if (isa<ConstantSDNode>(Op))
402 return true;
403 if (isa<ConstantFPSDNode>(Op))
404 return true;
405 return false;
406}
407
409 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
410 if (isConstantOrUndef(Op->getOperand(i)))
411 return true;
412 return false;
413}
414
415SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
416 SelectionDAG &DAG) const {
417 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
418 EVT ResTy = Op->getValueType(0);
419 SDLoc DL(Op);
420 APInt SplatValue, SplatUndef;
421 unsigned SplatBitSize;
422 bool HasAnyUndefs;
423 bool Is128Vec = ResTy.is128BitVector();
424 bool Is256Vec = ResTy.is256BitVector();
425
426 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
427 (!Subtarget.hasExtLASX() || !Is256Vec))
428 return SDValue();
429
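// Handle the constant-splat case first: if all elements are the same
// constant, the whole node can be rematerialized as a single vector constant
// of a matching element width and bitcast back to the requested result type.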
430 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
431 /*MinSplatBits=*/8) &&
432 SplatBitSize <= 64) {
433 // We can only cope with 8, 16, 32, or 64-bit elements.
434 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
435 SplatBitSize != 64)
436 return SDValue();
437
438 EVT ViaVecTy;
439
440 switch (SplatBitSize) {
441 default:
442 return SDValue();
443 case 8:
444 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
445 break;
446 case 16:
447 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
448 break;
449 case 32:
450 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
451 break;
452 case 64:
453 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
454 break;
455 }
456
457 // SelectionDAG::getConstant will promote SplatValue appropriately.
458 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
459
460 // Bitcast to the type we originally wanted.
461 if (ViaVecTy != ResTy)
462 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
463
464 return Result;
465 }
466
467 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
468 return Op;
469
471 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
472 // The resulting code is the same length as the expansion, but it doesn't
473 // use memory operations.
474 EVT ResTy = Node->getValueType(0);
475
476 assert(ResTy.isVector());
477
478 unsigned NumElts = ResTy.getVectorNumElements();
479 SDValue Vector = DAG.getUNDEF(ResTy);
480 for (unsigned i = 0; i < NumElts; ++i) {
481 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
482 Node->getOperand(i),
483 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
484 }
485 return Vector;
486 }
487
488 return SDValue();
489}
490
492LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
493 SelectionDAG &DAG) const {
494 if (isa<ConstantSDNode>(Op->getOperand(2)))
495 return Op;
496 return SDValue();
497}
498
499SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
500 SelectionDAG &DAG) const {
501 SDLoc DL(Op);
502 SyncScope::ID FenceSSID =
503 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
504
505 // singlethread fences only synchronize with signal handlers on the same
506 // thread and thus only need to preserve instruction order, not actually
507 // enforce memory ordering.
508 if (FenceSSID == SyncScope::SingleThread)
509 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
510 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
511
512 return Op;
513}
514
515SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
516 SelectionDAG &DAG) const {
517
518 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
519 DAG.getContext()->emitError(
520 "On LA64, only 64-bit registers can be written.");
521 return Op.getOperand(0);
522 }
523
524 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
525 DAG.getContext()->emitError(
526 "On LA32, only 32-bit registers can be written.");
527 return Op.getOperand(0);
528 }
529
530 return Op;
531}
532
533SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
534 SelectionDAG &DAG) const {
535 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
536 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
537 "be a constant integer");
538 return SDValue();
539 }
540
541 MachineFunction &MF = DAG.getMachineFunction();
542 MF.getFrameInfo().setFrameAddressIsTaken(true);
543 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
544 EVT VT = Op.getValueType();
545 SDLoc DL(Op);
546 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
547 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
548 int GRLenInBytes = Subtarget.getGRLen() / 8;
549
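// Walk up the frame chain: each ancestor frame address is reloaded from a
// fixed slot at -(2 * GRLenInBytes) relative to the current frame pointer
// (the slot where the prologue is expected to have spilled the previous fp).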
550 while (Depth--) {
551 int Offset = -(GRLenInBytes * 2);
552 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
554 FrameAddr =
555 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
556 }
557 return FrameAddr;
558}
559
560SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
561 SelectionDAG &DAG) const {
562 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
563 return SDValue();
564
565 // Currently only support lowering return address for current frame.
566 if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) {
567 DAG.getContext()->emitError(
568 "return address can only be determined for the current frame");
569 return SDValue();
570 }
571
572 MachineFunction &MF = DAG.getMachineFunction();
573 MF.getFrameInfo().setReturnAddressIsTaken(true);
574 MVT GRLenVT = Subtarget.getGRLenVT();
575
576 // Return the value of the return address register, marking it an implicit
577 // live-in.
578 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
579 getRegClassFor(GRLenVT));
580 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
581}
582
583SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
584 SelectionDAG &DAG) const {
585 MachineFunction &MF = DAG.getMachineFunction();
586 auto Size = Subtarget.getGRLen() / 8;
587 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
588 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
589}
590
591SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
592 SelectionDAG &DAG) const {
593 MachineFunction &MF = DAG.getMachineFunction();
594 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
595
596 SDLoc DL(Op);
597 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
599
600 // vastart just stores the address of the VarArgsFrameIndex slot into the
601 // memory location argument.
602 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
603 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
605}
606
607SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
608 SelectionDAG &DAG) const {
609 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
610 !Subtarget.hasBasicD() && "unexpected target features");
611
612 SDLoc DL(Op);
613 SDValue Op0 = Op.getOperand(0);
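// Keep the node when the input is provably a zero-extended 32-bit value
// (masked, BSTRPICK'ed or asserted zero-extended); otherwise lower the
// conversion to the corresponding soft-float libcall.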
614 if (Op0->getOpcode() == ISD::AND) {
615 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
616 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
617 return Op;
618 }
619
620 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
621 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
622 Op0.getConstantOperandVal(2) == UINT64_C(0))
623 return Op;
624
625 if (Op0.getOpcode() == ISD::AssertZext &&
626 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
627 return Op;
628
629 EVT OpVT = Op0.getValueType();
630 EVT RetVT = Op.getValueType();
631 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
632 MakeLibCallOptions CallOptions;
633 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
634 SDValue Chain = SDValue();
635 SDValue Result;
636 std::tie(Result, Chain) =
637 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
638 return Result;
639}
640
641SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
642 SelectionDAG &DAG) const {
643 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
644 !Subtarget.hasBasicD() && "unexpected target features");
645
646 SDLoc DL(Op);
647 SDValue Op0 = Op.getOperand(0);
648
649 if ((Op0.getOpcode() == ISD::AssertSext ||
651 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
652 return Op;
653
654 EVT OpVT = Op0.getValueType();
655 EVT RetVT = Op.getValueType();
656 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
657 MakeLibCallOptions CallOptions;
658 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
659 SDValue Chain = SDValue();
660 SDValue Result;
661 std::tie(Result, Chain) =
662 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
663 return Result;
664}
665
666SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
667 SelectionDAG &DAG) const {
668
669 SDLoc DL(Op);
670 SDValue Op0 = Op.getOperand(0);
671
672 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
673 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
674 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
675 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
676 }
677 return Op;
678}
679
680SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
681 SelectionDAG &DAG) const {
682
683 SDLoc DL(Op);
684
685 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
686 !Subtarget.hasBasicD()) {
687 SDValue Dst =
688 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
689 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
690 }
691
692 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
693 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
694 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
695}
696
697static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
698 SelectionDAG &DAG, unsigned Flags) {
699 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
700}
701
702static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
703 SelectionDAG &DAG, unsigned Flags) {
704 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
705 Flags);
706}
707
708static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
709 SelectionDAG &DAG, unsigned Flags) {
710 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
711 N->getOffset(), Flags);
712}
713
714static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
715 SelectionDAG &DAG, unsigned Flags) {
716 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
717}
718
719template <class NodeTy>
720SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
721 bool IsLocal) const {
722 SDLoc DL(N);
723 EVT Ty = getPointerTy(DAG.getDataLayout());
724 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
725
726 switch (DAG.getTarget().getCodeModel()) {
727 default:
728 report_fatal_error("Unsupported code model");
729
730 case CodeModel::Large: {
731 assert(Subtarget.is64Bit() && "Large code model requires LA64");
732
733 // This is not actually used, but is necessary for successfully matching
734 // the PseudoLA_*_LARGE nodes.
735 SDValue Tmp = DAG.getConstant(0, DL, Ty);
736 if (IsLocal)
737 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
738 // eventually becomes the desired 5-insn code sequence.
739 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
740 Tmp, Addr),
741 0);
742
743 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
744 // becomes the desired 5-insn code sequence.
745 return SDValue(
746 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
747 0);
748 }
749
750 case CodeModel::Small:
752 if (IsLocal)
753 // This generates the pattern (PseudoLA_PCREL sym), which expands to
754 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
755 return SDValue(
756 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
757
758 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
759 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
760 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
761 0);
762 }
763}
764
765SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
766 SelectionDAG &DAG) const {
767 return getAddr(cast<BlockAddressSDNode>(Op), DAG);
768}
769
770SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
771 SelectionDAG &DAG) const {
772 return getAddr(cast<JumpTableSDNode>(Op), DAG);
773}
774
775SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
776 SelectionDAG &DAG) const {
777 return getAddr(cast<ConstantPoolSDNode>(Op), DAG);
778}
779
780SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
781 SelectionDAG &DAG) const {
782 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
783 assert(N->getOffset() == 0 && "unexpected offset in global node");
784 return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
785}
786
787SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
788 SelectionDAG &DAG,
789 unsigned Opc,
790 bool Large) const {
791 SDLoc DL(N);
792 EVT Ty = getPointerTy(DAG.getDataLayout());
793 MVT GRLenVT = Subtarget.getGRLenVT();
794
795 // This is not actually used, but is necessary for successfully matching the
796 // PseudoLA_*_LARGE nodes.
797 SDValue Tmp = DAG.getConstant(0, DL, Ty);
798 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
799 SDValue Offset = Large
800 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
801 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
802
803 // Add the thread pointer.
804 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
805 DAG.getRegister(LoongArch::R2, GRLenVT));
806}
807
808SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
809 SelectionDAG &DAG,
810 unsigned Opc,
811 bool Large) const {
812 SDLoc DL(N);
813 EVT Ty = getPointerTy(DAG.getDataLayout());
814 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
815
816 // This is not actually used, but is necessary for successfully matching the
817 // PseudoLA_*_LARGE nodes.
818 SDValue Tmp = DAG.getConstant(0, DL, Ty);
819
820 // Use a PC-relative addressing mode to access the dynamic GOT address.
821 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
822 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
823 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
824
825 // Prepare argument list to generate call.
826 ArgListTy Args;
827 ArgListEntry Entry;
828 Entry.Node = Load;
829 Entry.Ty = CallTy;
830 Args.push_back(Entry);
831
832 // Setup call to __tls_get_addr.
833 TargetLowering::CallLoweringInfo CLI(DAG);
834 CLI.setDebugLoc(DL)
835 .setChain(DAG.getEntryNode())
836 .setLibCallee(CallingConv::C, CallTy,
837 DAG.getExternalSymbol("__tls_get_addr", Ty),
838 std::move(Args));
839
840 return LowerCallTo(CLI).first;
841}
842
844LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
845 SelectionDAG &DAG) const {
846 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
847 CallingConv::GHC)
848 report_fatal_error("In GHC calling convention TLS is not supported");
849
850 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
851 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
852
853 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
854 assert(N->getOffset() == 0 && "unexpected offset in global node");
855
856 SDValue Addr;
857 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
858 case TLSModel::GeneralDynamic:
859 // In this model, application code calls the dynamic linker function
860 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
861 // runtime.
862 Addr = getDynamicTLSAddr(N, DAG,
863 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
864 : LoongArch::PseudoLA_TLS_GD,
865 Large);
866 break;
867 case TLSModel::LocalDynamic:
868 // Same as GeneralDynamic, except for assembly modifiers and relocation
869 // records.
870 Addr = getDynamicTLSAddr(N, DAG,
871 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
872 : LoongArch::PseudoLA_TLS_LD,
873 Large);
874 break;
875 case TLSModel::InitialExec:
876 // This model uses the GOT to resolve TLS offsets.
877 Addr = getStaticTLSAddr(N, DAG,
878 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
879 : LoongArch::PseudoLA_TLS_IE,
880 Large);
881 break;
882 case TLSModel::LocalExec:
883 // This model is used when static linking as the TLS offsets are resolved
884 // during program linking.
885 //
886 // This node doesn't need an extra argument for the large code model.
887 Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
888 break;
889 }
890
891 return Addr;
892}
893
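// Check that the immediate operand ImmOp of an intrinsic fits in an N-bit
// signed or unsigned field. Emits an error and returns an UNDEF node when the
// immediate is out of range, and SDValue() when it is fine.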
894template <unsigned N>
895static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
896 SelectionDAG &DAG, bool IsSigned = false) {
897 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
898 // Check the ImmArg.
899 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
900 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
901 DAG.getContext()->emitError(Op->getOperationName(0) +
902 ": argument out of range.");
903 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
904 }
905 return SDValue();
906}
907
909LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
910 SelectionDAG &DAG) const {
911 SDLoc DL(Op);
912 switch (Op.getConstantOperandVal(0)) {
913 default:
914 return SDValue(); // Don't custom lower most intrinsics.
915 case Intrinsic::thread_pointer: {
916 EVT PtrVT = getPointerTy(DAG.getDataLayout());
917 return DAG.getRegister(LoongArch::R2, PtrVT);
918 }
919 case Intrinsic::loongarch_lsx_vpickve2gr_d:
920 case Intrinsic::loongarch_lsx_vpickve2gr_du:
921 case Intrinsic::loongarch_lsx_vreplvei_d:
922 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
923 return checkIntrinsicImmArg<1>(Op, 2, DAG);
924 case Intrinsic::loongarch_lsx_vreplvei_w:
925 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
926 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
927 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
928 case Intrinsic::loongarch_lasx_xvpickve_d:
929 case Intrinsic::loongarch_lasx_xvpickve_d_f:
930 return checkIntrinsicImmArg<2>(Op, 2, DAG);
931 case Intrinsic::loongarch_lasx_xvinsve0_d:
932 return checkIntrinsicImmArg<2>(Op, 3, DAG);
933 case Intrinsic::loongarch_lsx_vsat_b:
934 case Intrinsic::loongarch_lsx_vsat_bu:
935 case Intrinsic::loongarch_lsx_vrotri_b:
936 case Intrinsic::loongarch_lsx_vsllwil_h_b:
937 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
938 case Intrinsic::loongarch_lsx_vsrlri_b:
939 case Intrinsic::loongarch_lsx_vsrari_b:
940 case Intrinsic::loongarch_lsx_vreplvei_h:
941 case Intrinsic::loongarch_lasx_xvsat_b:
942 case Intrinsic::loongarch_lasx_xvsat_bu:
943 case Intrinsic::loongarch_lasx_xvrotri_b:
944 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
945 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
946 case Intrinsic::loongarch_lasx_xvsrlri_b:
947 case Intrinsic::loongarch_lasx_xvsrari_b:
948 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
949 case Intrinsic::loongarch_lasx_xvpickve_w:
950 case Intrinsic::loongarch_lasx_xvpickve_w_f:
951 return checkIntrinsicImmArg<3>(Op, 2, DAG);
952 case Intrinsic::loongarch_lasx_xvinsve0_w:
953 return checkIntrinsicImmArg<3>(Op, 3, DAG);
954 case Intrinsic::loongarch_lsx_vsat_h:
955 case Intrinsic::loongarch_lsx_vsat_hu:
956 case Intrinsic::loongarch_lsx_vrotri_h:
957 case Intrinsic::loongarch_lsx_vsllwil_w_h:
958 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
959 case Intrinsic::loongarch_lsx_vsrlri_h:
960 case Intrinsic::loongarch_lsx_vsrari_h:
961 case Intrinsic::loongarch_lsx_vreplvei_b:
962 case Intrinsic::loongarch_lasx_xvsat_h:
963 case Intrinsic::loongarch_lasx_xvsat_hu:
964 case Intrinsic::loongarch_lasx_xvrotri_h:
965 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
966 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
967 case Intrinsic::loongarch_lasx_xvsrlri_h:
968 case Intrinsic::loongarch_lasx_xvsrari_h:
969 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
970 return checkIntrinsicImmArg<4>(Op, 2, DAG);
971 case Intrinsic::loongarch_lsx_vsrlni_b_h:
972 case Intrinsic::loongarch_lsx_vsrani_b_h:
973 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
974 case Intrinsic::loongarch_lsx_vsrarni_b_h:
975 case Intrinsic::loongarch_lsx_vssrlni_b_h:
976 case Intrinsic::loongarch_lsx_vssrani_b_h:
977 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
978 case Intrinsic::loongarch_lsx_vssrani_bu_h:
979 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
980 case Intrinsic::loongarch_lsx_vssrarni_b_h:
981 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
982 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
983 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
984 case Intrinsic::loongarch_lasx_xvsrani_b_h:
985 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
986 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
987 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
988 case Intrinsic::loongarch_lasx_xvssrani_b_h:
989 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
990 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
991 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
992 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
993 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
994 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
995 return checkIntrinsicImmArg<4>(Op, 3, DAG);
996 case Intrinsic::loongarch_lsx_vsat_w:
997 case Intrinsic::loongarch_lsx_vsat_wu:
998 case Intrinsic::loongarch_lsx_vrotri_w:
999 case Intrinsic::loongarch_lsx_vsllwil_d_w:
1000 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1001 case Intrinsic::loongarch_lsx_vsrlri_w:
1002 case Intrinsic::loongarch_lsx_vsrari_w:
1003 case Intrinsic::loongarch_lsx_vslei_bu:
1004 case Intrinsic::loongarch_lsx_vslei_hu:
1005 case Intrinsic::loongarch_lsx_vslei_wu:
1006 case Intrinsic::loongarch_lsx_vslei_du:
1007 case Intrinsic::loongarch_lsx_vslti_bu:
1008 case Intrinsic::loongarch_lsx_vslti_hu:
1009 case Intrinsic::loongarch_lsx_vslti_wu:
1010 case Intrinsic::loongarch_lsx_vslti_du:
1011 case Intrinsic::loongarch_lsx_vbsll_v:
1012 case Intrinsic::loongarch_lsx_vbsrl_v:
1013 case Intrinsic::loongarch_lasx_xvsat_w:
1014 case Intrinsic::loongarch_lasx_xvsat_wu:
1015 case Intrinsic::loongarch_lasx_xvrotri_w:
1016 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1017 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1018 case Intrinsic::loongarch_lasx_xvsrlri_w:
1019 case Intrinsic::loongarch_lasx_xvsrari_w:
1020 case Intrinsic::loongarch_lasx_xvslei_bu:
1021 case Intrinsic::loongarch_lasx_xvslei_hu:
1022 case Intrinsic::loongarch_lasx_xvslei_wu:
1023 case Intrinsic::loongarch_lasx_xvslei_du:
1024 case Intrinsic::loongarch_lasx_xvslti_bu:
1025 case Intrinsic::loongarch_lasx_xvslti_hu:
1026 case Intrinsic::loongarch_lasx_xvslti_wu:
1027 case Intrinsic::loongarch_lasx_xvslti_du:
1028 case Intrinsic::loongarch_lasx_xvbsll_v:
1029 case Intrinsic::loongarch_lasx_xvbsrl_v:
1030 return checkIntrinsicImmArg<5>(Op, 2, DAG);
1031 case Intrinsic::loongarch_lsx_vseqi_b:
1032 case Intrinsic::loongarch_lsx_vseqi_h:
1033 case Intrinsic::loongarch_lsx_vseqi_w:
1034 case Intrinsic::loongarch_lsx_vseqi_d:
1035 case Intrinsic::loongarch_lsx_vslei_b:
1036 case Intrinsic::loongarch_lsx_vslei_h:
1037 case Intrinsic::loongarch_lsx_vslei_w:
1038 case Intrinsic::loongarch_lsx_vslei_d:
1039 case Intrinsic::loongarch_lsx_vslti_b:
1040 case Intrinsic::loongarch_lsx_vslti_h:
1041 case Intrinsic::loongarch_lsx_vslti_w:
1042 case Intrinsic::loongarch_lsx_vslti_d:
1043 case Intrinsic::loongarch_lasx_xvseqi_b:
1044 case Intrinsic::loongarch_lasx_xvseqi_h:
1045 case Intrinsic::loongarch_lasx_xvseqi_w:
1046 case Intrinsic::loongarch_lasx_xvseqi_d:
1047 case Intrinsic::loongarch_lasx_xvslei_b:
1048 case Intrinsic::loongarch_lasx_xvslei_h:
1049 case Intrinsic::loongarch_lasx_xvslei_w:
1050 case Intrinsic::loongarch_lasx_xvslei_d:
1051 case Intrinsic::loongarch_lasx_xvslti_b:
1052 case Intrinsic::loongarch_lasx_xvslti_h:
1053 case Intrinsic::loongarch_lasx_xvslti_w:
1054 case Intrinsic::loongarch_lasx_xvslti_d:
1055 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1056 case Intrinsic::loongarch_lsx_vsrlni_h_w:
1057 case Intrinsic::loongarch_lsx_vsrani_h_w:
1058 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1059 case Intrinsic::loongarch_lsx_vsrarni_h_w:
1060 case Intrinsic::loongarch_lsx_vssrlni_h_w:
1061 case Intrinsic::loongarch_lsx_vssrani_h_w:
1062 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1063 case Intrinsic::loongarch_lsx_vssrani_hu_w:
1064 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1065 case Intrinsic::loongarch_lsx_vssrarni_h_w:
1066 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1067 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1068 case Intrinsic::loongarch_lsx_vfrstpi_b:
1069 case Intrinsic::loongarch_lsx_vfrstpi_h:
1070 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1071 case Intrinsic::loongarch_lasx_xvsrani_h_w:
1072 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1073 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1074 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1075 case Intrinsic::loongarch_lasx_xvssrani_h_w:
1076 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1077 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1078 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1079 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1080 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1081 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1082 case Intrinsic::loongarch_lasx_xvfrstpi_b:
1083 case Intrinsic::loongarch_lasx_xvfrstpi_h:
1084 return checkIntrinsicImmArg<5>(Op, 3, DAG);
1085 case Intrinsic::loongarch_lsx_vsat_d:
1086 case Intrinsic::loongarch_lsx_vsat_du:
1087 case Intrinsic::loongarch_lsx_vrotri_d:
1088 case Intrinsic::loongarch_lsx_vsrlri_d:
1089 case Intrinsic::loongarch_lsx_vsrari_d:
1090 case Intrinsic::loongarch_lasx_xvsat_d:
1091 case Intrinsic::loongarch_lasx_xvsat_du:
1092 case Intrinsic::loongarch_lasx_xvrotri_d:
1093 case Intrinsic::loongarch_lasx_xvsrlri_d:
1094 case Intrinsic::loongarch_lasx_xvsrari_d:
1095 return checkIntrinsicImmArg<6>(Op, 2, DAG);
1096 case Intrinsic::loongarch_lsx_vsrlni_w_d:
1097 case Intrinsic::loongarch_lsx_vsrani_w_d:
1098 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1099 case Intrinsic::loongarch_lsx_vsrarni_w_d:
1100 case Intrinsic::loongarch_lsx_vssrlni_w_d:
1101 case Intrinsic::loongarch_lsx_vssrani_w_d:
1102 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1103 case Intrinsic::loongarch_lsx_vssrani_wu_d:
1104 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1105 case Intrinsic::loongarch_lsx_vssrarni_w_d:
1106 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1107 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1108 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1109 case Intrinsic::loongarch_lasx_xvsrani_w_d:
1110 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1111 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1112 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1113 case Intrinsic::loongarch_lasx_xvssrani_w_d:
1114 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1115 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1116 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1117 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1118 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1119 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1120 return checkIntrinsicImmArg<6>(Op, 3, DAG);
1121 case Intrinsic::loongarch_lsx_vsrlni_d_q:
1122 case Intrinsic::loongarch_lsx_vsrani_d_q:
1123 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1124 case Intrinsic::loongarch_lsx_vsrarni_d_q:
1125 case Intrinsic::loongarch_lsx_vssrlni_d_q:
1126 case Intrinsic::loongarch_lsx_vssrani_d_q:
1127 case Intrinsic::loongarch_lsx_vssrlni_du_q:
1128 case Intrinsic::loongarch_lsx_vssrani_du_q:
1129 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1130 case Intrinsic::loongarch_lsx_vssrarni_d_q:
1131 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1132 case Intrinsic::loongarch_lsx_vssrarni_du_q:
1133 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1134 case Intrinsic::loongarch_lasx_xvsrani_d_q:
1135 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1136 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1137 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1138 case Intrinsic::loongarch_lasx_xvssrani_d_q:
1139 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1140 case Intrinsic::loongarch_lasx_xvssrani_du_q:
1141 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1142 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1143 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1144 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1145 return checkIntrinsicImmArg<7>(Op, 3, DAG);
1146 case Intrinsic::loongarch_lsx_vnori_b:
1147 case Intrinsic::loongarch_lsx_vshuf4i_b:
1148 case Intrinsic::loongarch_lsx_vshuf4i_h:
1149 case Intrinsic::loongarch_lsx_vshuf4i_w:
1150 case Intrinsic::loongarch_lasx_xvnori_b:
1151 case Intrinsic::loongarch_lasx_xvshuf4i_b:
1152 case Intrinsic::loongarch_lasx_xvshuf4i_h:
1153 case Intrinsic::loongarch_lasx_xvshuf4i_w:
1154 case Intrinsic::loongarch_lasx_xvpermi_d:
1155 return checkIntrinsicImmArg<8>(Op, 2, DAG);
1156 case Intrinsic::loongarch_lsx_vshuf4i_d:
1157 case Intrinsic::loongarch_lsx_vpermi_w:
1158 case Intrinsic::loongarch_lsx_vbitseli_b:
1159 case Intrinsic::loongarch_lsx_vextrins_b:
1160 case Intrinsic::loongarch_lsx_vextrins_h:
1161 case Intrinsic::loongarch_lsx_vextrins_w:
1162 case Intrinsic::loongarch_lsx_vextrins_d:
1163 case Intrinsic::loongarch_lasx_xvshuf4i_d:
1164 case Intrinsic::loongarch_lasx_xvpermi_w:
1165 case Intrinsic::loongarch_lasx_xvpermi_q:
1166 case Intrinsic::loongarch_lasx_xvbitseli_b:
1167 case Intrinsic::loongarch_lasx_xvextrins_b:
1168 case Intrinsic::loongarch_lasx_xvextrins_h:
1169 case Intrinsic::loongarch_lasx_xvextrins_w:
1170 case Intrinsic::loongarch_lasx_xvextrins_d:
1171 return checkIntrinsicImmArg<8>(Op, 3, DAG);
1172 case Intrinsic::loongarch_lsx_vrepli_b:
1173 case Intrinsic::loongarch_lsx_vrepli_h:
1174 case Intrinsic::loongarch_lsx_vrepli_w:
1175 case Intrinsic::loongarch_lsx_vrepli_d:
1176 case Intrinsic::loongarch_lasx_xvrepli_b:
1177 case Intrinsic::loongarch_lasx_xvrepli_h:
1178 case Intrinsic::loongarch_lasx_xvrepli_w:
1179 case Intrinsic::loongarch_lasx_xvrepli_d:
1180 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1181 case Intrinsic::loongarch_lsx_vldi:
1182 case Intrinsic::loongarch_lasx_xvldi:
1183 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1184 }
1185}
1186
1187// Helper function that emits an error message for intrinsics with chain and
1188// returns the merge values of a UNDEF and the chain.
1189static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1190 StringRef ErrorMsg,
1191 SelectionDAG &DAG) {
1192 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1193 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1194 SDLoc(Op));
1195}
1196
1197SDValue
1198LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1199 SelectionDAG &DAG) const {
1200 SDLoc DL(Op);
1201 MVT GRLenVT = Subtarget.getGRLenVT();
1202 EVT VT = Op.getValueType();
1203 SDValue Chain = Op.getOperand(0);
1204 const StringRef ErrorMsgOOR = "argument out of range";
1205 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1206 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1207
1208 switch (Op.getConstantOperandVal(1)) {
1209 default:
1210 return Op;
1211 case Intrinsic::loongarch_crc_w_b_w:
1212 case Intrinsic::loongarch_crc_w_h_w:
1213 case Intrinsic::loongarch_crc_w_w_w:
1214 case Intrinsic::loongarch_crc_w_d_w:
1215 case Intrinsic::loongarch_crcc_w_b_w:
1216 case Intrinsic::loongarch_crcc_w_h_w:
1217 case Intrinsic::loongarch_crcc_w_w_w:
1218 case Intrinsic::loongarch_crcc_w_d_w:
1219 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1220 case Intrinsic::loongarch_csrrd_w:
1221 case Intrinsic::loongarch_csrrd_d: {
1222 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1223 return !isUInt<14>(Imm)
1224 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1225 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1226 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1227 }
1228 case Intrinsic::loongarch_csrwr_w:
1229 case Intrinsic::loongarch_csrwr_d: {
1230 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1231 return !isUInt<14>(Imm)
1232 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1233 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1234 {Chain, Op.getOperand(2),
1235 DAG.getConstant(Imm, DL, GRLenVT)});
1236 }
1237 case Intrinsic::loongarch_csrxchg_w:
1238 case Intrinsic::loongarch_csrxchg_d: {
1239 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
1240 return !isUInt<14>(Imm)
1241 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1242 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1243 {Chain, Op.getOperand(2), Op.getOperand(3),
1244 DAG.getConstant(Imm, DL, GRLenVT)});
1245 }
1246 case Intrinsic::loongarch_iocsrrd_d: {
1247 return DAG.getNode(
1248 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1249 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1250 }
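// Helper macro: lower a loongarch_iocsrrd_{b,h,w} intrinsic to the matching
// LoongArchISD::IOCSRRD_* node, producing a GRLenVT value plus the chain.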
1251#define IOCSRRD_CASE(NAME, NODE) \
1252 case Intrinsic::loongarch_##NAME: { \
1253 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
1254 {Chain, Op.getOperand(2)}); \
1255 }
1256 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1257 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1258 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1259#undef IOCSRRD_CASE
1260 case Intrinsic::loongarch_cpucfg: {
1261 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1262 {Chain, Op.getOperand(2)});
1263 }
1264 case Intrinsic::loongarch_lddir_d: {
1265 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1266 return !isUInt<8>(Imm)
1267 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1268 : Op;
1269 }
1270 case Intrinsic::loongarch_movfcsr2gr: {
1271 if (!Subtarget.hasBasicF())
1272 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1273 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
1274 return !isUInt<2>(Imm)
1275 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1276 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1277 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1278 }
1279 case Intrinsic::loongarch_lsx_vld:
1280 case Intrinsic::loongarch_lsx_vldrepl_b:
1281 case Intrinsic::loongarch_lasx_xvld:
1282 case Intrinsic::loongarch_lasx_xvldrepl_b:
1283 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1284 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1285 : SDValue();
1286 case Intrinsic::loongarch_lsx_vldrepl_h:
1287 case Intrinsic::loongarch_lasx_xvldrepl_h:
1288 return !isShiftedInt<11, 1>(
1289 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1291 Op, "argument out of range or not a multiple of 2", DAG)
1292 : SDValue();
1293 case Intrinsic::loongarch_lsx_vldrepl_w:
1294 case Intrinsic::loongarch_lasx_xvldrepl_w:
1295 return !isShiftedInt<10, 2>(
1296 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1298 Op, "argument out of range or not a multiple of 4", DAG)
1299 : SDValue();
1300 case Intrinsic::loongarch_lsx_vldrepl_d:
1301 case Intrinsic::loongarch_lasx_xvldrepl_d:
1302 return !isShiftedInt<9, 3>(
1303 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1305 Op, "argument out of range or not a multiple of 8", DAG)
1306 : SDValue();
1307 }
1308}
1309
1310// Helper function that emits an error message for intrinsics with a void
1311// return value and returns the chain.
1312static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1313 SelectionDAG &DAG) {
1314
1315 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1316 return Op.getOperand(0);
1317}
1318
1319SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1320 SelectionDAG &DAG) const {
1321 SDLoc DL(Op);
1322 MVT GRLenVT = Subtarget.getGRLenVT();
1323 SDValue Chain = Op.getOperand(0);
1324 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1325 SDValue Op2 = Op.getOperand(2);
1326 const StringRef ErrorMsgOOR = "argument out of range";
1327 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1328 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1329 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1330
1331 switch (IntrinsicEnum) {
1332 default:
1333 // TODO: Add more Intrinsics.
1334 return SDValue();
1335 case Intrinsic::loongarch_cacop_d:
1336 case Intrinsic::loongarch_cacop_w: {
1337 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1338 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1339 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1340 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1341 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1342 unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
1343 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1344 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1345 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1346 return Op;
1347 }
1348 case Intrinsic::loongarch_dbar: {
1349 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1350 return !isUInt<15>(Imm)
1351 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1352 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1353 DAG.getConstant(Imm, DL, GRLenVT));
1354 }
1355 case Intrinsic::loongarch_ibar: {
1356 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1357 return !isUInt<15>(Imm)
1358 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1359 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1360 DAG.getConstant(Imm, DL, GRLenVT));
1361 }
1362 case Intrinsic::loongarch_break: {
1363 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1364 return !isUInt<15>(Imm)
1365 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1366 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1367 DAG.getConstant(Imm, DL, GRLenVT));
1368 }
1369 case Intrinsic::loongarch_movgr2fcsr: {
1370 if (!Subtarget.hasBasicF())
1371 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1372 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1373 return !isUInt<2>(Imm)
1374 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1375 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1376 DAG.getConstant(Imm, DL, GRLenVT),
1377 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1378 Op.getOperand(3)));
1379 }
1380 case Intrinsic::loongarch_syscall: {
1381 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1382 return !isUInt<15>(Imm)
1383 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1384 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1385 DAG.getConstant(Imm, DL, GRLenVT));
1386 }
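// Helper macro: lower a loongarch_iocsrwr_{b,h,w} intrinsic to the matching
// LoongArchISD::IOCSRWR_* node, any-extending both operands to i64 on LA64.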
1387#define IOCSRWR_CASE(NAME, NODE) \
1388 case Intrinsic::loongarch_##NAME: { \
1389 SDValue Op3 = Op.getOperand(3); \
1390 return Subtarget.is64Bit() \
1391 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
1392 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1393 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
1394 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
1395 Op3); \
1396 }
1397 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1398 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1399 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1400#undef IOCSRWR_CASE
1401 case Intrinsic::loongarch_iocsrwr_d: {
1402 return !Subtarget.is64Bit()
1403 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1404 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1405 Op2,
1406 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1407 Op.getOperand(3)));
1408 }
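// Helper macro: the asrtle_d/asrtgt_d intrinsics are LA64-only, so emit an
// error on LA32 and otherwise leave the node untouched for normal selection.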
1409#define ASRT_LE_GT_CASE(NAME) \
1410 case Intrinsic::loongarch_##NAME: { \
1411 return !Subtarget.is64Bit() \
1412 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
1413 : Op; \
1414 }
1415 ASRT_LE_GT_CASE(asrtle_d)
1416 ASRT_LE_GT_CASE(asrtgt_d)
1417#undef ASRT_LE_GT_CASE
1418 case Intrinsic::loongarch_ldpte_d: {
1419 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
1420 return !Subtarget.is64Bit()
1421 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1422 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1423 : Op;
1424 }
1425 case Intrinsic::loongarch_lsx_vst:
1426 case Intrinsic::loongarch_lasx_xvst:
1427 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1428 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1429 : SDValue();
1430 case Intrinsic::loongarch_lasx_xvstelm_b:
1431 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1432 !isUInt<5>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1433 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1434 : SDValue();
1435 case Intrinsic::loongarch_lsx_vstelm_b:
1436 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1437 !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1438 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1439 : SDValue();
1440 case Intrinsic::loongarch_lasx_xvstelm_h:
1441 return (!isShiftedInt<8, 1>(
1442 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1443 !isUInt<4>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1445 Op, "argument out of range or not a multiple of 2", DAG)
1446 : SDValue();
1447 case Intrinsic::loongarch_lsx_vstelm_h:
1448 return (!isShiftedInt<8, 1>(
1449 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1450 !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1452 Op, "argument out of range or not a multiple of 2", DAG)
1453 : SDValue();
1454 case Intrinsic::loongarch_lasx_xvstelm_w:
1455 return (!isShiftedInt<8, 2>(
1456 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1457 !isUInt<3>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1459 Op, "argument out of range or not a multiple of 4", DAG)
1460 : SDValue();
1461 case Intrinsic::loongarch_lsx_vstelm_w:
1462 return (!isShiftedInt<8, 2>(
1463 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1464 !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1466 Op, "argument out of range or not a multiple of 4", DAG)
1467 : SDValue();
1468 case Intrinsic::loongarch_lasx_xvstelm_d:
1469 return (!isShiftedInt<8, 3>(
1470 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1471 !isUInt<2>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1473 Op, "argument out of range or not a multiple of 8", DAG)
1474 : SDValue();
1475 case Intrinsic::loongarch_lsx_vstelm_d:
1476 return (!isShiftedInt<8, 3>(
1477 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1478 !isUInt<1>(cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue()))
1480 Op, "argument out of range or not a multiple of 8", DAG)
1481 : SDValue();
1482 }
1483}
1484
1485SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1486 SelectionDAG &DAG) const {
1487 SDLoc DL(Op);
1488 SDValue Lo = Op.getOperand(0);
1489 SDValue Hi = Op.getOperand(1);
1490 SDValue Shamt = Op.getOperand(2);
1491 EVT VT = Lo.getValueType();
1492
1493 // if Shamt-GRLen < 0: // Shamt < GRLen
1494 // Lo = Lo << Shamt
1495 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1496 // else:
1497 // Lo = 0
1498 // Hi = Lo << (Shamt-GRLen)
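// For example, with GRLen == 32 and Shamt == 40, Shamt-GRLen is 8, so the
// result is Lo = 0 and Hi = (original Lo) << 8.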
1499
1500 SDValue Zero = DAG.getConstant(0, DL, VT);
1501 SDValue One = DAG.getConstant(1, DL, VT);
1502 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1503 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1504 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1505 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1506
1507 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1508 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1509 SDValue ShiftRightLo =
1510 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1511 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1512 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1513 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1514
1515 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1516
1517 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1518 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1519
1520 SDValue Parts[2] = {Lo, Hi};
1521 return DAG.getMergeValues(Parts, DL);
1522}
1523
1524SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1525 SelectionDAG &DAG,
1526 bool IsSRA) const {
1527 SDLoc DL(Op);
1528 SDValue Lo = Op.getOperand(0);
1529 SDValue Hi = Op.getOperand(1);
1530 SDValue Shamt = Op.getOperand(2);
1531 EVT VT = Lo.getValueType();
1532
1533 // SRA expansion:
1534 // if Shamt-GRLen < 0: // Shamt < GRLen
1535 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1536 // Hi = Hi >>s Shamt
1537 // else:
1538 // Lo = Hi >>s (Shamt-GRLen);
1539 // Hi = Hi >>s (GRLen-1)
1540 //
1541 // SRL expansion:
1542 // if Shamt-GRLen < 0: // Shamt < GRLen
1543 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1544 // Hi = Hi >>u Shamt
1545 // else:
1546 // Lo = Hi >>u (Shamt-GRLen);
1547 // Hi = 0;
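// For example, an SRL with GRLen == 32 and Shamt == 40 yields
// Lo = (original Hi) >>u 8 and Hi = 0.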
1548
1549 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1550
1551 SDValue Zero = DAG.getConstant(0, DL, VT);
1552 SDValue One = DAG.getConstant(1, DL, VT);
1553 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1554 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1555 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1556 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1557
1558 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1559 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1560 SDValue ShiftLeftHi =
1561 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1562 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1563 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1564 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1565 SDValue HiFalse =
1566 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1567
1568 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1569
1570 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1571 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1572
1573 SDValue Parts[2] = {Lo, Hi};
1574 return DAG.getMergeValues(Parts, DL);
1575}
1576
1577// Returns the opcode of the target-specific SDNode that implements the 32-bit
1578// form of the given Opcode.
1579static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1580 switch (Opcode) {
1581 default:
1582 llvm_unreachable("Unexpected opcode");
1583 case ISD::SHL:
1584 return LoongArchISD::SLL_W;
1585 case ISD::SRA:
1586 return LoongArchISD::SRA_W;
1587 case ISD::SRL:
1588 return LoongArchISD::SRL_W;
1589 case ISD::ROTR:
1590 return LoongArchISD::ROTR_W;
1591 case ISD::ROTL:
1592 return LoongArchISD::ROTL_W;
1593 case ISD::CTTZ:
1594 return LoongArchISD::CTZ_W;
1595 case ISD::CTLZ:
1596 return LoongArchISD::CLZ_W;
1597 }
1598}
1599
1600// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1601// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1602// otherwise be promoted to i64, making it difficult to select the
1603// SLL_W/.../*W later on, because the fact that the operation was originally of
1604// type i8/i16/i32 is lost.
1605static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1606 unsigned ExtOpc = ISD::ANY_EXTEND) {
1607 SDLoc DL(N);
1608 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1609 SDValue NewOp0, NewRes;
1610
1611 switch (NumOp) {
1612 default:
1613 llvm_unreachable("Unexpected NumOp");
1614 case 1: {
1615 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1616 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1617 break;
1618 }
1619 case 2: {
1620 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1621 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1622 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1623 break;
1624 }
1625 // TODO:Handle more NumOp.
1626 }
1627
1628 // ReplaceNodeResults requires we maintain the same type for the return
1629 // value.
1630 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1631}
1632
1633// Helper function that emits an error message for intrinsics with or without
1634// chain and returns a UNDEF (plus the chain, when present) as the results.
1637 StringRef ErrorMsg, bool WithChain = true) {
1638 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1639 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1640 if (!WithChain)
1641 return;
1642 Results.push_back(N->getOperand(0));
1643}
1644
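// Lower a [x]vpickve2gr intrinsic whose result type needs widening: verify the
// N-bit element-index immediate, emit ResOp on GRLenVT and truncate the result
// back to the intrinsic's original (narrower) result type.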
1645template <unsigned N>
1646static void
1647replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1648 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1649 unsigned ResOp) {
1650 const StringRef ErrorMsgOOR = "argument out of range";
1651 unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
1652 if (!isUInt<N>(Imm)) {
1654 /*WithChain=*/false);
1655 return;
1656 }
1657 SDLoc DL(Node);
1658 SDValue Vec = Node->getOperand(1);
1659
1660 SDValue PickElt =
1661 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1662 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1664 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1665 PickElt.getValue(0)));
1666}
1667
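// Lower a vector branch-condition intrinsic ([x]bz/[x]bnz family) to the given
// ResOp node on GRLenVT and truncate back to the original result type.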
1668static void replaceVecCondBranchResults(SDNode *N,
1669 SmallVectorImpl<SDValue> &Results,
1670 SelectionDAG &DAG,
1671 const LoongArchSubtarget &Subtarget,
1672 unsigned ResOp) {
1673 SDLoc DL(N);
1674 SDValue Vec = N->getOperand(1);
1675
1676 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1677 Results.push_back(
1678 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1679}
1680
1681static void
1683 SelectionDAG &DAG,
1684 const LoongArchSubtarget &Subtarget) {
1685 switch (N->getConstantOperandVal(0)) {
1686 default:
1687 llvm_unreachable("Unexpected Intrinsic.");
1688 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1689 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1691 break;
1692 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1693 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1694 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1696 break;
1697 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1698 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1700 break;
1701 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1702 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1704 break;
1705 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1706 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1707 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1709 break;
1710 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1711 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1713 break;
1714 case Intrinsic::loongarch_lsx_bz_b:
1715 case Intrinsic::loongarch_lsx_bz_h:
1716 case Intrinsic::loongarch_lsx_bz_w:
1717 case Intrinsic::loongarch_lsx_bz_d:
1718 case Intrinsic::loongarch_lasx_xbz_b:
1719 case Intrinsic::loongarch_lasx_xbz_h:
1720 case Intrinsic::loongarch_lasx_xbz_w:
1721 case Intrinsic::loongarch_lasx_xbz_d:
1722 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1724 break;
1725 case Intrinsic::loongarch_lsx_bz_v:
1726 case Intrinsic::loongarch_lasx_xbz_v:
1727 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1729 break;
1730 case Intrinsic::loongarch_lsx_bnz_b:
1731 case Intrinsic::loongarch_lsx_bnz_h:
1732 case Intrinsic::loongarch_lsx_bnz_w:
1733 case Intrinsic::loongarch_lsx_bnz_d:
1734 case Intrinsic::loongarch_lasx_xbnz_b:
1735 case Intrinsic::loongarch_lasx_xbnz_h:
1736 case Intrinsic::loongarch_lasx_xbnz_w:
1737 case Intrinsic::loongarch_lasx_xbnz_d:
1738 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1740 break;
1741 case Intrinsic::loongarch_lsx_bnz_v:
1742 case Intrinsic::loongarch_lasx_xbnz_v:
1743 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1745 break;
1746 }
1747}
1748
1749 void LoongArchTargetLowering::ReplaceNodeResults(
1750 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1751 SDLoc DL(N);
1752 EVT VT = N->getValueType(0);
1753 switch (N->getOpcode()) {
1754 default:
1755 llvm_unreachable("Don't know how to legalize this operation");
1756 case ISD::SHL:
1757 case ISD::SRA:
1758 case ISD::SRL:
1759 case ISD::ROTR:
1760 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1761 "Unexpected custom legalisation");
1762 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1763 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1764 break;
1765 }
1766 break;
1767 case ISD::ROTL:
1768 ConstantSDNode *CN;
1769 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1770 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1771 break;
1772 }
1773 break;
1774 case ISD::FP_TO_SINT: {
1775 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1776 "Unexpected custom legalisation");
1777 SDValue Src = N->getOperand(0);
1778 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1779 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1780 TargetLowering::TypeSoftenFloat) {
1781 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1782 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1783 return;
1784 }
1785 // If the FP type needs to be softened, emit a library call using the 'si'
1786 // version. If we left it to default legalization we'd end up with 'di'.
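// For example, an f32 -> i32 fptosi here becomes a call to __fixsfsi rather
// than __fixsfdi followed by a truncate.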
1787 RTLIB::Libcall LC;
1788 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1789 MakeLibCallOptions CallOptions;
1790 EVT OpVT = Src.getValueType();
1791 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1792 SDValue Chain = SDValue();
1793 SDValue Result;
1794 std::tie(Result, Chain) =
1795 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1796 Results.push_back(Result);
1797 break;
1798 }
1799 case ISD::BITCAST: {
1800 SDValue Src = N->getOperand(0);
1801 EVT SrcVT = Src.getValueType();
1802 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1803 Subtarget.hasBasicF()) {
1804 SDValue Dst =
1805 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1806 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1807 }
1808 break;
1809 }
1810 case ISD::FP_TO_UINT: {
1811 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1812 "Unexpected custom legalisation");
1813 auto &TLI = DAG.getTargetLoweringInfo();
1814 SDValue Tmp1, Tmp2;
1815 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1816 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1817 break;
1818 }
1819 case ISD::BSWAP: {
1820 SDValue Src = N->getOperand(0);
1821 assert((VT == MVT::i16 || VT == MVT::i32) &&
1822 "Unexpected custom legalization");
1823 MVT GRLenVT = Subtarget.getGRLenVT();
1824 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1825 SDValue Tmp;
1826 switch (VT.getSizeInBits()) {
1827 default:
1828 llvm_unreachable("Unexpected operand width");
1829 case 16:
1830 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1831 break;
1832 case 32:
1833 // Only LA64 will get here due to the size mismatch between VT and
1834 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
1835 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1836 break;
1837 }
1838 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1839 break;
1840 }
1841 case ISD::BITREVERSE: {
1842 SDValue Src = N->getOperand(0);
1843 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1844 "Unexpected custom legalization");
1845 MVT GRLenVT = Subtarget.getGRLenVT();
1846 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1847 SDValue Tmp;
1848 switch (VT.getSizeInBits()) {
1849 default:
1850 llvm_unreachable("Unexpected operand width");
1851 case 8:
1852 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1853 break;
1854 case 32:
1855 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1856 break;
1857 }
1858 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1859 break;
1860 }
1861 case ISD::CTLZ:
1862 case ISD::CTTZ: {
1863 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1864 "Unexpected custom legalisation");
1865 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1866 break;
1867 }
1868 case ISD::INTRINSIC_W_CHAIN: {
1869 SDValue Chain = N->getOperand(0);
1870 SDValue Op2 = N->getOperand(2);
1871 MVT GRLenVT = Subtarget.getGRLenVT();
1872 const StringRef ErrorMsgOOR = "argument out of range";
1873 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1874 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1875
1876 switch (N->getConstantOperandVal(1)) {
1877 default:
1878 llvm_unreachable("Unexpected Intrinsic.");
1879 case Intrinsic::loongarch_movfcsr2gr: {
1880 if (!Subtarget.hasBasicF()) {
1881 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1882 return;
1883 }
1884 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1885 if (!isUInt<2>(Imm)) {
1886 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1887 return;
1888 }
1889 SDValue MOVFCSR2GRResults = DAG.getNode(
1890 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1891 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1892 Results.push_back(
1893 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1894 Results.push_back(MOVFCSR2GRResults.getValue(1));
1895 break;
1896 }
1897#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1898 case Intrinsic::loongarch_##NAME: { \
1899 SDValue NODE = DAG.getNode( \
1900 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1901 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1902 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1903 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1904 Results.push_back(NODE.getValue(1)); \
1905 break; \
1906 }
1907 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1908 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1909 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1910 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1911 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1912 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1913#undef CRC_CASE_EXT_BINARYOP
1914
1915#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1916 case Intrinsic::loongarch_##NAME: { \
1917 SDValue NODE = DAG.getNode( \
1918 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1919 {Chain, Op2, \
1920 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1921 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1922 Results.push_back(NODE.getValue(1)); \
1923 break; \
1924 }
1925 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1926 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1927#undef CRC_CASE_EXT_UNARYOP
1928#define CSR_CASE(ID) \
1929 case Intrinsic::loongarch_##ID: { \
1930 if (!Subtarget.is64Bit()) \
1931 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
1932 break; \
1933 }
1934 CSR_CASE(csrrd_d);
1935 CSR_CASE(csrwr_d);
1936 CSR_CASE(csrxchg_d);
1937 CSR_CASE(iocsrrd_d);
1938#undef CSR_CASE
1939 case Intrinsic::loongarch_csrrd_w: {
1940 unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
1941 if (!isUInt<14>(Imm)) {
1942 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1943 return;
1944 }
1945 SDValue CSRRDResults =
1946 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1947 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1948 Results.push_back(
1949 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
1950 Results.push_back(CSRRDResults.getValue(1));
1951 break;
1952 }
1953 case Intrinsic::loongarch_csrwr_w: {
1954 unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
1955 if (!isUInt<14>(Imm)) {
1956 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1957 return;
1958 }
1959 SDValue CSRWRResults =
1960 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1961 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1962 DAG.getConstant(Imm, DL, GRLenVT)});
1963 Results.push_back(
1964 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
1965 Results.push_back(CSRWRResults.getValue(1));
1966 break;
1967 }
1968 case Intrinsic::loongarch_csrxchg_w: {
1969 unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
1970 if (!isUInt<14>(Imm)) {
1971 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1972 return;
1973 }
1974 SDValue CSRXCHGResults = DAG.getNode(
1975 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1976 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
1977 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
1978 DAG.getConstant(Imm, DL, GRLenVT)});
1979 Results.push_back(
1980 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
1981 Results.push_back(CSRXCHGResults.getValue(1));
1982 break;
1983 }
1984#define IOCSRRD_CASE(NAME, NODE) \
1985 case Intrinsic::loongarch_##NAME: { \
1986 SDValue IOCSRRDResults = \
1987 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1988 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
1989 Results.push_back( \
1990 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
1991 Results.push_back(IOCSRRDResults.getValue(1)); \
1992 break; \
1993 }
1994 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1995 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1996 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1997#undef IOCSRRD_CASE
1998 case Intrinsic::loongarch_cpucfg: {
1999 SDValue CPUCFGResults =
2000 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2001 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2002 Results.push_back(
2003 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2004 Results.push_back(CPUCFGResults.getValue(1));
2005 break;
2006 }
2007 case Intrinsic::loongarch_lddir_d: {
2008 if (!Subtarget.is64Bit()) {
2009 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2010 return;
2011 }
2012 break;
2013 }
2014 }
2015 break;
2016 }
2017 case ISD::READ_REGISTER: {
2018 if (Subtarget.is64Bit())
2019 DAG.getContext()->emitError(
2020 "On LA64, only 64-bit registers can be read.");
2021 else
2022 DAG.getContext()->emitError(
2023 "On LA32, only 32-bit registers can be read.");
2024 Results.push_back(DAG.getUNDEF(VT));
2025 Results.push_back(N->getOperand(0));
2026 break;
2027 }
2028 case ISD::INTRINSIC_WO_CHAIN: {
2029 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2030 break;
2031 }
2032 }
2033}
2034
2035 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2036 TargetLowering::DAGCombinerInfo &DCI,
2037 const LoongArchSubtarget &Subtarget) {
2038 if (DCI.isBeforeLegalizeOps())
2039 return SDValue();
2040
2041 SDValue FirstOperand = N->getOperand(0);
2042 SDValue SecondOperand = N->getOperand(1);
2043 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2044 EVT ValTy = N->getValueType(0);
2045 SDLoc DL(N);
2046 uint64_t lsb, msb;
2047 unsigned SMIdx, SMLen;
2048 ConstantSDNode *CN;
2049 SDValue NewOperand;
2050 MVT GRLenVT = Subtarget.getGRLenVT();
2051
2052 // Op's second operand must be a shifted mask.
2053 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2054 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2055 return SDValue();
2056
2057 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2058 // Pattern match BSTRPICK.
2059 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
2060 // => BSTRPICK $dst, $src, msb, lsb
2061 // where msb = lsb + len - 1
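// For example, $dst = and (srl $src, 8), 0xff has lsb = 8 and len = 8,
// so it becomes BSTRPICK $dst, $src, 15, 8.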
2062
2063 // The second operand of the shift must be an immediate.
2064 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2065 return SDValue();
2066
2067 lsb = CN->getZExtValue();
2068
2069 // Return if the shifted mask does not start at bit 0 or the sum of its
2070 // length and lsb exceeds the word's size.
2071 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2072 return SDValue();
2073
2074 NewOperand = FirstOperand.getOperand(0);
2075 } else {
2076 // Pattern match BSTRPICK.
2077 // $dst = and $src, (2**len - 1), if len > 12
2078 // => BSTRPICK $dst, $src, msb, lsb
2079 // where lsb = 0 and msb = len - 1
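// For example, $dst = and $src, 0xffff has len = 16 (too wide for ANDI's
// 12-bit immediate), so it becomes BSTRPICK $dst, $src, 15, 0.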
2080
2081 // If the mask is <= 0xfff, andi can be used instead.
2082 if (CN->getZExtValue() <= 0xfff)
2083 return SDValue();
2084
2085 // Return if the MSB of the mask exceeds the word size.
2086 if (SMIdx + SMLen > ValTy.getSizeInBits())
2087 return SDValue();
2088
2089 if (SMIdx > 0) {
2090 // Omit if the constant has more than 2 uses. This is a conservative
2091 // decision. Whether it is a win depends on the HW microarchitecture.
2092 // However, it should always be better for 1 and 2 uses.
2093 if (CN->use_size() > 2)
2094 return SDValue();
2095 // Return if the constant can be composed by a single LU12I.W.
2096 if ((CN->getZExtValue() & 0xfff) == 0)
2097 return SDValue();
2098 // Return if the constant can be composed by a single ADDI with
2099 // the zero register.
2100 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2101 return SDValue();
2102 }
2103
2104 lsb = SMIdx;
2105 NewOperand = FirstOperand;
2106 }
2107
2108 msb = lsb + SMLen - 1;
2109 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2110 DAG.getConstant(msb, DL, GRLenVT),
2111 DAG.getConstant(lsb, DL, GRLenVT));
2112 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2113 return NR0;
2114 // Try to optimize to
2115 // bstrpick $Rd, $Rs, msb, lsb
2116 // slli $Rd, $Rd, lsb
2117 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2118 DAG.getConstant(lsb, DL, GRLenVT));
2119}
2120
2121 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2122 TargetLowering::DAGCombinerInfo &DCI,
2123 const LoongArchSubtarget &Subtarget) {
2124 if (DCI.isBeforeLegalizeOps())
2125 return SDValue();
2126
2127 // $dst = srl (and $src, Mask), Shamt
2128 // =>
2129 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2130 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2131 //
2132
2133 SDValue FirstOperand = N->getOperand(0);
2134 ConstantSDNode *CN;
2135 EVT ValTy = N->getValueType(0);
2136 SDLoc DL(N);
2137 MVT GRLenVT = Subtarget.getGRLenVT();
2138 unsigned MaskIdx, MaskLen;
2139 uint64_t Shamt;
2140
2141 // The first operand must be an AND and the second operand of the AND must be
2142 // a shifted mask.
2143 if (FirstOperand.getOpcode() != ISD::AND ||
2144 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2145 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2146 return SDValue();
2147
2148 // The second operand (shift amount) must be an immediate.
2149 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2150 return SDValue();
2151
2152 Shamt = CN->getZExtValue();
2153 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2154 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2155 FirstOperand->getOperand(0),
2156 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2157 DAG.getConstant(Shamt, DL, GRLenVT));
2158
2159 return SDValue();
2160}
2161
2162 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2163 TargetLowering::DAGCombinerInfo &DCI,
2164 const LoongArchSubtarget &Subtarget) {
2165 MVT GRLenVT = Subtarget.getGRLenVT();
2166 EVT ValTy = N->getValueType(0);
2167 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2168 ConstantSDNode *CN0, *CN1;
2169 SDLoc DL(N);
2170 unsigned ValBits = ValTy.getSizeInBits();
2171 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2172 unsigned Shamt;
2173 bool SwapAndRetried = false;
2174
2175 if (DCI.isBeforeLegalizeOps())
2176 return SDValue();
2177
2178 if (ValBits != 32 && ValBits != 64)
2179 return SDValue();
2180
2181Retry:
2182 // 1st pattern to match BSTRINS:
2183 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2184 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2185 // =>
2186 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
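// For example, with 32-bit values,
// R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
// becomes BSTRINS X, Y, 15, 8.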
2187 if (N0.getOpcode() == ISD::AND &&
2188 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2189 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2190 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2191 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2192 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2193 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2194 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2195 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2196 (MaskIdx0 + MaskLen0 <= ValBits)) {
2197 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2198 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2199 N1.getOperand(0).getOperand(0),
2200 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2201 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2202 }
2203
2204 // 2nd pattern to match BSTRINS:
2205 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2206 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2207 // =>
2208 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2209 if (N0.getOpcode() == ISD::AND &&
2210 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2211 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2212 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2213 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2214 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2215 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2216 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2217 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2218 (MaskIdx0 + MaskLen0 <= ValBits)) {
2219 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2220 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2221 N1.getOperand(0).getOperand(0),
2222 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2223 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2224 }
2225
2226 // 3rd pattern to match BSTRINS:
2227 // R = or (and X, mask0), (and Y, mask1)
2228 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2229 // =>
2230 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2231 // where msb = lsb + size - 1
2232 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2233 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2234 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2235 (MaskIdx0 + MaskLen0 <= 64) &&
2236 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2237 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2238 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2239 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2240 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2241 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2242 DAG.getConstant(ValBits == 32
2243 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2244 : (MaskIdx0 + MaskLen0 - 1),
2245 DL, GRLenVT),
2246 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2247 }
2248
2249 // 4th pattern to match BSTRINS:
2250 // R = or (and X, mask), (shl Y, shamt)
2251 // where mask = (2**shamt - 1)
2252 // =>
2253 // R = BSTRINS X, Y, ValBits - 1, shamt
2254 // where ValBits = 32 or 64
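// For example, with 32-bit values, R = or (and X, 0xff), (shl Y, 8)
// becomes BSTRINS X, Y, 31, 8.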
2255 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2256 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2257 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2258 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2259 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2260 (MaskIdx0 + MaskLen0 <= ValBits)) {
2261 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2262 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2263 N1.getOperand(0),
2264 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2265 DAG.getConstant(Shamt, DL, GRLenVT));
2266 }
2267
2268 // 5th pattern to match BSTRINS:
2269 // R = or (and X, mask), const
2270 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2271 // =>
2272 // R = BSTRINS X, (const >> lsb), msb, lsb
2273 // where msb = lsb + size - 1
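// For example, with 32-bit values, R = or (and X, 0xffff00ff), 0x5600
// becomes BSTRINS X, 0x56, 15, 8.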
2274 if (N0.getOpcode() == ISD::AND &&
2275 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2276 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2277 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2278 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2279 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2280 return DAG.getNode(
2281 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2282 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2283 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2284 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2285 }
2286
2287 // 6th pattern.
2288 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2289 // by the incoming bits are known to be zero.
2290 // =>
2291 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2292 //
2293 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2294 // pattern is more common than the 1st. So we put the 1st before the 6th in
2295 // order to match as many nodes as possible.
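// Unlike the 1st pattern, b does not need to be an AND with the complementary
// mask here; computeKnownBits is used below to prove that the inserted bits
// are already zero in b.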
2296 ConstantSDNode *CNMask, *CNShamt;
2297 unsigned MaskIdx, MaskLen;
2298 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2299 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2300 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2301 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2302 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2303 Shamt = CNShamt->getZExtValue();
2304 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2305 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2306 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2307 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2308 N1.getOperand(0).getOperand(0),
2309 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2310 DAG.getConstant(Shamt, DL, GRLenVT));
2311 }
2312 }
2313
2314 // 7th pattern.
2315 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2316 // overwritten by the incoming bits are known to be zero.
2317 // =>
2318 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2319 //
2320 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2321 // before the 7th in order to match as many nodes as possible.
2322 if (N1.getOpcode() == ISD::AND &&
2323 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2324 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2325 N1.getOperand(0).getOpcode() == ISD::SHL &&
2326 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2327 CNShamt->getZExtValue() == MaskIdx) {
2328 APInt ShMask(ValBits, CNMask->getZExtValue());
2329 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2330 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2331 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2332 N1.getOperand(0).getOperand(0),
2333 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2334 DAG.getConstant(MaskIdx, DL, GRLenVT));
2335 }
2336 }
2337
2338 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2339 if (!SwapAndRetried) {
2340 std::swap(N0, N1);
2341 SwapAndRetried = true;
2342 goto Retry;
2343 }
2344
2345 SwapAndRetried = false;
2346Retry2:
2347 // 8th pattern.
2348 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2349 // the incoming bits are known to be zero.
2350 // =>
2351 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2352 //
2353 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2354 // we put it here in order to match as many nodes as possible and generate fewer
2355 // instructions.
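// For example, with 32-bit values and bits 16..23 of b known to be zero,
// a = b | (c & 0xff0000) becomes BSTRINS b, c >> 16, 23, 16.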
2356 if (N1.getOpcode() == ISD::AND &&
2357 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2358 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2359 APInt ShMask(ValBits, CNMask->getZExtValue());
2360 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2361 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2362 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2363 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2364 N1->getOperand(0),
2365 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2366 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2367 DAG.getConstant(MaskIdx, DL, GRLenVT));
2368 }
2369 }
2370 // Swap N0/N1 and retry.
2371 if (!SwapAndRetried) {
2372 std::swap(N0, N1);
2373 SwapAndRetried = true;
2374 goto Retry2;
2375 }
2376
2377 return SDValue();
2378}
2379
2380// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
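// Byte-swapping a 32-bit value and then bit-reversing the whole word leaves
// each byte in its original position with its bits reversed, which is exactly
// what bitrev.4b computes.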
2381 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2382 TargetLowering::DAGCombinerInfo &DCI,
2383 const LoongArchSubtarget &Subtarget) {
2384 if (DCI.isBeforeLegalizeOps())
2385 return SDValue();
2386
2387 SDValue Src = N->getOperand(0);
2388 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2389 return SDValue();
2390
2391 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2392 Src.getOperand(0));
2393}
2394
2395template <unsigned N>
2396 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2397 SelectionDAG &DAG,
2398 const LoongArchSubtarget &Subtarget,
2399 bool IsSigned = false) {
2400 SDLoc DL(Node);
2401 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2402 // Check the ImmArg.
2403 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2404 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2405 DAG.getContext()->emitError(Node->getOperationName(0) +
2406 ": argument out of range.");
2407 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2408 }
2409 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2410}
2411
2412template <unsigned N>
2413static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2414 SelectionDAG &DAG, bool IsSigned = false) {
2415 SDLoc DL(Node);
2416 EVT ResTy = Node->getValueType(0);
2417 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2418
2419 // Check the ImmArg.
2420 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2421 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2422 DAG.getContext()->emitError(Node->getOperationName(0) +
2423 ": argument out of range.");
2424 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2425 }
2426 return DAG.getConstant(
2427 APInt(ResTy.getScalarType().getSizeInBits(),
2428 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2429 DL, ResTy);
2430}
2431
2432 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2433 SDLoc DL(Node);
2434 EVT ResTy = Node->getValueType(0);
2435 SDValue Vec = Node->getOperand(2);
2436 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2437 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2438}
2439
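// The helper below lowers [x]vbitclr.{b/h/w/d} through generic nodes as
// vec1 & ~(1 << (vec2 % EltBits)); truncateVecElts provides the
// "vec2 % EltBits" part by masking each element with EltBits - 1.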
2440 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2441 SDLoc DL(Node);
2442 EVT ResTy = Node->getValueType(0);
2443 SDValue One = DAG.getConstant(1, DL, ResTy);
2444 SDValue Bit =
2445 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2446
2447 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2448 DAG.getNOT(DL, Bit, ResTy));
2449}
2450
2451template <unsigned N>
2452 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2453 SDLoc DL(Node);
2454 EVT ResTy = Node->getValueType(0);
2455 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2456 // Check the unsigned ImmArg.
2457 if (!isUInt<N>(CImm->getZExtValue())) {
2458 DAG.getContext()->emitError(Node->getOperationName(0) +
2459 ": argument out of range.");
2460 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2461 }
2462
2463 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2464 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2465
2466 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2467}
2468
2469template <unsigned N>
2470 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2471 SDLoc DL(Node);
2472 EVT ResTy = Node->getValueType(0);
2473 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2474 // Check the unsigned ImmArg.
2475 if (!isUInt<N>(CImm->getZExtValue())) {
2476 DAG.getContext()->emitError(Node->getOperationName(0) +
2477 ": argument out of range.");
2478 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2479 }
2480
2481 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2482 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2483 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2484}
2485
2486template <unsigned N>
2487 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2488 SDLoc DL(Node);
2489 EVT ResTy = Node->getValueType(0);
2490 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2491 // Check the unsigned ImmArg.
2492 if (!isUInt<N>(CImm->getZExtValue())) {
2493 DAG.getContext()->emitError(Node->getOperationName(0) +
2494 ": argument out of range.");
2495 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2496 }
2497
2498 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2499 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2500 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2501}
2502
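// The combine below maps simple element-wise LSX/LASX intrinsics onto
// target-independent ISD opcodes (ADD, SUB, SMAX, SHL, CTPOP, FMA, ...) so
// the generic DAG combiner and instruction selection can optimize them like
// ordinary vector operations.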
2503static SDValue
2504 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2505 TargetLowering::DAGCombinerInfo &DCI,
2506 const LoongArchSubtarget &Subtarget) {
2507 SDLoc DL(N);
2508 switch (N->getConstantOperandVal(0)) {
2509 default:
2510 break;
2511 case Intrinsic::loongarch_lsx_vadd_b:
2512 case Intrinsic::loongarch_lsx_vadd_h:
2513 case Intrinsic::loongarch_lsx_vadd_w:
2514 case Intrinsic::loongarch_lsx_vadd_d:
2515 case Intrinsic::loongarch_lasx_xvadd_b:
2516 case Intrinsic::loongarch_lasx_xvadd_h:
2517 case Intrinsic::loongarch_lasx_xvadd_w:
2518 case Intrinsic::loongarch_lasx_xvadd_d:
2519 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2520 N->getOperand(2));
2521 case Intrinsic::loongarch_lsx_vaddi_bu:
2522 case Intrinsic::loongarch_lsx_vaddi_hu:
2523 case Intrinsic::loongarch_lsx_vaddi_wu:
2524 case Intrinsic::loongarch_lsx_vaddi_du:
2525 case Intrinsic::loongarch_lasx_xvaddi_bu:
2526 case Intrinsic::loongarch_lasx_xvaddi_hu:
2527 case Intrinsic::loongarch_lasx_xvaddi_wu:
2528 case Intrinsic::loongarch_lasx_xvaddi_du:
2529 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2530 lowerVectorSplatImm<5>(N, 2, DAG));
2531 case Intrinsic::loongarch_lsx_vsub_b:
2532 case Intrinsic::loongarch_lsx_vsub_h:
2533 case Intrinsic::loongarch_lsx_vsub_w:
2534 case Intrinsic::loongarch_lsx_vsub_d:
2535 case Intrinsic::loongarch_lasx_xvsub_b:
2536 case Intrinsic::loongarch_lasx_xvsub_h:
2537 case Intrinsic::loongarch_lasx_xvsub_w:
2538 case Intrinsic::loongarch_lasx_xvsub_d:
2539 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2540 N->getOperand(2));
2541 case Intrinsic::loongarch_lsx_vsubi_bu:
2542 case Intrinsic::loongarch_lsx_vsubi_hu:
2543 case Intrinsic::loongarch_lsx_vsubi_wu:
2544 case Intrinsic::loongarch_lsx_vsubi_du:
2545 case Intrinsic::loongarch_lasx_xvsubi_bu:
2546 case Intrinsic::loongarch_lasx_xvsubi_hu:
2547 case Intrinsic::loongarch_lasx_xvsubi_wu:
2548 case Intrinsic::loongarch_lasx_xvsubi_du:
2549 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2550 lowerVectorSplatImm<5>(N, 2, DAG));
2551 case Intrinsic::loongarch_lsx_vneg_b:
2552 case Intrinsic::loongarch_lsx_vneg_h:
2553 case Intrinsic::loongarch_lsx_vneg_w:
2554 case Intrinsic::loongarch_lsx_vneg_d:
2555 case Intrinsic::loongarch_lasx_xvneg_b:
2556 case Intrinsic::loongarch_lasx_xvneg_h:
2557 case Intrinsic::loongarch_lasx_xvneg_w:
2558 case Intrinsic::loongarch_lasx_xvneg_d:
2559 return DAG.getNode(
2560 ISD::SUB, DL, N->getValueType(0),
2561 DAG.getConstant(
2562 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2563 /*isSigned=*/true),
2564 SDLoc(N), N->getValueType(0)),
2565 N->getOperand(1));
2566 case Intrinsic::loongarch_lsx_vmax_b:
2567 case Intrinsic::loongarch_lsx_vmax_h:
2568 case Intrinsic::loongarch_lsx_vmax_w:
2569 case Intrinsic::loongarch_lsx_vmax_d:
2570 case Intrinsic::loongarch_lasx_xvmax_b:
2571 case Intrinsic::loongarch_lasx_xvmax_h:
2572 case Intrinsic::loongarch_lasx_xvmax_w:
2573 case Intrinsic::loongarch_lasx_xvmax_d:
2574 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2575 N->getOperand(2));
2576 case Intrinsic::loongarch_lsx_vmax_bu:
2577 case Intrinsic::loongarch_lsx_vmax_hu:
2578 case Intrinsic::loongarch_lsx_vmax_wu:
2579 case Intrinsic::loongarch_lsx_vmax_du:
2580 case Intrinsic::loongarch_lasx_xvmax_bu:
2581 case Intrinsic::loongarch_lasx_xvmax_hu:
2582 case Intrinsic::loongarch_lasx_xvmax_wu:
2583 case Intrinsic::loongarch_lasx_xvmax_du:
2584 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2585 N->getOperand(2));
2586 case Intrinsic::loongarch_lsx_vmaxi_b:
2587 case Intrinsic::loongarch_lsx_vmaxi_h:
2588 case Intrinsic::loongarch_lsx_vmaxi_w:
2589 case Intrinsic::loongarch_lsx_vmaxi_d:
2590 case Intrinsic::loongarch_lasx_xvmaxi_b:
2591 case Intrinsic::loongarch_lasx_xvmaxi_h:
2592 case Intrinsic::loongarch_lasx_xvmaxi_w:
2593 case Intrinsic::loongarch_lasx_xvmaxi_d:
2594 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2595 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2596 case Intrinsic::loongarch_lsx_vmaxi_bu:
2597 case Intrinsic::loongarch_lsx_vmaxi_hu:
2598 case Intrinsic::loongarch_lsx_vmaxi_wu:
2599 case Intrinsic::loongarch_lsx_vmaxi_du:
2600 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2601 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2602 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2603 case Intrinsic::loongarch_lasx_xvmaxi_du:
2604 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2605 lowerVectorSplatImm<5>(N, 2, DAG));
2606 case Intrinsic::loongarch_lsx_vmin_b:
2607 case Intrinsic::loongarch_lsx_vmin_h:
2608 case Intrinsic::loongarch_lsx_vmin_w:
2609 case Intrinsic::loongarch_lsx_vmin_d:
2610 case Intrinsic::loongarch_lasx_xvmin_b:
2611 case Intrinsic::loongarch_lasx_xvmin_h:
2612 case Intrinsic::loongarch_lasx_xvmin_w:
2613 case Intrinsic::loongarch_lasx_xvmin_d:
2614 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2615 N->getOperand(2));
2616 case Intrinsic::loongarch_lsx_vmin_bu:
2617 case Intrinsic::loongarch_lsx_vmin_hu:
2618 case Intrinsic::loongarch_lsx_vmin_wu:
2619 case Intrinsic::loongarch_lsx_vmin_du:
2620 case Intrinsic::loongarch_lasx_xvmin_bu:
2621 case Intrinsic::loongarch_lasx_xvmin_hu:
2622 case Intrinsic::loongarch_lasx_xvmin_wu:
2623 case Intrinsic::loongarch_lasx_xvmin_du:
2624 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2625 N->getOperand(2));
2626 case Intrinsic::loongarch_lsx_vmini_b:
2627 case Intrinsic::loongarch_lsx_vmini_h:
2628 case Intrinsic::loongarch_lsx_vmini_w:
2629 case Intrinsic::loongarch_lsx_vmini_d:
2630 case Intrinsic::loongarch_lasx_xvmini_b:
2631 case Intrinsic::loongarch_lasx_xvmini_h:
2632 case Intrinsic::loongarch_lasx_xvmini_w:
2633 case Intrinsic::loongarch_lasx_xvmini_d:
2634 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2635 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2636 case Intrinsic::loongarch_lsx_vmini_bu:
2637 case Intrinsic::loongarch_lsx_vmini_hu:
2638 case Intrinsic::loongarch_lsx_vmini_wu:
2639 case Intrinsic::loongarch_lsx_vmini_du:
2640 case Intrinsic::loongarch_lasx_xvmini_bu:
2641 case Intrinsic::loongarch_lasx_xvmini_hu:
2642 case Intrinsic::loongarch_lasx_xvmini_wu:
2643 case Intrinsic::loongarch_lasx_xvmini_du:
2644 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2645 lowerVectorSplatImm<5>(N, 2, DAG));
2646 case Intrinsic::loongarch_lsx_vmul_b:
2647 case Intrinsic::loongarch_lsx_vmul_h:
2648 case Intrinsic::loongarch_lsx_vmul_w:
2649 case Intrinsic::loongarch_lsx_vmul_d:
2650 case Intrinsic::loongarch_lasx_xvmul_b:
2651 case Intrinsic::loongarch_lasx_xvmul_h:
2652 case Intrinsic::loongarch_lasx_xvmul_w:
2653 case Intrinsic::loongarch_lasx_xvmul_d:
2654 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2655 N->getOperand(2));
2656 case Intrinsic::loongarch_lsx_vmadd_b:
2657 case Intrinsic::loongarch_lsx_vmadd_h:
2658 case Intrinsic::loongarch_lsx_vmadd_w:
2659 case Intrinsic::loongarch_lsx_vmadd_d:
2660 case Intrinsic::loongarch_lasx_xvmadd_b:
2661 case Intrinsic::loongarch_lasx_xvmadd_h:
2662 case Intrinsic::loongarch_lasx_xvmadd_w:
2663 case Intrinsic::loongarch_lasx_xvmadd_d: {
2664 EVT ResTy = N->getValueType(0);
2665 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2666 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2667 N->getOperand(3)));
2668 }
2669 case Intrinsic::loongarch_lsx_vmsub_b:
2670 case Intrinsic::loongarch_lsx_vmsub_h:
2671 case Intrinsic::loongarch_lsx_vmsub_w:
2672 case Intrinsic::loongarch_lsx_vmsub_d:
2673 case Intrinsic::loongarch_lasx_xvmsub_b:
2674 case Intrinsic::loongarch_lasx_xvmsub_h:
2675 case Intrinsic::loongarch_lasx_xvmsub_w:
2676 case Intrinsic::loongarch_lasx_xvmsub_d: {
2677 EVT ResTy = N->getValueType(0);
2678 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2679 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2680 N->getOperand(3)));
2681 }
2682 case Intrinsic::loongarch_lsx_vdiv_b:
2683 case Intrinsic::loongarch_lsx_vdiv_h:
2684 case Intrinsic::loongarch_lsx_vdiv_w:
2685 case Intrinsic::loongarch_lsx_vdiv_d:
2686 case Intrinsic::loongarch_lasx_xvdiv_b:
2687 case Intrinsic::loongarch_lasx_xvdiv_h:
2688 case Intrinsic::loongarch_lasx_xvdiv_w:
2689 case Intrinsic::loongarch_lasx_xvdiv_d:
2690 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2691 N->getOperand(2));
2692 case Intrinsic::loongarch_lsx_vdiv_bu:
2693 case Intrinsic::loongarch_lsx_vdiv_hu:
2694 case Intrinsic::loongarch_lsx_vdiv_wu:
2695 case Intrinsic::loongarch_lsx_vdiv_du:
2696 case Intrinsic::loongarch_lasx_xvdiv_bu:
2697 case Intrinsic::loongarch_lasx_xvdiv_hu:
2698 case Intrinsic::loongarch_lasx_xvdiv_wu:
2699 case Intrinsic::loongarch_lasx_xvdiv_du:
2700 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2701 N->getOperand(2));
2702 case Intrinsic::loongarch_lsx_vmod_b:
2703 case Intrinsic::loongarch_lsx_vmod_h:
2704 case Intrinsic::loongarch_lsx_vmod_w:
2705 case Intrinsic::loongarch_lsx_vmod_d:
2706 case Intrinsic::loongarch_lasx_xvmod_b:
2707 case Intrinsic::loongarch_lasx_xvmod_h:
2708 case Intrinsic::loongarch_lasx_xvmod_w:
2709 case Intrinsic::loongarch_lasx_xvmod_d:
2710 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2711 N->getOperand(2));
2712 case Intrinsic::loongarch_lsx_vmod_bu:
2713 case Intrinsic::loongarch_lsx_vmod_hu:
2714 case Intrinsic::loongarch_lsx_vmod_wu:
2715 case Intrinsic::loongarch_lsx_vmod_du:
2716 case Intrinsic::loongarch_lasx_xvmod_bu:
2717 case Intrinsic::loongarch_lasx_xvmod_hu:
2718 case Intrinsic::loongarch_lasx_xvmod_wu:
2719 case Intrinsic::loongarch_lasx_xvmod_du:
2720 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2721 N->getOperand(2));
2722 case Intrinsic::loongarch_lsx_vand_v:
2723 case Intrinsic::loongarch_lasx_xvand_v:
2724 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2725 N->getOperand(2));
2726 case Intrinsic::loongarch_lsx_vor_v:
2727 case Intrinsic::loongarch_lasx_xvor_v:
2728 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2729 N->getOperand(2));
2730 case Intrinsic::loongarch_lsx_vxor_v:
2731 case Intrinsic::loongarch_lasx_xvxor_v:
2732 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2733 N->getOperand(2));
2734 case Intrinsic::loongarch_lsx_vnor_v:
2735 case Intrinsic::loongarch_lasx_xvnor_v: {
2736 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2737 N->getOperand(2));
2738 return DAG.getNOT(DL, Res, Res->getValueType(0));
2739 }
2740 case Intrinsic::loongarch_lsx_vandi_b:
2741 case Intrinsic::loongarch_lasx_xvandi_b:
2742 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2743 lowerVectorSplatImm<8>(N, 2, DAG));
2744 case Intrinsic::loongarch_lsx_vori_b:
2745 case Intrinsic::loongarch_lasx_xvori_b:
2746 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2747 lowerVectorSplatImm<8>(N, 2, DAG));
2748 case Intrinsic::loongarch_lsx_vxori_b:
2749 case Intrinsic::loongarch_lasx_xvxori_b:
2750 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2751 lowerVectorSplatImm<8>(N, 2, DAG));
2752 case Intrinsic::loongarch_lsx_vsll_b:
2753 case Intrinsic::loongarch_lsx_vsll_h:
2754 case Intrinsic::loongarch_lsx_vsll_w:
2755 case Intrinsic::loongarch_lsx_vsll_d:
2756 case Intrinsic::loongarch_lasx_xvsll_b:
2757 case Intrinsic::loongarch_lasx_xvsll_h:
2758 case Intrinsic::loongarch_lasx_xvsll_w:
2759 case Intrinsic::loongarch_lasx_xvsll_d:
2760 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2761 truncateVecElts(N, DAG));
2762 case Intrinsic::loongarch_lsx_vslli_b:
2763 case Intrinsic::loongarch_lasx_xvslli_b:
2764 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2765 lowerVectorSplatImm<3>(N, 2, DAG));
2766 case Intrinsic::loongarch_lsx_vslli_h:
2767 case Intrinsic::loongarch_lasx_xvslli_h:
2768 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2769 lowerVectorSplatImm<4>(N, 2, DAG));
2770 case Intrinsic::loongarch_lsx_vslli_w:
2771 case Intrinsic::loongarch_lasx_xvslli_w:
2772 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2773 lowerVectorSplatImm<5>(N, 2, DAG));
2774 case Intrinsic::loongarch_lsx_vslli_d:
2775 case Intrinsic::loongarch_lasx_xvslli_d:
2776 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2777 lowerVectorSplatImm<6>(N, 2, DAG));
2778 case Intrinsic::loongarch_lsx_vsrl_b:
2779 case Intrinsic::loongarch_lsx_vsrl_h:
2780 case Intrinsic::loongarch_lsx_vsrl_w:
2781 case Intrinsic::loongarch_lsx_vsrl_d:
2782 case Intrinsic::loongarch_lasx_xvsrl_b:
2783 case Intrinsic::loongarch_lasx_xvsrl_h:
2784 case Intrinsic::loongarch_lasx_xvsrl_w:
2785 case Intrinsic::loongarch_lasx_xvsrl_d:
2786 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2787 truncateVecElts(N, DAG));
2788 case Intrinsic::loongarch_lsx_vsrli_b:
2789 case Intrinsic::loongarch_lasx_xvsrli_b:
2790 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2791 lowerVectorSplatImm<3>(N, 2, DAG));
2792 case Intrinsic::loongarch_lsx_vsrli_h:
2793 case Intrinsic::loongarch_lasx_xvsrli_h:
2794 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2795 lowerVectorSplatImm<4>(N, 2, DAG));
2796 case Intrinsic::loongarch_lsx_vsrli_w:
2797 case Intrinsic::loongarch_lasx_xvsrli_w:
2798 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2799 lowerVectorSplatImm<5>(N, 2, DAG));
2800 case Intrinsic::loongarch_lsx_vsrli_d:
2801 case Intrinsic::loongarch_lasx_xvsrli_d:
2802 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2803 lowerVectorSplatImm<6>(N, 2, DAG));
2804 case Intrinsic::loongarch_lsx_vsra_b:
2805 case Intrinsic::loongarch_lsx_vsra_h:
2806 case Intrinsic::loongarch_lsx_vsra_w:
2807 case Intrinsic::loongarch_lsx_vsra_d:
2808 case Intrinsic::loongarch_lasx_xvsra_b:
2809 case Intrinsic::loongarch_lasx_xvsra_h:
2810 case Intrinsic::loongarch_lasx_xvsra_w:
2811 case Intrinsic::loongarch_lasx_xvsra_d:
2812 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2813 truncateVecElts(N, DAG));
2814 case Intrinsic::loongarch_lsx_vsrai_b:
2815 case Intrinsic::loongarch_lasx_xvsrai_b:
2816 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2817 lowerVectorSplatImm<3>(N, 2, DAG));
2818 case Intrinsic::loongarch_lsx_vsrai_h:
2819 case Intrinsic::loongarch_lasx_xvsrai_h:
2820 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2821 lowerVectorSplatImm<4>(N, 2, DAG));
2822 case Intrinsic::loongarch_lsx_vsrai_w:
2823 case Intrinsic::loongarch_lasx_xvsrai_w:
2824 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2825 lowerVectorSplatImm<5>(N, 2, DAG));
2826 case Intrinsic::loongarch_lsx_vsrai_d:
2827 case Intrinsic::loongarch_lasx_xvsrai_d:
2828 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2829 lowerVectorSplatImm<6>(N, 2, DAG));
2830 case Intrinsic::loongarch_lsx_vclz_b:
2831 case Intrinsic::loongarch_lsx_vclz_h:
2832 case Intrinsic::loongarch_lsx_vclz_w:
2833 case Intrinsic::loongarch_lsx_vclz_d:
2834 case Intrinsic::loongarch_lasx_xvclz_b:
2835 case Intrinsic::loongarch_lasx_xvclz_h:
2836 case Intrinsic::loongarch_lasx_xvclz_w:
2837 case Intrinsic::loongarch_lasx_xvclz_d:
2838 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2839 case Intrinsic::loongarch_lsx_vpcnt_b:
2840 case Intrinsic::loongarch_lsx_vpcnt_h:
2841 case Intrinsic::loongarch_lsx_vpcnt_w:
2842 case Intrinsic::loongarch_lsx_vpcnt_d:
2843 case Intrinsic::loongarch_lasx_xvpcnt_b:
2844 case Intrinsic::loongarch_lasx_xvpcnt_h:
2845 case Intrinsic::loongarch_lasx_xvpcnt_w:
2846 case Intrinsic::loongarch_lasx_xvpcnt_d:
2847 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2848 case Intrinsic::loongarch_lsx_vbitclr_b:
2849 case Intrinsic::loongarch_lsx_vbitclr_h:
2850 case Intrinsic::loongarch_lsx_vbitclr_w:
2851 case Intrinsic::loongarch_lsx_vbitclr_d:
2852 case Intrinsic::loongarch_lasx_xvbitclr_b:
2853 case Intrinsic::loongarch_lasx_xvbitclr_h:
2854 case Intrinsic::loongarch_lasx_xvbitclr_w:
2855 case Intrinsic::loongarch_lasx_xvbitclr_d:
2856 return lowerVectorBitClear(N, DAG);
2857 case Intrinsic::loongarch_lsx_vbitclri_b:
2858 case Intrinsic::loongarch_lasx_xvbitclri_b:
2859 return lowerVectorBitClearImm<3>(N, DAG);
2860 case Intrinsic::loongarch_lsx_vbitclri_h:
2861 case Intrinsic::loongarch_lasx_xvbitclri_h:
2862 return lowerVectorBitClearImm<4>(N, DAG);
2863 case Intrinsic::loongarch_lsx_vbitclri_w:
2864 case Intrinsic::loongarch_lasx_xvbitclri_w:
2865 return lowerVectorBitClearImm<5>(N, DAG);
2866 case Intrinsic::loongarch_lsx_vbitclri_d:
2867 case Intrinsic::loongarch_lasx_xvbitclri_d:
2868 return lowerVectorBitClearImm<6>(N, DAG);
2869 case Intrinsic::loongarch_lsx_vbitset_b:
2870 case Intrinsic::loongarch_lsx_vbitset_h:
2871 case Intrinsic::loongarch_lsx_vbitset_w:
2872 case Intrinsic::loongarch_lsx_vbitset_d:
2873 case Intrinsic::loongarch_lasx_xvbitset_b:
2874 case Intrinsic::loongarch_lasx_xvbitset_h:
2875 case Intrinsic::loongarch_lasx_xvbitset_w:
2876 case Intrinsic::loongarch_lasx_xvbitset_d: {
2877 EVT VecTy = N->getValueType(0);
2878 SDValue One = DAG.getConstant(1, DL, VecTy);
2879 return DAG.getNode(
2880 ISD::OR, DL, VecTy, N->getOperand(1),
2881 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2882 }
2883 case Intrinsic::loongarch_lsx_vbitseti_b:
2884 case Intrinsic::loongarch_lasx_xvbitseti_b:
2885 return lowerVectorBitSetImm<3>(N, DAG);
2886 case Intrinsic::loongarch_lsx_vbitseti_h:
2887 case Intrinsic::loongarch_lasx_xvbitseti_h:
2888 return lowerVectorBitSetImm<4>(N, DAG);
2889 case Intrinsic::loongarch_lsx_vbitseti_w:
2890 case Intrinsic::loongarch_lasx_xvbitseti_w:
2891 return lowerVectorBitSetImm<5>(N, DAG);
2892 case Intrinsic::loongarch_lsx_vbitseti_d:
2893 case Intrinsic::loongarch_lasx_xvbitseti_d:
2894 return lowerVectorBitSetImm<6>(N, DAG);
2895 case Intrinsic::loongarch_lsx_vbitrev_b:
2896 case Intrinsic::loongarch_lsx_vbitrev_h:
2897 case Intrinsic::loongarch_lsx_vbitrev_w:
2898 case Intrinsic::loongarch_lsx_vbitrev_d:
2899 case Intrinsic::loongarch_lasx_xvbitrev_b:
2900 case Intrinsic::loongarch_lasx_xvbitrev_h:
2901 case Intrinsic::loongarch_lasx_xvbitrev_w:
2902 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2903 EVT VecTy = N->getValueType(0);
2904 SDValue One = DAG.getConstant(1, DL, VecTy);
2905 return DAG.getNode(
2906 ISD::XOR, DL, VecTy, N->getOperand(1),
2907 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2908 }
2909 case Intrinsic::loongarch_lsx_vbitrevi_b:
2910 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2911 return lowerVectorBitRevImm<3>(N, DAG);
2912 case Intrinsic::loongarch_lsx_vbitrevi_h:
2913 case Intrinsic::loongarch_lasx_xvbitrevi_h:
2914 return lowerVectorBitRevImm<4>(N, DAG);
2915 case Intrinsic::loongarch_lsx_vbitrevi_w:
2916 case Intrinsic::loongarch_lasx_xvbitrevi_w:
2917 return lowerVectorBitRevImm<5>(N, DAG);
2918 case Intrinsic::loongarch_lsx_vbitrevi_d:
2919 case Intrinsic::loongarch_lasx_xvbitrevi_d:
2920 return lowerVectorBitRevImm<6>(N, DAG);
2921 case Intrinsic::loongarch_lsx_vfadd_s:
2922 case Intrinsic::loongarch_lsx_vfadd_d:
2923 case Intrinsic::loongarch_lasx_xvfadd_s:
2924 case Intrinsic::loongarch_lasx_xvfadd_d:
2925 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2926 N->getOperand(2));
2927 case Intrinsic::loongarch_lsx_vfsub_s:
2928 case Intrinsic::loongarch_lsx_vfsub_d:
2929 case Intrinsic::loongarch_lasx_xvfsub_s:
2930 case Intrinsic::loongarch_lasx_xvfsub_d:
2931 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2932 N->getOperand(2));
2933 case Intrinsic::loongarch_lsx_vfmul_s:
2934 case Intrinsic::loongarch_lsx_vfmul_d:
2935 case Intrinsic::loongarch_lasx_xvfmul_s:
2936 case Intrinsic::loongarch_lasx_xvfmul_d:
2937 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
2938 N->getOperand(2));
2939 case Intrinsic::loongarch_lsx_vfdiv_s:
2940 case Intrinsic::loongarch_lsx_vfdiv_d:
2941 case Intrinsic::loongarch_lasx_xvfdiv_s:
2942 case Intrinsic::loongarch_lasx_xvfdiv_d:
2943 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
2944 N->getOperand(2));
2945 case Intrinsic::loongarch_lsx_vfmadd_s:
2946 case Intrinsic::loongarch_lsx_vfmadd_d:
2947 case Intrinsic::loongarch_lasx_xvfmadd_s:
2948 case Intrinsic::loongarch_lasx_xvfmadd_d:
2949 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
2950 N->getOperand(2), N->getOperand(3));
2951 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
2952 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2953 N->getOperand(1), N->getOperand(2),
2954 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
2955 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
2956 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
2957 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2958 N->getOperand(1), N->getOperand(2),
2959 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
2960 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
2961 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
2962 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2963 N->getOperand(1), N->getOperand(2),
2964 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
2965 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
2966 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
2967 N->getOperand(1), N->getOperand(2),
2968 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
2969 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
2970 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
2971 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
2972 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
2973 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
2974 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
2975 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
2976 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
2977 EVT ResTy = N->getValueType(0);
2978 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
2979 return DAG.getBuildVector(ResTy, DL, Ops);
2980 }
2981 case Intrinsic::loongarch_lsx_vreplve_b:
2982 case Intrinsic::loongarch_lsx_vreplve_h:
2983 case Intrinsic::loongarch_lsx_vreplve_w:
2984 case Intrinsic::loongarch_lsx_vreplve_d:
2985 case Intrinsic::loongarch_lasx_xvreplve_b:
2986 case Intrinsic::loongarch_lasx_xvreplve_h:
2987 case Intrinsic::loongarch_lasx_xvreplve_w:
2988 case Intrinsic::loongarch_lasx_xvreplve_d:
2989 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
2990 N->getOperand(1),
2991 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
2992 N->getOperand(2)));
2993 }
2994 return SDValue();
2995}
2996
2997 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
2998 DAGCombinerInfo &DCI) const {
2999 SelectionDAG &DAG = DCI.DAG;
3000 switch (N->getOpcode()) {
3001 default:
3002 break;
3003 case ISD::AND:
3004 return performANDCombine(N, DAG, DCI, Subtarget);
3005 case ISD::OR:
3006 return performORCombine(N, DAG, DCI, Subtarget);
3007 case ISD::SRL:
3008 return performSRLCombine(N, DAG, DCI, Subtarget);
3009 case LoongArchISD::BITREV_W:
3010 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3011 case ISD::INTRINSIC_WO_CHAIN:
3012 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3013 }
3014 return SDValue();
3015}
3016
3017 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3018 MachineBasicBlock *MBB) {
3019 if (!ZeroDivCheck)
3020 return MBB;
3021
3022 // Build instructions:
3023 // MBB:
3024 // div(or mod) $dst, $dividend, $divisor
3025 // bnez $divisor, SinkMBB
3026 // BreakMBB:
3027 // break 7 // BRK_DIVZERO
3028 // SinkMBB:
3029 // fallthrough
3030 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3031 MachineFunction::iterator It = ++MBB->getIterator();
3032 MachineFunction *MF = MBB->getParent();
3033 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3034 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3035 MF->insert(It, BreakMBB);
3036 MF->insert(It, SinkMBB);
3037
3038 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3039 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3040 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3041
3042 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3043 DebugLoc DL = MI.getDebugLoc();
3044 MachineOperand &Divisor = MI.getOperand(2);
3045 Register DivisorReg = Divisor.getReg();
3046
3047 // MBB:
3048 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3049 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3050 .addMBB(SinkMBB);
3051 MBB->addSuccessor(BreakMBB);
3052 MBB->addSuccessor(SinkMBB);
3053
3054 // BreakMBB:
3055 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3056 // definition of BRK_DIVZERO.
3057 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3058 BreakMBB->addSuccessor(SinkMBB);
3059
3060 // Clear Divisor's kill flag.
3061 Divisor.setIsKill(false);
3062
3063 return SinkMBB;
3064}
3065
3066static MachineBasicBlock *
3068 const LoongArchSubtarget &Subtarget) {
3069 unsigned CondOpc;
3070 switch (MI.getOpcode()) {
3071 default:
3072 llvm_unreachable("Unexpected opcode");
3073 case LoongArch::PseudoVBZ:
3074 CondOpc = LoongArch::VSETEQZ_V;
3075 break;
3076 case LoongArch::PseudoVBZ_B:
3077 CondOpc = LoongArch::VSETANYEQZ_B;
3078 break;
3079 case LoongArch::PseudoVBZ_H:
3080 CondOpc = LoongArch::VSETANYEQZ_H;
3081 break;
3082 case LoongArch::PseudoVBZ_W:
3083 CondOpc = LoongArch::VSETANYEQZ_W;
3084 break;
3085 case LoongArch::PseudoVBZ_D:
3086 CondOpc = LoongArch::VSETANYEQZ_D;
3087 break;
3088 case LoongArch::PseudoVBNZ:
3089 CondOpc = LoongArch::VSETNEZ_V;
3090 break;
3091 case LoongArch::PseudoVBNZ_B:
3092 CondOpc = LoongArch::VSETALLNEZ_B;
3093 break;
3094 case LoongArch::PseudoVBNZ_H:
3095 CondOpc = LoongArch::VSETALLNEZ_H;
3096 break;
3097 case LoongArch::PseudoVBNZ_W:
3098 CondOpc = LoongArch::VSETALLNEZ_W;
3099 break;
3100 case LoongArch::PseudoVBNZ_D:
3101 CondOpc = LoongArch::VSETALLNEZ_D;
3102 break;
3103 case LoongArch::PseudoXVBZ:
3104 CondOpc = LoongArch::XVSETEQZ_V;
3105 break;
3106 case LoongArch::PseudoXVBZ_B:
3107 CondOpc = LoongArch::XVSETANYEQZ_B;
3108 break;
3109 case LoongArch::PseudoXVBZ_H:
3110 CondOpc = LoongArch::XVSETANYEQZ_H;
3111 break;
3112 case LoongArch::PseudoXVBZ_W:
3113 CondOpc = LoongArch::XVSETANYEQZ_W;
3114 break;
3115 case LoongArch::PseudoXVBZ_D:
3116 CondOpc = LoongArch::XVSETANYEQZ_D;
3117 break;
3118 case LoongArch::PseudoXVBNZ:
3119 CondOpc = LoongArch::XVSETNEZ_V;
3120 break;
3121 case LoongArch::PseudoXVBNZ_B:
3122 CondOpc = LoongArch::XVSETALLNEZ_B;
3123 break;
3124 case LoongArch::PseudoXVBNZ_H:
3125 CondOpc = LoongArch::XVSETALLNEZ_H;
3126 break;
3127 case LoongArch::PseudoXVBNZ_W:
3128 CondOpc = LoongArch::XVSETALLNEZ_W;
3129 break;
3130 case LoongArch::PseudoXVBNZ_D:
3131 CondOpc = LoongArch::XVSETALLNEZ_D;
3132 break;
3133 }
3134
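// The expansion below builds a diamond: BB computes the condition flag and
// branches, FalseBB materializes 0, TrueBB materializes 1, and SinkBB merges
// the two values with a PHI.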
3135 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3136 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3137 DebugLoc DL = MI.getDebugLoc();
3138 MachineFunction::iterator It = ++BB->getIterator();
3139 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3140
3141 MachineFunction *F = BB->getParent();
3142 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3143 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3144 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3145
3146 F->insert(It, FalseBB);
3147 F->insert(It, TrueBB);
3148 F->insert(It, SinkBB);
3149
3150 // Transfer the remainder of MBB and its successor edges to Sink.
3151 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3152 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3153
3154 // Insert the real instruction into BB.
3155 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3156 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3157
3158 // Insert branch.
3159 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3160 BB->addSuccessor(FalseBB);
3161 BB->addSuccessor(TrueBB);
3162
3163 // FalseBB.
3164 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3165 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3166 .addReg(LoongArch::R0)
3167 .addImm(0);
3168 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3169 FalseBB->addSuccessor(SinkBB);
3170
3171 // TrueBB.
3172 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3173 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3174 .addReg(LoongArch::R0)
3175 .addImm(1);
3176 TrueBB->addSuccessor(SinkBB);
3177
3178 // SinkBB: merge the results.
3179 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3180 MI.getOperand(0).getReg())
3181 .addReg(RD1)
3182 .addMBB(FalseBB)
3183 .addReg(RD2)
3184 .addMBB(TrueBB);
3185
3186 // The pseudo instruction is gone now.
3187 MI.eraseFromParent();
3188 return SinkBB;
3189}
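// Illustrative sketch (assumes standard LSX/LASX mnemonics; register names
// are placeholders for the virtual registers created above): for a
// PseudoVBNZ_W over $vr0 the expansion yields control flow roughly like
//     vsetallnez.w  $fcc0, $vr0
//     bcnez         $fcc0, TrueBB
//   FalseBB:  addi.w  $rA, $zero, 0 ;  b SinkBB
//   TrueBB:   addi.w  $rB, $zero, 1
//   SinkBB:   $rC = PHI [$rA, FalseBB], [$rB, TrueBB]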
3190
3191static MachineBasicBlock *
3192emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3193 const LoongArchSubtarget &Subtarget) {
3194 unsigned InsOp;
3195 unsigned HalfSize;
3196 switch (MI.getOpcode()) {
3197 default:
3198 llvm_unreachable("Unexpected opcode");
3199 case LoongArch::PseudoXVINSGR2VR_B:
3200 HalfSize = 16;
3201 InsOp = LoongArch::VINSGR2VR_B;
3202 break;
3203 case LoongArch::PseudoXVINSGR2VR_H:
3204 HalfSize = 8;
3205 InsOp = LoongArch::VINSGR2VR_H;
3206 break;
3207 }
3208 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3209 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3210 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3211 DebugLoc DL = MI.getDebugLoc();
3212 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3213 // XDst = vector_insert XSrc, Elt, Idx
3214 Register XDst = MI.getOperand(0).getReg();
3215 Register XSrc = MI.getOperand(1).getReg();
3216 Register Elt = MI.getOperand(2).getReg();
3217 unsigned Idx = MI.getOperand(3).getImm();
3218
3219 Register ScratchReg1 = XSrc;
3220 if (Idx >= HalfSize) {
3221 ScratchReg1 = MRI.createVirtualRegister(RC);
3222 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3223 .addReg(XSrc)
3224 .addReg(XSrc)
3225 .addImm(1);
3226 }
3227
3228 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3229 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3230 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3231 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3232 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3233 .addReg(ScratchSubReg1)
3234 .addReg(Elt)
3235 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3236
3237 Register ScratchReg2 = XDst;
3238 if (Idx >= HalfSize)
3239 ScratchReg2 = MRI.createVirtualRegister(RC);
3240
3241 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3242 .addImm(0)
3243 .addReg(ScratchSubReg2)
3244 .addImm(LoongArch::sub_128);
3245
3246 if (Idx >= HalfSize)
3247 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3248 .addReg(XSrc)
3249 .addReg(ScratchReg2)
3250 .addImm(2);
3251
3252 MI.eraseFromParent();
3253 return BB;
3254}
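// Illustrative note (summary of the expansion above, not additional
// behavior): for an insert into the high half (Idx >= HalfSize) the code
// 1) swaps the two 128-bit halves of XSrc with XVPERMI_Q so the target
// element lands in the low half, 2) performs an ordinary 128-bit VINSGR2VR
// at Idx - HalfSize on that copy, and 3) recombines the updated half with
// the untouched half of XSrc via a second XVPERMI_Q. An insert into the low
// half only needs the 128-bit insert plus SUBREG_TO_REG back into a 256-bit
// register.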
3255
3256MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3257 MachineInstr &MI, MachineBasicBlock *BB) const {
3258 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3259 DebugLoc DL = MI.getDebugLoc();
3260
3261 switch (MI.getOpcode()) {
3262 default:
3263 llvm_unreachable("Unexpected instr type to insert");
3264 case LoongArch::DIV_W:
3265 case LoongArch::DIV_WU:
3266 case LoongArch::MOD_W:
3267 case LoongArch::MOD_WU:
3268 case LoongArch::DIV_D:
3269 case LoongArch::DIV_DU:
3270 case LoongArch::MOD_D:
3271 case LoongArch::MOD_DU:
3272 return insertDivByZeroTrap(MI, BB);
3273 break;
3274 case LoongArch::WRFCSR: {
3275 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3276 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3277 .addReg(MI.getOperand(1).getReg());
3278 MI.eraseFromParent();
3279 return BB;
3280 }
3281 case LoongArch::RDFCSR: {
3282 MachineInstr *ReadFCSR =
3283 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3284 MI.getOperand(0).getReg())
3285 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3286 ReadFCSR->getOperand(1).setIsUndef();
3287 MI.eraseFromParent();
3288 return BB;
3289 }
3290 case LoongArch::PseudoVBZ:
3291 case LoongArch::PseudoVBZ_B:
3292 case LoongArch::PseudoVBZ_H:
3293 case LoongArch::PseudoVBZ_W:
3294 case LoongArch::PseudoVBZ_D:
3295 case LoongArch::PseudoVBNZ:
3296 case LoongArch::PseudoVBNZ_B:
3297 case LoongArch::PseudoVBNZ_H:
3298 case LoongArch::PseudoVBNZ_W:
3299 case LoongArch::PseudoVBNZ_D:
3300 case LoongArch::PseudoXVBZ:
3301 case LoongArch::PseudoXVBZ_B:
3302 case LoongArch::PseudoXVBZ_H:
3303 case LoongArch::PseudoXVBZ_W:
3304 case LoongArch::PseudoXVBZ_D:
3305 case LoongArch::PseudoXVBNZ:
3306 case LoongArch::PseudoXVBNZ_B:
3307 case LoongArch::PseudoXVBNZ_H:
3308 case LoongArch::PseudoXVBNZ_W:
3309 case LoongArch::PseudoXVBNZ_D:
3310 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3311 case LoongArch::PseudoXVINSGR2VR_B:
3312 case LoongArch::PseudoXVINSGR2VR_H:
3313 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3314 }
3315}
3316
3317bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3318 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3319 unsigned *Fast) const {
3320 if (!Subtarget.hasUAL())
3321 return false;
3322
3323 // TODO: set reasonable speed number.
3324 if (Fast)
3325 *Fast = 1;
3326 return true;
3327}
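// Illustrative note (assumption about UAL semantics, not stated in this
// file): when the UAL feature is present an unaligned scalar access can
// typically be selected as a single load/store (e.g. one ld.d for a
// misaligned i64), so reporting it as fast here keeps SelectionDAG from
// expanding such accesses into byte loads plus shifts.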
3328
3329const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3330 switch ((LoongArchISD::NodeType)Opcode) {
3331 case LoongArchISD::FIRST_NUMBER:
3332 break;
3333
3334#define NODE_NAME_CASE(node) \
3335 case LoongArchISD::node: \
3336 return "LoongArchISD::" #node;
3337
3338 // TODO: Add more target-dependent nodes later.
3339 NODE_NAME_CASE(CALL)
3340 NODE_NAME_CASE(RET)
3341 NODE_NAME_CASE(TAIL)
3342 NODE_NAME_CASE(SLL_W)
3343 NODE_NAME_CASE(SRA_W)
3344 NODE_NAME_CASE(SRL_W)
3345 NODE_NAME_CASE(BSTRINS)
3346 NODE_NAME_CASE(BSTRPICK)
3347 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3348 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3349 NODE_NAME_CASE(FTINT)
3350 NODE_NAME_CASE(REVB_2H)
3351 NODE_NAME_CASE(REVB_2W)
3352 NODE_NAME_CASE(BITREV_4B)
3353 NODE_NAME_CASE(BITREV_W)
3354 NODE_NAME_CASE(ROTR_W)
3355 NODE_NAME_CASE(ROTL_W)
3356 NODE_NAME_CASE(CLZ_W)
3357 NODE_NAME_CASE(CTZ_W)
3358 NODE_NAME_CASE(DBAR)
3359 NODE_NAME_CASE(IBAR)
3360 NODE_NAME_CASE(BREAK)
3361 NODE_NAME_CASE(SYSCALL)
3362 NODE_NAME_CASE(CRC_W_B_W)
3363 NODE_NAME_CASE(CRC_W_H_W)
3364 NODE_NAME_CASE(CRC_W_W_W)
3365 NODE_NAME_CASE(CRC_W_D_W)
3366 NODE_NAME_CASE(CRCC_W_B_W)
3367 NODE_NAME_CASE(CRCC_W_H_W)
3368 NODE_NAME_CASE(CRCC_W_W_W)
3369 NODE_NAME_CASE(CRCC_W_D_W)
3370 NODE_NAME_CASE(CSRRD)
3371 NODE_NAME_CASE(CSRWR)
3372 NODE_NAME_CASE(CSRXCHG)
3373 NODE_NAME_CASE(IOCSRRD_B)
3374 NODE_NAME_CASE(IOCSRRD_H)
3375 NODE_NAME_CASE(IOCSRRD_W)
3376 NODE_NAME_CASE(IOCSRRD_D)
3377 NODE_NAME_CASE(IOCSRWR_B)
3378 NODE_NAME_CASE(IOCSRWR_H)
3379 NODE_NAME_CASE(IOCSRWR_W)
3380 NODE_NAME_CASE(IOCSRWR_D)
3381 NODE_NAME_CASE(CPUCFG)
3382 NODE_NAME_CASE(MOVGR2FCSR)
3383 NODE_NAME_CASE(MOVFCSR2GR)
3384 NODE_NAME_CASE(CACOP_D)
3385 NODE_NAME_CASE(CACOP_W)
3386 NODE_NAME_CASE(VPICK_SEXT_ELT)
3387 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3388 NODE_NAME_CASE(VREPLVE)
3389 NODE_NAME_CASE(VALL_ZERO)
3390 NODE_NAME_CASE(VANY_ZERO)
3391 NODE_NAME_CASE(VALL_NONZERO)
3392 NODE_NAME_CASE(VANY_NONZERO)
3393 }
3394#undef NODE_NAME_CASE
3395 return nullptr;
3396}
3397
3398//===----------------------------------------------------------------------===//
3399// Calling Convention Implementation
3400//===----------------------------------------------------------------------===//
3401
3402 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
3403 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
3404 // fixed-point arguments, and floating-point arguments when no FPR is available
3405 // or with the soft-float ABI.
3406const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3407 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3408 LoongArch::R10, LoongArch::R11};
3409 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3410// arguments, and fa0-fa1 are also used to return values.
3411const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3412 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3413 LoongArch::F6, LoongArch::F7};
3414// FPR32 and FPR64 alias each other.
3415const MCPhysReg ArgFPR64s[] = {
3416 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3417 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3418
3419const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3420 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3421 LoongArch::VR6, LoongArch::VR7};
3422
3423const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3424 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3425 LoongArch::XR6, LoongArch::XR7};
3426
3427// Pass a 2*GRLen argument that has been split into two GRLen values through
3428// registers or the stack as necessary.
3429static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3430 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3431 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3432 ISD::ArgFlagsTy ArgFlags2) {
3433 unsigned GRLenInBytes = GRLen / 8;
3434 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3435 // At least one half can be passed via register.
3436 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3437 VA1.getLocVT(), CCValAssign::Full));
3438 } else {
3439 // Both halves must be passed on the stack, with proper alignment.
3440 Align StackAlign =
3441 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3442 State.addLoc(
3443 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3444 State.AllocateStack(GRLenInBytes, StackAlign),
3445 VA1.getLocVT(), CCValAssign::Full));
3446 State.addLoc(CCValAssign::getMem(
3447 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3448 LocVT2, CCValAssign::Full));
3449 return false;
3450 }
3451 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3452 // The second half can also be passed via register.
3453 State.addLoc(
3454 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3455 } else {
3456 // The second half is passed via the stack, without additional alignment.
3457 State.addLoc(CCValAssign::getMem(
3458 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3459 LocVT2, CCValAssign::Full));
3460 }
3461 return false;
3462}
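// Worked example (illustrative): on LA32 (GRLen == 32) an i64 argument
// arrives here split into two i32 halves. If a GPR is free, the first half
// is assigned to it and the second half takes the next free GPR or a 4-byte
// stack slot; if no GPR is free at all, both halves go on the stack and the
// first slot is aligned to the argument's original alignment (8 bytes for an
// i64).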
3463
3464// Implements the LoongArch calling convention. Returns true upon failure.
3465static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3466 unsigned ValNo, MVT ValVT,
3467 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3468 CCState &State, bool IsFixed, bool IsRet,
3469 Type *OrigTy) {
3470 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3471 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3472 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3473 MVT LocVT = ValVT;
3474
3475 // Any return value split into more than two values can't be returned
3476 // directly.
3477 if (IsRet && ValNo > 1)
3478 return true;
3479
3480 // If passing a variadic argument, or if no FPR is available.
3481 bool UseGPRForFloat = true;
3482
3483 switch (ABI) {
3484 default:
3485 llvm_unreachable("Unexpected ABI");
3486 case LoongArchABI::ABI_ILP32S:
3487 case LoongArchABI::ABI_ILP32F:
3488 case LoongArchABI::ABI_LP64F:
3489 report_fatal_error("Unimplemented ABI");
3490 break;
3491 case LoongArchABI::ABI_ILP32D:
3492 case LoongArchABI::ABI_LP64D:
3493 UseGPRForFloat = !IsFixed;
3494 break;
3495 case LoongArchABI::ABI_LP64S:
3496 break;
3497 }
3498
3499 // FPR32 and FPR64 alias each other.
3500 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3501 UseGPRForFloat = true;
3502
3503 if (UseGPRForFloat && ValVT == MVT::f32) {
3504 LocVT = GRLenVT;
3505 LocInfo = CCValAssign::BCvt;
3506 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3507 LocVT = MVT::i64;
3508 LocInfo = CCValAssign::BCvt;
3509 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3510 // TODO: Handle passing f64 on LA32 with D feature.
3511 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3512 }
3513
3514 // If this is a variadic argument, the LoongArch calling convention requires
3515 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3516 // byte alignment. An aligned register should be used regardless of whether
3517 // the original argument was split during legalisation or not. The argument
3518 // will not be passed by registers if the original type is larger than
3519 // 2*GRLen, so the register alignment rule does not apply.
3520 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3521 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3522 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3523 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3524 // Skip 'odd' register if necessary.
3525 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3526 State.AllocateReg(ArgGPRs);
3527 }
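// Worked example (illustrative): for a variadic call on LA64 passing
// (int, __int128), the int lands in a0 and the 16-byte-aligned __int128
// would next be offered a1; the rule above allocates (skips) a1 so the two
// halves are passed in the aligned even/odd pair a2/a3 instead.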
3528
3529 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3530 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3531 State.getPendingArgFlags();
3532
3533 assert(PendingLocs.size() == PendingArgFlags.size() &&
3534 "PendingLocs and PendingArgFlags out of sync");
3535
3536 // Split arguments might be passed indirectly, so keep track of the pending
3537 // values.
3538 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3539 LocVT = GRLenVT;
3540 LocInfo = CCValAssign::Indirect;
3541 PendingLocs.push_back(
3542 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3543 PendingArgFlags.push_back(ArgFlags);
3544 if (!ArgFlags.isSplitEnd()) {
3545 return false;
3546 }
3547 }
3548
3549 // If the split argument only had two elements, it should be passed directly
3550 // in registers or on the stack.
3551 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3552 PendingLocs.size() <= 2) {
3553 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3554 // Apply the normal calling convention rules to the first half of the
3555 // split argument.
3556 CCValAssign VA = PendingLocs[0];
3557 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3558 PendingLocs.clear();
3559 PendingArgFlags.clear();
3560 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3561 ArgFlags);
3562 }
3563
3564 // Allocate to a register if possible, or else a stack slot.
3565 Register Reg;
3566 unsigned StoreSizeBytes = GRLen / 8;
3567 Align StackAlign = Align(GRLen / 8);
3568
3569 if (ValVT == MVT::f32 && !UseGPRForFloat)
3570 Reg = State.AllocateReg(ArgFPR32s);
3571 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3572 Reg = State.AllocateReg(ArgFPR64s);
3573 else if (ValVT.is128BitVector())
3574 Reg = State.AllocateReg(ArgVRs);
3575 else if (ValVT.is256BitVector())
3576 Reg = State.AllocateReg(ArgXRs);
3577 else
3578 Reg = State.AllocateReg(ArgGPRs);
3579
3580 unsigned StackOffset =
3581 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3582
3583 // If we reach this point and PendingLocs is non-empty, we must be at the
3584 // end of a split argument that must be passed indirectly.
3585 if (!PendingLocs.empty()) {
3586 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3587 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3588 for (auto &It : PendingLocs) {
3589 if (Reg)
3590 It.convertToReg(Reg);
3591 else
3592 It.convertToMem(StackOffset);
3593 State.addLoc(It);
3594 }
3595 PendingLocs.clear();
3596 PendingArgFlags.clear();
3597 return false;
3598 }
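// Illustrative note: a scalar split into more than two parts (e.g. an i128
// on LA32, legalized to four i32 pieces) reaches this point with all pieces
// pending; every piece is redirected to the same register or stack slot,
// i.e. the value is passed indirectly through a single pointer-sized
// location that the caller fills in LowerCall.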
3599 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3600 "Expected a GRLenVT at this stage");
3601
3602 if (Reg) {
3603 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3604 return false;
3605 }
3606
3607 // When a floating-point value is passed on the stack, no bit-cast is needed.
3608 if (ValVT.isFloatingPoint()) {
3609 LocVT = ValVT;
3610 LocInfo = CCValAssign::Full;
3611 }
3612
3613 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3614 return false;
3615}
3616
3617void LoongArchTargetLowering::analyzeInputArgs(
3618 MachineFunction &MF, CCState &CCInfo,
3619 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3620 LoongArchCCAssignFn Fn) const {
3621 FunctionType *FType = MF.getFunction().getFunctionType();
3622 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3623 MVT ArgVT = Ins[i].VT;
3624 Type *ArgTy = nullptr;
3625 if (IsRet)
3626 ArgTy = FType->getReturnType();
3627 else if (Ins[i].isOrigArg())
3628 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3629 LoongArchABI::ABI ABI =
3630 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3631 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3632 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3633 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3634 << '\n');
3635 llvm_unreachable("");
3636 }
3637 }
3638}
3639
3640void LoongArchTargetLowering::analyzeOutputArgs(
3641 MachineFunction &MF, CCState &CCInfo,
3642 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3643 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3644 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3645 MVT ArgVT = Outs[i].VT;
3646 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3647 LoongArchABI::ABI ABI =
3648 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3649 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3650 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3651 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3652 << "\n");
3653 llvm_unreachable("");
3654 }
3655 }
3656}
3657
3658// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3659// values.
3660static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3661 const CCValAssign &VA, const SDLoc &DL) {
3662 switch (VA.getLocInfo()) {
3663 default:
3664 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3665 case CCValAssign::Full:
3666 case CCValAssign::Indirect:
3667 break;
3668 case CCValAssign::BCvt:
3669 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3670 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3671 else
3672 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3673 break;
3674 }
3675 return Val;
3676}
3677
3678static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3679 const CCValAssign &VA, const SDLoc &DL,
3680 const LoongArchTargetLowering &TLI) {
3681 MachineFunction &MF = DAG.getMachineFunction();
3682 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3683 EVT LocVT = VA.getLocVT();
3684 SDValue Val;
3685 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3686 Register VReg = RegInfo.createVirtualRegister(RC);
3687 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3688 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3689
3690 return convertLocVTToValVT(DAG, Val, VA, DL);
3691}
3692
3693// The caller is responsible for loading the full value if the argument is
3694// passed with CCValAssign::Indirect.
3695static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3696 const CCValAssign &VA, const SDLoc &DL) {
3697 MachineFunction &MF = DAG.getMachineFunction();
3698 MachineFrameInfo &MFI = MF.getFrameInfo();
3699 EVT ValVT = VA.getValVT();
3700 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3701 /*IsImmutable=*/true);
3702 SDValue FIN = DAG.getFrameIndex(
3703 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3704
3705 ISD::LoadExtType ExtType;
3706 switch (VA.getLocInfo()) {
3707 default:
3708 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3709 case CCValAssign::Full:
3710 case CCValAssign::Indirect:
3711 case CCValAssign::BCvt:
3712 ExtType = ISD::NON_EXTLOAD;
3713 break;
3714 }
3715 return DAG.getExtLoad(
3716 ExtType, DL, VA.getLocVT(), Chain, FIN,
3717 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3718}
3719
3721 const CCValAssign &VA, const SDLoc &DL) {
3722 EVT LocVT = VA.getLocVT();
3723
3724 switch (VA.getLocInfo()) {
3725 default:
3726 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3727 case CCValAssign::Full:
3728 break;
3729 case CCValAssign::BCvt:
3730 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3731 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3732 else
3733 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3734 break;
3735 }
3736 return Val;
3737}
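// Illustrative note: the BCvt case above is what lets an f32 travel in a
// 64-bit GPR on LA64 (for example a float vararg): MOVFR2GR_S_LA64 moves the
// single-precision bit pattern into an i64 GPR here, and convertLocVTToValVT
// undoes it with MOVGR2FR_W_LA64 on the receiving side.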
3738
3739static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3740 CCValAssign::LocInfo LocInfo,
3741 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3742 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3743 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3744 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3745 static const MCPhysReg GPRList[] = {
3746 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3747 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3748 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3749 if (unsigned Reg = State.AllocateReg(GPRList)) {
3750 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3751 return false;
3752 }
3753 }
3754
3755 if (LocVT == MVT::f32) {
3756 // Pass in STG registers: F1, F2, F3, F4
3757 // fs0,fs1,fs2,fs3
3758 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3759 LoongArch::F26, LoongArch::F27};
3760 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3761 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3762 return false;
3763 }
3764 }
3765
3766 if (LocVT == MVT::f64) {
3767 // Pass in STG registers: D1, D2, D3, D4
3768 // fs4,fs5,fs6,fs7
3769 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3770 LoongArch::F30_64, LoongArch::F31_64};
3771 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3772 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3773 return false;
3774 }
3775 }
3776
3777 report_fatal_error("No registers left in GHC calling convention");
3778 return true;
3779}
3780
3781// Transform physical registers into virtual registers.
3782SDValue LoongArchTargetLowering::LowerFormalArguments(
3783 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3784 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3785 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3786
3787 MachineFunction &MF = DAG.getMachineFunction();
3788
3789 switch (CallConv) {
3790 default:
3791 llvm_unreachable("Unsupported calling convention");
3792 case CallingConv::C:
3793 case CallingConv::Fast:
3794 break;
3795 case CallingConv::GHC:
3796 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3797 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3798 report_fatal_error(
3799 "GHC calling convention requires the F and D extensions");
3800 }
3801
3802 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3803 MVT GRLenVT = Subtarget.getGRLenVT();
3804 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3805 // Used with varargs to accumulate store chains.
3806 std::vector<SDValue> OutChains;
3807
3808 // Assign locations to all of the incoming arguments.
3809 SmallVector<CCValAssign> ArgLocs;
3810 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3811
3812 if (CallConv == CallingConv::GHC)
3813 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3814 else
3815 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3816
3817 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3818 CCValAssign &VA = ArgLocs[i];
3819 SDValue ArgValue;
3820 if (VA.isRegLoc())
3821 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3822 else
3823 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3824 if (VA.getLocInfo() == CCValAssign::Indirect) {
3825 // If the original argument was split and passed by reference, we need to
3826 // load all parts of it here (using the same address).
3827 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3828 MachinePointerInfo()));
3829 unsigned ArgIndex = Ins[i].OrigArgIndex;
3830 unsigned ArgPartOffset = Ins[i].PartOffset;
3831 assert(ArgPartOffset == 0);
3832 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3833 CCValAssign &PartVA = ArgLocs[i + 1];
3834 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3835 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3836 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3837 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3838 MachinePointerInfo()));
3839 ++i;
3840 }
3841 continue;
3842 }
3843 InVals.push_back(ArgValue);
3844 }
3845
3846 if (IsVarArg) {
3847 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3848 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3849 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3850 MachineFrameInfo &MFI = MF.getFrameInfo();
3851 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3852 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3853
3854 // Offset of the first variable argument from stack pointer, and size of
3855 // the vararg save area. For now, the varargs save area is either zero or
3856 // large enough to hold a0-a7.
3857 int VaArgOffset, VarArgsSaveSize;
3858
3859 // If all registers are allocated, then all varargs must be passed on the
3860 // stack and we don't need to save any argregs.
3861 if (ArgRegs.size() == Idx) {
3862 VaArgOffset = CCInfo.getStackSize();
3863 VarArgsSaveSize = 0;
3864 } else {
3865 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3866 VaArgOffset = -VarArgsSaveSize;
3867 }
3868
3869 // Record the frame index of the first variable argument
3870 // which is a value necessary for VASTART.
3871 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3872 LoongArchFI->setVarArgsFrameIndex(FI);
3873
3874 // If saving an odd number of registers, create an extra stack slot to
3875 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3876 // offsets to even-numbered registers remain 2*GRLen-aligned.
3877 if (Idx % 2) {
3878 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3879 true);
3880 VarArgsSaveSize += GRLenInBytes;
3881 }
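// Worked example (illustrative): on LA64, if three named arguments occupy
// a0-a2 then Idx == 3 and a3-a7 must be saved, so VarArgsSaveSize = 5 * 8 =
// 40 and VaArgOffset = -40. Because Idx is odd, the extra 8-byte slot
// created above grows the area to 48 bytes, keeping the even-numbered
// register slots 16-byte aligned.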
3882
3883 // Copy the integer registers that may have been used for passing varargs
3884 // to the vararg save area.
3885 for (unsigned I = Idx; I < ArgRegs.size();
3886 ++I, VaArgOffset += GRLenInBytes) {
3887 const Register Reg = RegInfo.createVirtualRegister(RC);
3888 RegInfo.addLiveIn(ArgRegs[I], Reg);
3889 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3890 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3891 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3892 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3893 MachinePointerInfo::getFixedStack(MF, FI));
3894 cast<StoreSDNode>(Store.getNode())
3895 ->getMemOperand()
3896 ->setValue((Value *)nullptr);
3897 OutChains.push_back(Store);
3898 }
3899 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3900 }
3901
3902 // All stores are grouped in one node to allow the matching between
3903 // the size of Ins and InVals. This only happens for vararg functions.
3904 if (!OutChains.empty()) {
3905 OutChains.push_back(Chain);
3906 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3907 }
3908
3909 return Chain;
3910}
3911
3912bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3913 return CI->isTailCall();
3914}
3915
3916 // Check if the return value is used only as a return value, as otherwise
3917// we can't perform a tail-call.
3918bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3919 SDValue &Chain) const {
3920 if (N->getNumValues() != 1)
3921 return false;
3922 if (!N->hasNUsesOfValue(1, 0))
3923 return false;
3924
3925 SDNode *Copy = *N->use_begin();
3926 if (Copy->getOpcode() != ISD::CopyToReg)
3927 return false;
3928
3929 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3930 // isn't safe to perform a tail call.
3931 if (Copy->getGluedNode())
3932 return false;
3933
3934 // The copy must be used by a LoongArchISD::RET, and nothing else.
3935 bool HasRet = false;
3936 for (SDNode *Node : Copy->uses()) {
3937 if (Node->getOpcode() != LoongArchISD::RET)
3938 return false;
3939 HasRet = true;
3940 }
3941
3942 if (!HasRet)
3943 return false;
3944
3945 Chain = Copy->getOperand(0);
3946 return true;
3947}
3948
3949// Check whether the call is eligible for tail call optimization.
3950bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
3951 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3952 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
3953
3954 auto CalleeCC = CLI.CallConv;
3955 auto &Outs = CLI.Outs;
3956 auto &Caller = MF.getFunction();
3957 auto CallerCC = Caller.getCallingConv();
3958
3959 // Do not tail call opt if the stack is used to pass parameters.
3960 if (CCInfo.getStackSize() != 0)
3961 return false;
3962
3963 // Do not tail call opt if any parameters need to be passed indirectly.
3964 for (auto &VA : ArgLocs)
3965 if (VA.getLocInfo() == CCValAssign::Indirect)
3966 return false;
3967
3968 // Do not tail call opt if either caller or callee uses struct return
3969 // semantics.
3970 auto IsCallerStructRet = Caller.hasStructRetAttr();
3971 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3972 if (IsCallerStructRet || IsCalleeStructRet)
3973 return false;
3974
3975 // Do not tail call opt if either the callee or caller has a byval argument.
3976 for (auto &Arg : Outs)
3977 if (Arg.Flags.isByVal())
3978 return false;
3979
3980 // The callee has to preserve all registers the caller needs to preserve.
3981 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
3982 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3983 if (CalleeCC != CallerCC) {
3984 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3985 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3986 return false;
3987 }
3988 return true;
3989}
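// Illustrative summary: a call such as
//   int callee(int, int);
//   int caller(int a, int b) { return callee(a, b); }
// passes everything in a0/a1, shares the caller's calling convention and has
// no sret/byval/indirect arguments, so it satisfies every check above and
// can be emitted as LoongArchISD::TAIL; passing a large struct by value on
// the stack fails the first check (non-zero stack size) and forces a normal
// call.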
3990
3991static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
3992 return DAG.getDataLayout().getPrefTypeAlign(
3993 VT.getTypeForEVT(*DAG.getContext()));
3994}
3995
3996// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
3997// and output parameter nodes.
3998SDValue
3999LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4000 SmallVectorImpl<SDValue> &InVals) const {
4001 SelectionDAG &DAG = CLI.DAG;
4002 SDLoc &DL = CLI.DL;
4003 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4004 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4005 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4006 SDValue Chain = CLI.Chain;
4007 SDValue Callee = CLI.Callee;
4008 CallingConv::ID CallConv = CLI.CallConv;
4009 bool IsVarArg = CLI.IsVarArg;
4010 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4011 MVT GRLenVT = Subtarget.getGRLenVT();
4012 bool &IsTailCall = CLI.IsTailCall;
4013
4014 MachineFunction &MF = DAG.getMachineFunction();
4015
4016 // Analyze the operands of the call, assigning locations to each operand.
4017 SmallVector<CCValAssign> ArgLocs;
4018 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4019
4020 if (CallConv == CallingConv::GHC)
4021 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4022 else
4023 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4024
4025 // Check if it's really possible to do a tail call.
4026 if (IsTailCall)
4027 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4028
4029 if (IsTailCall)
4030 ++NumTailCalls;
4031 else if (CLI.CB && CLI.CB->isMustTailCall())
4032 report_fatal_error("failed to perform tail call elimination on a call "
4033 "site marked musttail");
4034
4035 // Get a count of how many bytes are to be pushed on the stack.
4036 unsigned NumBytes = ArgCCInfo.getStackSize();
4037
4038 // Create local copies for byval args.
4039 SmallVector<SDValue> ByValArgs;
4040 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4041 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4042 if (!Flags.isByVal())
4043 continue;
4044
4045 SDValue Arg = OutVals[i];
4046 unsigned Size = Flags.getByValSize();
4047 Align Alignment = Flags.getNonZeroByValAlign();
4048
4049 int FI =
4050 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4051 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4052 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4053
4054 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4055 /*IsVolatile=*/false,
4056 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4057 MachinePointerInfo(), MachinePointerInfo());
4058 ByValArgs.push_back(FIPtr);
4059 }
4060
4061 if (!IsTailCall)
4062 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4063
4064 // Copy argument values to their designated locations.
4065 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4066 SmallVector<SDValue> MemOpChains;
4067 SDValue StackPtr;
4068 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4069 CCValAssign &VA = ArgLocs[i];
4070 SDValue ArgValue = OutVals[i];
4071 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4072
4073 // Promote the value if needed.
4074 // For now, only handle fully promoted and indirect arguments.
4075 if (VA.getLocInfo() == CCValAssign::Indirect) {
4076 // Store the argument in a stack slot and pass its address.
4077 Align StackAlign =
4078 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4079 getPrefTypeAlign(ArgValue.getValueType(), DAG));
4080 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4081 // If the original argument was split and passed by reference, we need to
4082 // store the required parts of it here (and pass just one address).
4083 unsigned ArgIndex = Outs[i].OrigArgIndex;
4084 unsigned ArgPartOffset = Outs[i].PartOffset;
4085 assert(ArgPartOffset == 0);
4086 // Calculate the total size to store. We don't have access to what we're
4087 // actually storing other than performing the loop and collecting the
4088 // info.
4089 SmallVector<std::pair<SDValue, SDValue>> Parts;
4090 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4091 SDValue PartValue = OutVals[i + 1];
4092 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4093 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4094 EVT PartVT = PartValue.getValueType();
4095
4096 StoredSize += PartVT.getStoreSize();
4097 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4098 Parts.push_back(std::make_pair(PartValue, Offset));
4099 ++i;
4100 }
4101 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4102 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4103 MemOpChains.push_back(
4104 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4105 MachinePointerInfo::getFixedStack(MF, FI)));
4106 for (const auto &Part : Parts) {
4107 SDValue PartValue = Part.first;
4108 SDValue PartOffset = Part.second;
4109 SDValue Address =
4110 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4111 MemOpChains.push_back(
4112 DAG.getStore(Chain, DL, PartValue, Address,
4113 MachinePointerInfo::getFixedStack(MF, FI)));
4114 }
4115 ArgValue = SpillSlot;
4116 } else {
4117 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4118 }
4119
4120 // Use local copy if it is a byval arg.
4121 if (Flags.isByVal())
4122 ArgValue = ByValArgs[j++];
4123
4124 if (VA.isRegLoc()) {
4125 // Queue up the argument copies and emit them at the end.
4126 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4127 } else {
4128 assert(VA.isMemLoc() && "Argument not register or memory");
4129 assert(!IsTailCall && "Tail call not allowed if stack is used "
4130 "for passing parameters");
4131
4132 // Work out the address of the stack slot.
4133 if (!StackPtr.getNode())
4134 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4135 SDValue Address =
4136 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4137 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4138
4139 // Emit the store.
4140 MemOpChains.push_back(
4141 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4142 }
4143 }
4144
4145 // Join the stores, which are independent of one another.
4146 if (!MemOpChains.empty())
4147 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4148
4149 SDValue Glue;
4150
4151 // Build a sequence of copy-to-reg nodes, chained and glued together.
4152 for (auto &Reg : RegsToPass) {
4153 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4154 Glue = Chain.getValue(1);
4155 }
4156
4157 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4158 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4159 // split it, and then the direct call can be matched by PseudoCALL.
4160 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4161 const GlobalValue *GV = S->getGlobal();
4162 unsigned OpFlags =
4163 getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
4164 ? LoongArchII::MO_CALL
4165 : LoongArchII::MO_CALL_PLT;
4166 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4167 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4168 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
4169 *MF.getFunction().getParent(), nullptr)
4170 ? LoongArchII::MO_CALL
4171 : LoongArchII::MO_CALL_PLT;
4172 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4173 }
4174
4175 // The first call operand is the chain and the second is the target address.
4176 SmallVector<SDValue> Ops;
4177 Ops.push_back(Chain);
4178 Ops.push_back(Callee);
4179
4180 // Add argument registers to the end of the list so that they are
4181 // known live into the call.
4182 for (auto &Reg : RegsToPass)
4183 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4184
4185 if (!IsTailCall) {
4186 // Add a register mask operand representing the call-preserved registers.
4187 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4188 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4189 assert(Mask && "Missing call preserved mask for calling convention");
4190 Ops.push_back(DAG.getRegisterMask(Mask));
4191 }
4192
4193 // Glue the call to the argument copies, if any.
4194 if (Glue.getNode())
4195 Ops.push_back(Glue);
4196
4197 // Emit the call.
4198 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4199
4200 if (IsTailCall) {
4201 MF.getFrameInfo().setHasTailCall();
4202 SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
4203 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4204 return Ret;
4205 }
4206
4207 Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
4208 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4209 Glue = Chain.getValue(1);
4210
4211 // Mark the end of the call, which is glued to the call itself.
4212 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4213 Glue = Chain.getValue(1);
4214
4215 // Assign locations to each value returned by this call.
4216 SmallVector<CCValAssign> RVLocs;
4217 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4218 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4219
4220 // Copy all of the result registers out of their specified physreg.
4221 for (auto &VA : RVLocs) {
4222 // Copy the value out.
4223 SDValue RetValue =
4224 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4225 // Glue the RetValue to the end of the call sequence.
4226 Chain = RetValue.getValue(1);
4227 Glue = RetValue.getValue(2);
4228
4229 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4230
4231 InVals.push_back(RetValue);
4232 }
4233
4234 return Chain;
4235}
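// Illustrative note: for a plain non-tail call the code above produces a
// node sequence of the form
//   callseq_start -> CopyToReg(args)... -> LoongArchISD::CALL
//     -> callseq_end -> CopyFromReg(results)...
// with the glue value threading the argument copies, the call and the result
// copies together so instruction scheduling cannot separate them.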
4236
4237bool LoongArchTargetLowering::CanLowerReturn(
4238 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4239 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4240 SmallVector<CCValAssign> RVLocs;
4241 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4242
4243 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4244 LoongArchABI::ABI ABI =
4245 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4246 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4247 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4248 nullptr))
4249 return false;
4250 }
4251 return true;
4252}
4253
4254SDValue LoongArchTargetLowering::LowerReturn(
4255 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4256 const SmallVectorImpl<ISD::OutputArg> &Outs,
4257 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4258 SelectionDAG &DAG) const {
4259 // Stores the assignment of the return value to a location.
4260 SmallVector<CCValAssign> RVLocs;
4261
4262 // Info about the registers and stack slot.
4263 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4264 *DAG.getContext());
4265
4266 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4267 nullptr, CC_LoongArch);
4268 if (CallConv == CallingConv::GHC && !RVLocs.empty())
4269 report_fatal_error("GHC functions return void only");
4270 SDValue Glue;
4271 SmallVector<SDValue, 4> RetOps(1, Chain);
4272
4273 // Copy the result values into the output registers.
4274 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4275 CCValAssign &VA = RVLocs[i];
4276 assert(VA.isRegLoc() && "Can only return in registers!");
4277
4278 // Handle a 'normal' return.