1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArchISelLowering.h"
15#include "LoongArch.h"
16#include "LoongArchMachineFunctionInfo.h"
17#include "LoongArchRegisterInfo.h"
18#include "LoongArchSubtarget.h"
19#include "LoongArchTargetMachine.h"
20#include "MCTargetDesc/LoongArchBaseInfo.h"
21#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/CodeGen/ISDOpcodes.h"
25#include "llvm/CodeGen/RuntimeLibcalls.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
29#include "llvm/Support/CodeGen.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/KnownBits.h"
33#include "llvm/Support/MathExtras.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
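// For example (illustrative; the exact codegen depends on the LLVM version),
// the check can be enabled from the command line:
//
//   llc -mtriple=loongarch64 -loongarch-check-zero-division input.ll
//
// which guards each integer division with a trap taken when the divisor is
// zero.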
44
45LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
74 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103 // we get to know which of sll and revb.2h is faster.
106
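// Illustrative expansion of the above (a sketch, assuming LA32 where the
// native width is 32 bits): %r = call i16 @llvm.bitreverse.i16(i16 %x)
// becomes, roughly,
//   bitrev.w $a0, $a0      # reverse all 32 bits
//   srli.w   $a0, $a0, 16  # shift the reversed i16 down into the low half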
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
110 setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111
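// Illustrative i32 byte-swap on LA32 built from REVB.2H (a sketch; the
// final selection may differ):
//   revb.2h $a0, $a0      # AABBCCDD -> BBAADDCC (swap bytes per halfword)
//   rotri.w $a0, $a0, 16  # BBAADDCC -> DDCCBBAA (swap the halfwords)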
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
139
142 }
143
144 // Set operations for LA32 only.
145
146 if (!Subtarget.is64Bit()) {
152
153 // Set libcalls.
154 setLibcallName(RTLIB::MUL_I128, nullptr);
155 // The MULO libcall is not part of libgcc, only compiler-rt.
156 setLibcallName(RTLIB::MULO_I64, nullptr);
157 }
158
159 // The MULO libcall is not part of libgcc, only compiler-rt.
160 setLibcallName(RTLIB::MULO_I128, nullptr);
161
163
164 static const ISD::CondCode FPCCToExpand[] = {
165 ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
166 ISD::SETGE, ISD::SETNE, ISD::SETGT};
167
168 // Set operations for 'F' feature.
169
170 if (Subtarget.hasBasicF()) {
171 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172
186
187 if (Subtarget.is64Bit())
189
190 if (!Subtarget.hasBasicD()) {
192 if (Subtarget.is64Bit()) {
195 }
196 }
197 }
198
199 // Set operations for 'D' feature.
200
201 if (Subtarget.hasBasicD()) {
202 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205
219
220 if (Subtarget.is64Bit())
222 }
223
224 // Set operations for 'LSX' feature.
225
226 if (Subtarget.hasExtLSX()) {
227 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
228 // Expand all truncating stores and extending loads.
229 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230 setTruncStoreAction(VT, InnerVT, Expand);
231 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
232 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
233 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234 }
235 // By default everything must be expanded. Then we will selectively turn
236 // on ones that can be effectively codegen'd.
237 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
238 setOperationAction(Op, VT, Expand);
239 }
240
241 for (MVT VT : LSXVTs) {
245
249
252 }
253 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
257 Legal);
259 VT, Legal);
266 Expand);
267 }
268 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
271 }
272 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
280 VT, Expand);
281 }
282 }
283
284 // Set operations for 'LASX' feature.
285
286 if (Subtarget.hasExtLASX()) {
287 for (MVT VT : LASXVTs) {
291
295
298 }
299 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
303 Legal);
305 VT, Legal);
312 Expand);
313 }
314 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
317 }
318 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
326 VT, Expand);
327 }
328 }
329
330 // Set DAG combine for LA32 and LA64.
331
332 setTargetDAGCombine(ISD::AND);
333 setTargetDAGCombine(ISD::OR);
334 setTargetDAGCombine(ISD::SRL);
335
336 // Set DAG combine for 'LSX' feature.
337
338 if (Subtarget.hasExtLSX())
339 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
340
341 // Compute derived properties from the register classes.
343
345
348
350
352
353 // Function alignments.
355 // Set preferred alignments.
359}
360
361bool LoongArchTargetLowering::isOffsetFoldingLegal(
362 const GlobalAddressSDNode *GA) const {
363 // In order to maximise the opportunity for common subexpression elimination,
364 // keep a separate ADD node for the global address offset instead of folding
365 // it in the global address node. Later peephole optimisations may choose to
366 // fold it back in when profitable.
367 return false;
368}
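// For example, given two accesses (add (globaladdr @g), 8) and
// (add (globaladdr @g), 16), keeping the ADDs separate lets CSE share a
// single materialisation of @g's address, whereas folding would create two
// distinct global address nodes.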
369
370SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
371 SelectionDAG &DAG) const {
372 switch (Op.getOpcode()) {
373 case ISD::ATOMIC_FENCE:
374 return lowerATOMIC_FENCE(Op, DAG);
375 case ISD::EH_DWARF_CFA:
376 return lowerEH_DWARF_CFA(Op, DAG);
377 case ISD::GlobalAddress:
378 return lowerGlobalAddress(Op, DAG);
379 case ISD::GlobalTLSAddress:
380 return lowerGlobalTLSAddress(Op, DAG);
381 case ISD::INTRINSIC_WO_CHAIN:
382 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
383 case ISD::INTRINSIC_W_CHAIN:
384 return lowerINTRINSIC_W_CHAIN(Op, DAG);
385 case ISD::INTRINSIC_VOID:
386 return lowerINTRINSIC_VOID(Op, DAG);
387 case ISD::BlockAddress:
388 return lowerBlockAddress(Op, DAG);
389 case ISD::JumpTable:
390 return lowerJumpTable(Op, DAG);
391 case ISD::SHL_PARTS:
392 return lowerShiftLeftParts(Op, DAG);
393 case ISD::SRA_PARTS:
394 return lowerShiftRightParts(Op, DAG, true);
395 case ISD::SRL_PARTS:
396 return lowerShiftRightParts(Op, DAG, false);
397 case ISD::ConstantPool:
398 return lowerConstantPool(Op, DAG);
399 case ISD::FP_TO_SINT:
400 return lowerFP_TO_SINT(Op, DAG);
401 case ISD::BITCAST:
402 return lowerBITCAST(Op, DAG);
403 case ISD::UINT_TO_FP:
404 return lowerUINT_TO_FP(Op, DAG);
405 case ISD::SINT_TO_FP:
406 return lowerSINT_TO_FP(Op, DAG);
407 case ISD::VASTART:
408 return lowerVASTART(Op, DAG);
409 case ISD::FRAMEADDR:
410 return lowerFRAMEADDR(Op, DAG);
411 case ISD::RETURNADDR:
412 return lowerRETURNADDR(Op, DAG);
413 case ISD::WRITE_REGISTER:
414 return lowerWRITE_REGISTER(Op, DAG);
415 case ISD::INSERT_VECTOR_ELT:
416 return lowerINSERT_VECTOR_ELT(Op, DAG);
417 case ISD::EXTRACT_VECTOR_ELT:
418 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
419 case ISD::BUILD_VECTOR:
420 return lowerBUILD_VECTOR(Op, DAG);
421 case ISD::VECTOR_SHUFFLE:
422 return lowerVECTOR_SHUFFLE(Op, DAG);
423 }
424 return SDValue();
425}
426
427SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
428 SelectionDAG &DAG) const {
429 // TODO: custom shuffle.
430 return SDValue();
431}
432
433static bool isConstantOrUndef(const SDValue Op) {
434 if (Op->isUndef())
435 return true;
436 if (isa<ConstantSDNode>(Op))
437 return true;
438 if (isa<ConstantFPSDNode>(Op))
439 return true;
440 return false;
441}
442
443static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
444 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
445 if (isConstantOrUndef(Op->getOperand(i)))
446 return true;
447 return false;
448}
449
450SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
451 SelectionDAG &DAG) const {
452 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
453 EVT ResTy = Op->getValueType(0);
454 SDLoc DL(Op);
455 APInt SplatValue, SplatUndef;
456 unsigned SplatBitSize;
457 bool HasAnyUndefs;
458 bool Is128Vec = ResTy.is128BitVector();
459 bool Is256Vec = ResTy.is256BitVector();
460
461 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
462 (!Subtarget.hasExtLASX() || !Is256Vec))
463 return SDValue();
464
465 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
466 /*MinSplatBits=*/8) &&
467 SplatBitSize <= 64) {
468 // We can only cope with 8, 16, 32, or 64-bit elements.
469 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
470 SplatBitSize != 64)
471 return SDValue();
472
473 EVT ViaVecTy;
474
475 switch (SplatBitSize) {
476 default:
477 return SDValue();
478 case 8:
479 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
480 break;
481 case 16:
482 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
483 break;
484 case 32:
485 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
486 break;
487 case 64:
488 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
489 break;
490 }
491
492 // SelectionDAG::getConstant will promote SplatValue appropriately.
493 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
494
495 // Bitcast to the type we originally wanted.
496 if (ViaVecTy != ResTy)
497 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
498
499 return Result;
500 }
501
502 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
503 return Op;
504
505 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
506 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
507 // The resulting code is the same length as the expansion, but it doesn't
508 // use memory operations.
509 EVT ResTy = Node->getValueType(0);
510
511 assert(ResTy.isVector());
512
513 unsigned NumElts = ResTy.getVectorNumElements();
514 SDValue Vector = DAG.getUNDEF(ResTy);
515 for (unsigned i = 0; i < NumElts; ++i) {
516 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
517 Node->getOperand(i),
518 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
519 }
520 return Vector;
521 }
522
523 return SDValue();
524}
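// For example (illustrative): a v4i32 build_vector splatting the constant 5
// is rebuilt above as one constant vector node (selectable as a vector
// immediate load such as vrepli.w), while a v4i32 of four different
// non-constant scalars becomes a chain of four INSERT_VECTOR_ELT nodes
// rather than four stores plus a load.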
525
526SDValue
527LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
528 SelectionDAG &DAG) const {
529 EVT VecTy = Op->getOperand(0)->getValueType(0);
530 SDValue Idx = Op->getOperand(1);
531 EVT EltTy = VecTy.getVectorElementType();
532 unsigned NumElts = VecTy.getVectorNumElements();
533
534 if (isa<ConstantSDNode>(Idx) &&
535 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
536 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
537 return Op;
538
539 return SDValue();
540}
541
542SDValue
543LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
544 SelectionDAG &DAG) const {
545 if (isa<ConstantSDNode>(Op->getOperand(2)))
546 return Op;
547 return SDValue();
548}
549
550SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
551 SelectionDAG &DAG) const {
552 SDLoc DL(Op);
553 SyncScope::ID FenceSSID =
554 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
555
556 // singlethread fences only synchronize with signal handlers on the same
557 // thread and thus only need to preserve instruction order, not actually
558 // enforce memory ordering.
559 if (FenceSSID == SyncScope::SingleThread)
560 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
561 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
562
563 return Op;
564}
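// For example (illustrative): fence syncscope("singlethread") seq_cst
// therefore emits no instruction at all, while a cross-thread fence seq_cst
// keeps the ATOMIC_FENCE node and is selected to a full barrier (a dbar).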
565
566SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
567 SelectionDAG &DAG) const {
568
569 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
570 DAG.getContext()->emitError(
571 "On LA64, only 64-bit registers can be written.");
572 return Op.getOperand(0);
573 }
574
575 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
576 DAG.getContext()->emitError(
577 "On LA32, only 32-bit registers can be written.");
578 return Op.getOperand(0);
579 }
580
581 return Op;
582}
583
584SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
585 SelectionDAG &DAG) const {
586 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
587 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
588 "be a constant integer");
589 return SDValue();
590 }
591
592 MachineFunction &MF = DAG.getMachineFunction();
593 MF.getFrameInfo().setFrameAddressIsTaken(true);
594 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
595 EVT VT = Op.getValueType();
596 SDLoc DL(Op);
597 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
598 unsigned Depth = Op.getConstantOperandVal(0);
599 int GRLenInBytes = Subtarget.getGRLen() / 8;
600
601 while (Depth--) {
602 int Offset = -(GRLenInBytes * 2);
603 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
604 DAG.getIntPtrConstant(Offset, DL));
605 FrameAddr =
606 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
607 }
608 return FrameAddr;
609}
610
611SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
612 SelectionDAG &DAG) const {
613 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
614 return SDValue();
615
616 // Currently only support lowering return address for current frame.
617 if (Op.getConstantOperandVal(0) != 0) {
618 DAG.getContext()->emitError(
619 "return address can only be determined for the current frame");
620 return SDValue();
621 }
622
623 MachineFunction &MF = DAG.getMachineFunction();
624 MF.getFrameInfo().setReturnAddressIsTaken(true);
625 MVT GRLenVT = Subtarget.getGRLenVT();
626
627 // Return the value of the return address register, marking it an implicit
628 // live-in.
629 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
630 getRegClassFor(GRLenVT));
631 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
632}
633
634SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
635 SelectionDAG &DAG) const {
636 MachineFunction &MF = DAG.getMachineFunction();
637 auto Size = Subtarget.getGRLen() / 8;
638 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
639 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
640}
641
642SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
643 SelectionDAG &DAG) const {
644 MachineFunction &MF = DAG.getMachineFunction();
645 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
646
647 SDLoc DL(Op);
648 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
649 getPointerTy(MF.getDataLayout()));
650
651 // vastart just stores the address of the VarArgsFrameIndex slot into the
652 // memory location argument.
653 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
654 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
655 MachinePointerInfo(SV));
656}
657
658SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
659 SelectionDAG &DAG) const {
660 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
661 !Subtarget.hasBasicD() && "unexpected target features");
662
663 SDLoc DL(Op);
664 SDValue Op0 = Op.getOperand(0);
665 if (Op0->getOpcode() == ISD::AND) {
666 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
667 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
668 return Op;
669 }
670
671 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
672 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
673 Op0.getConstantOperandVal(2) == UINT64_C(0))
674 return Op;
675
676 if (Op0.getOpcode() == ISD::AssertZext &&
677 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
678 return Op;
679
680 EVT OpVT = Op0.getValueType();
681 EVT RetVT = Op.getValueType();
682 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
683 MakeLibCallOptions CallOptions;
684 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
685 SDValue Chain = SDValue();
686 SDValue Result;
687 std::tie(Result, Chain) =
688 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
689 return Result;
690}
691
692SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
693 SelectionDAG &DAG) const {
694 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
695 !Subtarget.hasBasicD() && "unexpected target features");
696
697 SDLoc DL(Op);
698 SDValue Op0 = Op.getOperand(0);
699
700 if ((Op0.getOpcode() == ISD::AssertSext ||
701 Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
702 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
703 return Op;
704
705 EVT OpVT = Op0.getValueType();
706 EVT RetVT = Op.getValueType();
707 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
708 MakeLibCallOptions CallOptions;
709 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
710 SDValue Chain = SDValue();
711 SDValue Result;
712 std::tie(Result, Chain) =
713 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
714 return Result;
715}
716
717SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
718 SelectionDAG &DAG) const {
719
720 SDLoc DL(Op);
721 SDValue Op0 = Op.getOperand(0);
722
723 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
724 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
725 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
726 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
727 }
728 return Op;
729}
730
731SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
732 SelectionDAG &DAG) const {
733
734 SDLoc DL(Op);
735
736 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
737 !Subtarget.hasBasicD()) {
738 SDValue Dst =
739 DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
740 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
741 }
742
743 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
744 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
745 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
746}
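// Illustrative end result (a sketch; the final selection may differ) for
// "fptosi float %x to i32" when FP32 registers are available:
//   ftintrz.w.s $fa0, $fa0   # truncate toward zero, result in an FPR
//   movfr2gr.s  $a0, $fa0    # move the i32 bit pattern into a GPR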
747
748static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
749 SelectionDAG &DAG, unsigned Flags) {
750 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
751}
752
753static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
754 SelectionDAG &DAG, unsigned Flags) {
755 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
756 Flags);
757}
758
759static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
760 SelectionDAG &DAG, unsigned Flags) {
761 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
762 N->getOffset(), Flags);
763}
764
765static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
766 SelectionDAG &DAG, unsigned Flags) {
767 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
768}
769
770template <class NodeTy>
771SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
772 CodeModel::Model M,
773 bool IsLocal) const {
774 SDLoc DL(N);
775 EVT Ty = getPointerTy(DAG.getDataLayout());
776 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
777
778 switch (M) {
779 default:
780 report_fatal_error("Unsupported code model");
781
782 case CodeModel::Large: {
783 assert(Subtarget.is64Bit() && "Large code model requires LA64");
784
785 // This is not actually used, but is necessary for successfully matching
786 // the PseudoLA_*_LARGE nodes.
787 SDValue Tmp = DAG.getConstant(0, DL, Ty);
788 if (IsLocal)
789 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
790 // eventually becomes the desired 5-insn code sequence.
791 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
792 Tmp, Addr),
793 0);
794
795 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
796 // becomes the desired 5-insn code sequence.
797 return SDValue(
798 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
799 0);
800 }
801
802 case CodeModel::Small:
803 case CodeModel::Medium:
804 if (IsLocal)
805 // This generates the pattern (PseudoLA_PCREL sym), which expands to
806 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
807 return SDValue(
808 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
809
810 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
811 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
812 return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
813 0);
814 }
815}
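// Illustrative sequences this produces (a sketch; the pseudo expansions are
// authoritative). Small/medium code model, local symbol:
//   pcalau12i $a0, %pc_hi20(sym)
//   addi.d    $a0, $a0, %pc_lo12(sym)
// Non-local symbol:
//   pcalau12i $a0, %got_pc_hi20(sym)
//   ld.d      $a0, $a0, %got_pc_lo12(sym)
// The large code model additionally materialises the upper address bits
// (lu32i.d/lu52i.d) inside the 5-instruction PseudoLA_*_LARGE expansions.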
816
817SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
818 SelectionDAG &DAG) const {
819 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
820 DAG.getTarget().getCodeModel());
821}
822
823SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
824 SelectionDAG &DAG) const {
825 return getAddr(cast<JumpTableSDNode>(Op), DAG,
826 DAG.getTarget().getCodeModel());
827}
828
829SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
830 SelectionDAG &DAG) const {
831 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
832 DAG.getTarget().getCodeModel());
833}
834
835SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
836 SelectionDAG &DAG) const {
837 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
838 assert(N->getOffset() == 0 && "unexpected offset in global node");
839 auto CM = DAG.getTarget().getCodeModel();
840 const GlobalValue *GV = N->getGlobal();
841
842 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
843 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
844 CM = *GCM;
845 }
846
847 return getAddr(N, DAG, CM, GV->isDSOLocal());
848}
849
850SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
851 SelectionDAG &DAG,
852 unsigned Opc,
853 bool Large) const {
854 SDLoc DL(N);
855 EVT Ty = getPointerTy(DAG.getDataLayout());
856 MVT GRLenVT = Subtarget.getGRLenVT();
857
858 // This is not actually used, but is necessary for successfully matching the
859 // PseudoLA_*_LARGE nodes.
860 SDValue Tmp = DAG.getConstant(0, DL, Ty);
861 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
862 SDValue Offset = Large
863 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
864 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
865
866 // Add the thread pointer.
867 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
868 DAG.getRegister(LoongArch::R2, GRLenVT));
869}
870
871SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
872 SelectionDAG &DAG,
873 unsigned Opc,
874 bool Large) const {
875 SDLoc DL(N);
876 EVT Ty = getPointerTy(DAG.getDataLayout());
877 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
878
879 // This is not actually used, but is necessary for successfully matching the
880 // PseudoLA_*_LARGE nodes.
881 SDValue Tmp = DAG.getConstant(0, DL, Ty);
882
883 // Use a PC-relative addressing mode to access the dynamic GOT address.
884 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
885 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
886 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
887
888 // Prepare argument list to generate call.
889 ArgListTy Args;
890 ArgListEntry Entry;
891 Entry.Node = Load;
892 Entry.Ty = CallTy;
893 Args.push_back(Entry);
894
895 // Setup call to __tls_get_addr.
896 TargetLowering::CallLoweringInfo CLI(DAG);
897 CLI.setDebugLoc(DL)
898 .setChain(DAG.getEntryNode())
899 .setLibCallee(CallingConv::C, CallTy,
900 DAG.getExternalSymbol("__tls_get_addr", Ty),
901 std::move(Args));
902
903 return LowerCallTo(CLI).first;
904}
905
906SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
907 SelectionDAG &DAG, unsigned Opc,
908 bool Large) const {
909 SDLoc DL(N);
910 EVT Ty = getPointerTy(DAG.getDataLayout());
911 const GlobalValue *GV = N->getGlobal();
912
913 // This is not actually used, but is necessary for successfully matching the
914 // PseudoLA_*_LARGE nodes.
915 SDValue Tmp = DAG.getConstant(0, DL, Ty);
916
917 // Use a PC-relative addressing mode to access the global dynamic GOT address.
918 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
919 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
920 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
921 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
922}
923
924SDValue
925LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
926 SelectionDAG &DAG) const {
927 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
928 CallingConv::GHC)
929 report_fatal_error("In GHC calling convention TLS is not supported");
930
931 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
932 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
933
934 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
935 assert(N->getOffset() == 0 && "unexpected offset in global node");
936
937 bool IsDesc = DAG.getTarget().useTLSDESC();
938
939 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
940 case TLSModel::GeneralDynamic:
941 // In this model, application code calls the dynamic linker function
942 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
943 // runtime.
944 if (!IsDesc)
945 return getDynamicTLSAddr(N, DAG,
946 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
947 : LoongArch::PseudoLA_TLS_GD,
948 Large);
949 break;
950 case TLSModel::LocalDynamic:
951 // Same as GeneralDynamic, except for assembly modifiers and relocation
952 // records.
953 if (!IsDesc)
954 return getDynamicTLSAddr(N, DAG,
955 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
956 : LoongArch::PseudoLA_TLS_LD,
957 Large);
958 break;
959 case TLSModel::InitialExec:
960 // This model uses the GOT to resolve TLS offsets.
961 return getStaticTLSAddr(N, DAG,
962 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
963 : LoongArch::PseudoLA_TLS_IE,
964 Large);
965 case TLSModel::LocalExec:
966 // This model is used when static linking as the TLS offsets are resolved
967 // during program linking.
968 //
969 // This node doesn't need an extra argument for the large code model.
970 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
971 }
972
973 return getTLSDescAddr(N, DAG,
974 Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
975 : LoongArch::PseudoLA_TLS_DESC_PC,
976 Large);
977}
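// For example (illustrative): a thread_local global defined in the current
// executable typically gets TLSModel::LocalExec and thus PseudoLA_TLS_LE; a
// thread_local imported from another DSO gets InitialExec or one of the
// dynamic models, taking either the TLSDESC path or a __tls_get_addr call.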
978
979template <unsigned N>
980static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
981 SelectionDAG &DAG, bool IsSigned = false) {
982 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
983 // Check the ImmArg.
984 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
985 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
986 DAG.getContext()->emitError(Op->getOperationName(0) +
987 ": argument out of range.");
988 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
989 }
990 return SDValue();
991}
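// For example (illustrative), an immediate that does not fit the uimm3
// field of vrotri.b, such as
//   call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %v, i32 9)
// is diagnosed as "llvm.loongarch.lsx.vrotri.b: argument out of range." and
// lowered to UNDEF instead of being selected.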
992
993SDValue
994LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
995 SelectionDAG &DAG) const {
996 SDLoc DL(Op);
997 switch (Op.getConstantOperandVal(0)) {
998 default:
999 return SDValue(); // Don't custom lower most intrinsics.
1000 case Intrinsic::thread_pointer: {
1001 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1002 return DAG.getRegister(LoongArch::R2, PtrVT);
1003 }
1004 case Intrinsic::loongarch_lsx_vpickve2gr_d:
1005 case Intrinsic::loongarch_lsx_vpickve2gr_du:
1006 case Intrinsic::loongarch_lsx_vreplvei_d:
1007 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1008 return checkIntrinsicImmArg<1>(Op, 2, DAG);
1009 case Intrinsic::loongarch_lsx_vreplvei_w:
1010 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1011 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1012 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1013 case Intrinsic::loongarch_lasx_xvpickve_d:
1014 case Intrinsic::loongarch_lasx_xvpickve_d_f:
1015 return checkIntrinsicImmArg<2>(Op, 2, DAG);
1016 case Intrinsic::loongarch_lasx_xvinsve0_d:
1017 return checkIntrinsicImmArg<2>(Op, 3, DAG);
1018 case Intrinsic::loongarch_lsx_vsat_b:
1019 case Intrinsic::loongarch_lsx_vsat_bu:
1020 case Intrinsic::loongarch_lsx_vrotri_b:
1021 case Intrinsic::loongarch_lsx_vsllwil_h_b:
1022 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1023 case Intrinsic::loongarch_lsx_vsrlri_b:
1024 case Intrinsic::loongarch_lsx_vsrari_b:
1025 case Intrinsic::loongarch_lsx_vreplvei_h:
1026 case Intrinsic::loongarch_lasx_xvsat_b:
1027 case Intrinsic::loongarch_lasx_xvsat_bu:
1028 case Intrinsic::loongarch_lasx_xvrotri_b:
1029 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1030 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1031 case Intrinsic::loongarch_lasx_xvsrlri_b:
1032 case Intrinsic::loongarch_lasx_xvsrari_b:
1033 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1034 case Intrinsic::loongarch_lasx_xvpickve_w:
1035 case Intrinsic::loongarch_lasx_xvpickve_w_f:
1036 return checkIntrinsicImmArg<3>(Op, 2, DAG);
1037 case Intrinsic::loongarch_lasx_xvinsve0_w:
1038 return checkIntrinsicImmArg<3>(Op, 3, DAG);
1039 case Intrinsic::loongarch_lsx_vsat_h:
1040 case Intrinsic::loongarch_lsx_vsat_hu:
1041 case Intrinsic::loongarch_lsx_vrotri_h:
1042 case Intrinsic::loongarch_lsx_vsllwil_w_h:
1043 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1044 case Intrinsic::loongarch_lsx_vsrlri_h:
1045 case Intrinsic::loongarch_lsx_vsrari_h:
1046 case Intrinsic::loongarch_lsx_vreplvei_b:
1047 case Intrinsic::loongarch_lasx_xvsat_h:
1048 case Intrinsic::loongarch_lasx_xvsat_hu:
1049 case Intrinsic::loongarch_lasx_xvrotri_h:
1050 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
1051 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
1052 case Intrinsic::loongarch_lasx_xvsrlri_h:
1053 case Intrinsic::loongarch_lasx_xvsrari_h:
1054 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
1055 return checkIntrinsicImmArg<4>(Op, 2, DAG);
1056 case Intrinsic::loongarch_lsx_vsrlni_b_h:
1057 case Intrinsic::loongarch_lsx_vsrani_b_h:
1058 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
1059 case Intrinsic::loongarch_lsx_vsrarni_b_h:
1060 case Intrinsic::loongarch_lsx_vssrlni_b_h:
1061 case Intrinsic::loongarch_lsx_vssrani_b_h:
1062 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
1063 case Intrinsic::loongarch_lsx_vssrani_bu_h:
1064 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
1065 case Intrinsic::loongarch_lsx_vssrarni_b_h:
1066 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
1067 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
1068 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
1069 case Intrinsic::loongarch_lasx_xvsrani_b_h:
1070 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
1071 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
1072 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
1073 case Intrinsic::loongarch_lasx_xvssrani_b_h:
1074 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
1075 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
1076 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
1077 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
1078 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
1079 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
1080 return checkIntrinsicImmArg<4>(Op, 3, DAG);
1081 case Intrinsic::loongarch_lsx_vsat_w:
1082 case Intrinsic::loongarch_lsx_vsat_wu:
1083 case Intrinsic::loongarch_lsx_vrotri_w:
1084 case Intrinsic::loongarch_lsx_vsllwil_d_w:
1085 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1086 case Intrinsic::loongarch_lsx_vsrlri_w:
1087 case Intrinsic::loongarch_lsx_vsrari_w:
1088 case Intrinsic::loongarch_lsx_vslei_bu:
1089 case Intrinsic::loongarch_lsx_vslei_hu:
1090 case Intrinsic::loongarch_lsx_vslei_wu:
1091 case Intrinsic::loongarch_lsx_vslei_du:
1092 case Intrinsic::loongarch_lsx_vslti_bu:
1093 case Intrinsic::loongarch_lsx_vslti_hu:
1094 case Intrinsic::loongarch_lsx_vslti_wu:
1095 case Intrinsic::loongarch_lsx_vslti_du:
1096 case Intrinsic::loongarch_lsx_vbsll_v:
1097 case Intrinsic::loongarch_lsx_vbsrl_v:
1098 case Intrinsic::loongarch_lasx_xvsat_w:
1099 case Intrinsic::loongarch_lasx_xvsat_wu:
1100 case Intrinsic::loongarch_lasx_xvrotri_w:
1101 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1102 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1103 case Intrinsic::loongarch_lasx_xvsrlri_w:
1104 case Intrinsic::loongarch_lasx_xvsrari_w:
1105 case Intrinsic::loongarch_lasx_xvslei_bu:
1106 case Intrinsic::loongarch_lasx_xvslei_hu:
1107 case Intrinsic::loongarch_lasx_xvslei_wu:
1108 case Intrinsic::loongarch_lasx_xvslei_du:
1109 case Intrinsic::loongarch_lasx_xvslti_bu:
1110 case Intrinsic::loongarch_lasx_xvslti_hu:
1111 case Intrinsic::loongarch_lasx_xvslti_wu:
1112 case Intrinsic::loongarch_lasx_xvslti_du:
1113 case Intrinsic::loongarch_lasx_xvbsll_v:
1114 case Intrinsic::loongarch_lasx_xvbsrl_v:
1115 return checkIntrinsicImmArg<5>(Op, 2, DAG);
1116 case Intrinsic::loongarch_lsx_vseqi_b:
1117 case Intrinsic::loongarch_lsx_vseqi_h:
1118 case Intrinsic::loongarch_lsx_vseqi_w:
1119 case Intrinsic::loongarch_lsx_vseqi_d:
1120 case Intrinsic::loongarch_lsx_vslei_b:
1121 case Intrinsic::loongarch_lsx_vslei_h:
1122 case Intrinsic::loongarch_lsx_vslei_w:
1123 case Intrinsic::loongarch_lsx_vslei_d:
1124 case Intrinsic::loongarch_lsx_vslti_b:
1125 case Intrinsic::loongarch_lsx_vslti_h:
1126 case Intrinsic::loongarch_lsx_vslti_w:
1127 case Intrinsic::loongarch_lsx_vslti_d:
1128 case Intrinsic::loongarch_lasx_xvseqi_b:
1129 case Intrinsic::loongarch_lasx_xvseqi_h:
1130 case Intrinsic::loongarch_lasx_xvseqi_w:
1131 case Intrinsic::loongarch_lasx_xvseqi_d:
1132 case Intrinsic::loongarch_lasx_xvslei_b:
1133 case Intrinsic::loongarch_lasx_xvslei_h:
1134 case Intrinsic::loongarch_lasx_xvslei_w:
1135 case Intrinsic::loongarch_lasx_xvslei_d:
1136 case Intrinsic::loongarch_lasx_xvslti_b:
1137 case Intrinsic::loongarch_lasx_xvslti_h:
1138 case Intrinsic::loongarch_lasx_xvslti_w:
1139 case Intrinsic::loongarch_lasx_xvslti_d:
1140 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1141 case Intrinsic::loongarch_lsx_vsrlni_h_w:
1142 case Intrinsic::loongarch_lsx_vsrani_h_w:
1143 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1144 case Intrinsic::loongarch_lsx_vsrarni_h_w:
1145 case Intrinsic::loongarch_lsx_vssrlni_h_w:
1146 case Intrinsic::loongarch_lsx_vssrani_h_w:
1147 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1148 case Intrinsic::loongarch_lsx_vssrani_hu_w:
1149 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1150 case Intrinsic::loongarch_lsx_vssrarni_h_w:
1151 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1152 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1153 case Intrinsic::loongarch_lsx_vfrstpi_b:
1154 case Intrinsic::loongarch_lsx_vfrstpi_h:
1155 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1156 case Intrinsic::loongarch_lasx_xvsrani_h_w:
1157 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1158 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1159 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1160 case Intrinsic::loongarch_lasx_xvssrani_h_w:
1161 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1162 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1163 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1164 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1165 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1166 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1167 case Intrinsic::loongarch_lasx_xvfrstpi_b:
1168 case Intrinsic::loongarch_lasx_xvfrstpi_h:
1169 return checkIntrinsicImmArg<5>(Op, 3, DAG);
1170 case Intrinsic::loongarch_lsx_vsat_d:
1171 case Intrinsic::loongarch_lsx_vsat_du:
1172 case Intrinsic::loongarch_lsx_vrotri_d:
1173 case Intrinsic::loongarch_lsx_vsrlri_d:
1174 case Intrinsic::loongarch_lsx_vsrari_d:
1175 case Intrinsic::loongarch_lasx_xvsat_d:
1176 case Intrinsic::loongarch_lasx_xvsat_du:
1177 case Intrinsic::loongarch_lasx_xvrotri_d:
1178 case Intrinsic::loongarch_lasx_xvsrlri_d:
1179 case Intrinsic::loongarch_lasx_xvsrari_d:
1180 return checkIntrinsicImmArg<6>(Op, 2, DAG);
1181 case Intrinsic::loongarch_lsx_vsrlni_w_d:
1182 case Intrinsic::loongarch_lsx_vsrani_w_d:
1183 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1184 case Intrinsic::loongarch_lsx_vsrarni_w_d:
1185 case Intrinsic::loongarch_lsx_vssrlni_w_d:
1186 case Intrinsic::loongarch_lsx_vssrani_w_d:
1187 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1188 case Intrinsic::loongarch_lsx_vssrani_wu_d:
1189 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1190 case Intrinsic::loongarch_lsx_vssrarni_w_d:
1191 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1192 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1193 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1194 case Intrinsic::loongarch_lasx_xvsrani_w_d:
1195 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1196 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1197 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1198 case Intrinsic::loongarch_lasx_xvssrani_w_d:
1199 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1200 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1201 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1202 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1203 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1204 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1205 return checkIntrinsicImmArg<6>(Op, 3, DAG);
1206 case Intrinsic::loongarch_lsx_vsrlni_d_q:
1207 case Intrinsic::loongarch_lsx_vsrani_d_q:
1208 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1209 case Intrinsic::loongarch_lsx_vsrarni_d_q:
1210 case Intrinsic::loongarch_lsx_vssrlni_d_q:
1211 case Intrinsic::loongarch_lsx_vssrani_d_q:
1212 case Intrinsic::loongarch_lsx_vssrlni_du_q:
1213 case Intrinsic::loongarch_lsx_vssrani_du_q:
1214 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1215 case Intrinsic::loongarch_lsx_vssrarni_d_q:
1216 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1217 case Intrinsic::loongarch_lsx_vssrarni_du_q:
1218 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1219 case Intrinsic::loongarch_lasx_xvsrani_d_q:
1220 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1221 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1222 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1223 case Intrinsic::loongarch_lasx_xvssrani_d_q:
1224 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1225 case Intrinsic::loongarch_lasx_xvssrani_du_q:
1226 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1227 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1228 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1229 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1230 return checkIntrinsicImmArg<7>(Op, 3, DAG);
1231 case Intrinsic::loongarch_lsx_vnori_b:
1232 case Intrinsic::loongarch_lsx_vshuf4i_b:
1233 case Intrinsic::loongarch_lsx_vshuf4i_h:
1234 case Intrinsic::loongarch_lsx_vshuf4i_w:
1235 case Intrinsic::loongarch_lasx_xvnori_b:
1236 case Intrinsic::loongarch_lasx_xvshuf4i_b:
1237 case Intrinsic::loongarch_lasx_xvshuf4i_h:
1238 case Intrinsic::loongarch_lasx_xvshuf4i_w:
1239 case Intrinsic::loongarch_lasx_xvpermi_d:
1240 return checkIntrinsicImmArg<8>(Op, 2, DAG);
1241 case Intrinsic::loongarch_lsx_vshuf4i_d:
1242 case Intrinsic::loongarch_lsx_vpermi_w:
1243 case Intrinsic::loongarch_lsx_vbitseli_b:
1244 case Intrinsic::loongarch_lsx_vextrins_b:
1245 case Intrinsic::loongarch_lsx_vextrins_h:
1246 case Intrinsic::loongarch_lsx_vextrins_w:
1247 case Intrinsic::loongarch_lsx_vextrins_d:
1248 case Intrinsic::loongarch_lasx_xvshuf4i_d:
1249 case Intrinsic::loongarch_lasx_xvpermi_w:
1250 case Intrinsic::loongarch_lasx_xvpermi_q:
1251 case Intrinsic::loongarch_lasx_xvbitseli_b:
1252 case Intrinsic::loongarch_lasx_xvextrins_b:
1253 case Intrinsic::loongarch_lasx_xvextrins_h:
1254 case Intrinsic::loongarch_lasx_xvextrins_w:
1255 case Intrinsic::loongarch_lasx_xvextrins_d:
1256 return checkIntrinsicImmArg<8>(Op, 3, DAG);
1257 case Intrinsic::loongarch_lsx_vrepli_b:
1258 case Intrinsic::loongarch_lsx_vrepli_h:
1259 case Intrinsic::loongarch_lsx_vrepli_w:
1260 case Intrinsic::loongarch_lsx_vrepli_d:
1261 case Intrinsic::loongarch_lasx_xvrepli_b:
1262 case Intrinsic::loongarch_lasx_xvrepli_h:
1263 case Intrinsic::loongarch_lasx_xvrepli_w:
1264 case Intrinsic::loongarch_lasx_xvrepli_d:
1265 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1266 case Intrinsic::loongarch_lsx_vldi:
1267 case Intrinsic::loongarch_lasx_xvldi:
1268 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1269 }
1270}
1271
1272// Helper function that emits an error message for intrinsics with a chain,
1273// then returns the merge values of a UNDEF and the chain.
1274static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1275 StringRef ErrorMsg,
1276 SelectionDAG &DAG) {
1277 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1278 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1279 SDLoc(Op));
1280}
1281
1282SDValue
1283LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1284 SelectionDAG &DAG) const {
1285 SDLoc DL(Op);
1286 MVT GRLenVT = Subtarget.getGRLenVT();
1287 EVT VT = Op.getValueType();
1288 SDValue Chain = Op.getOperand(0);
1289 const StringRef ErrorMsgOOR = "argument out of range";
1290 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1291 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1292
1293 switch (Op.getConstantOperandVal(1)) {
1294 default:
1295 return Op;
1296 case Intrinsic::loongarch_crc_w_b_w:
1297 case Intrinsic::loongarch_crc_w_h_w:
1298 case Intrinsic::loongarch_crc_w_w_w:
1299 case Intrinsic::loongarch_crc_w_d_w:
1300 case Intrinsic::loongarch_crcc_w_b_w:
1301 case Intrinsic::loongarch_crcc_w_h_w:
1302 case Intrinsic::loongarch_crcc_w_w_w:
1303 case Intrinsic::loongarch_crcc_w_d_w:
1304 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1305 case Intrinsic::loongarch_csrrd_w:
1306 case Intrinsic::loongarch_csrrd_d: {
1307 unsigned Imm = Op.getConstantOperandVal(2);
1308 return !isUInt<14>(Imm)
1309 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1310 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1311 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1312 }
1313 case Intrinsic::loongarch_csrwr_w:
1314 case Intrinsic::loongarch_csrwr_d: {
1315 unsigned Imm = Op.getConstantOperandVal(3);
1316 return !isUInt<14>(Imm)
1317 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1318 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1319 {Chain, Op.getOperand(2),
1320 DAG.getConstant(Imm, DL, GRLenVT)});
1321 }
1322 case Intrinsic::loongarch_csrxchg_w:
1323 case Intrinsic::loongarch_csrxchg_d: {
1324 unsigned Imm = Op.getConstantOperandVal(4);
1325 return !isUInt<14>(Imm)
1326 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1327 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1328 {Chain, Op.getOperand(2), Op.getOperand(3),
1329 DAG.getConstant(Imm, DL, GRLenVT)});
1330 }
1331 case Intrinsic::loongarch_iocsrrd_d: {
1332 return DAG.getNode(
1333 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1334 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1335 }
1336#define IOCSRRD_CASE(NAME, NODE) \
1337 case Intrinsic::loongarch_##NAME: { \
1338 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
1339 {Chain, Op.getOperand(2)}); \
1340 }
1341 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1342 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1343 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1344#undef IOCSRRD_CASE
1345 case Intrinsic::loongarch_cpucfg: {
1346 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1347 {Chain, Op.getOperand(2)});
1348 }
1349 case Intrinsic::loongarch_lddir_d: {
1350 unsigned Imm = Op.getConstantOperandVal(3);
1351 return !isUInt<8>(Imm)
1352 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1353 : Op;
1354 }
1355 case Intrinsic::loongarch_movfcsr2gr: {
1356 if (!Subtarget.hasBasicF())
1357 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1358 unsigned Imm = Op.getConstantOperandVal(2);
1359 return !isUInt<2>(Imm)
1360 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1361 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1362 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1363 }
1364 case Intrinsic::loongarch_lsx_vld:
1365 case Intrinsic::loongarch_lsx_vldrepl_b:
1366 case Intrinsic::loongarch_lasx_xvld:
1367 case Intrinsic::loongarch_lasx_xvldrepl_b:
1368 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1369 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1370 : SDValue();
1371 case Intrinsic::loongarch_lsx_vldrepl_h:
1372 case Intrinsic::loongarch_lasx_xvldrepl_h:
1373 return !isShiftedInt<11, 1>(
1374 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1375 ? emitIntrinsicWithChainErrorMessage(
1376 Op, "argument out of range or not a multiple of 2", DAG)
1377 : SDValue();
1378 case Intrinsic::loongarch_lsx_vldrepl_w:
1379 case Intrinsic::loongarch_lasx_xvldrepl_w:
1380 return !isShiftedInt<10, 2>(
1381 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1382 ? emitIntrinsicWithChainErrorMessage(
1383 Op, "argument out of range or not a multiple of 4", DAG)
1384 : SDValue();
1385 case Intrinsic::loongarch_lsx_vldrepl_d:
1386 case Intrinsic::loongarch_lasx_xvldrepl_d:
1387 return !isShiftedInt<9, 3>(
1388 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1389 ? emitIntrinsicWithChainErrorMessage(
1390 Op, "argument out of range or not a multiple of 8", DAG)
1391 : SDValue();
1392 }
1393}
1394
1395// Helper function that emits an error message for intrinsics with a void
1396// return value, then returns the chain.
1397static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1398 SelectionDAG &DAG) {
1399
1400 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1401 return Op.getOperand(0);
1402}
1403
1404SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1405 SelectionDAG &DAG) const {
1406 SDLoc DL(Op);
1407 MVT GRLenVT = Subtarget.getGRLenVT();
1408 SDValue Chain = Op.getOperand(0);
1409 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1410 SDValue Op2 = Op.getOperand(2);
1411 const StringRef ErrorMsgOOR = "argument out of range";
1412 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1413 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1414 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1415
1416 switch (IntrinsicEnum) {
1417 default:
1418 // TODO: Add more Intrinsics.
1419 return SDValue();
1420 case Intrinsic::loongarch_cacop_d:
1421 case Intrinsic::loongarch_cacop_w: {
1422 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1423 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1424 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1425 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1426 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1427 unsigned Imm1 = Op2->getAsZExtVal();
1428 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1429 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1430 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1431 return Op;
1432 }
1433 case Intrinsic::loongarch_dbar: {
1434 unsigned Imm = Op2->getAsZExtVal();
1435 return !isUInt<15>(Imm)
1436 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1437 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1438 DAG.getConstant(Imm, DL, GRLenVT));
1439 }
1440 case Intrinsic::loongarch_ibar: {
1441 unsigned Imm = Op2->getAsZExtVal();
1442 return !isUInt<15>(Imm)
1443 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1444 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1445 DAG.getConstant(Imm, DL, GRLenVT));
1446 }
1447 case Intrinsic::loongarch_break: {
1448 unsigned Imm = Op2->getAsZExtVal();
1449 return !isUInt<15>(Imm)
1450 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1451 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1452 DAG.getConstant(Imm, DL, GRLenVT));
1453 }
1454 case Intrinsic::loongarch_movgr2fcsr: {
1455 if (!Subtarget.hasBasicF())
1456 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1457 unsigned Imm = Op2->getAsZExtVal();
1458 return !isUInt<2>(Imm)
1459 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1460 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1461 DAG.getConstant(Imm, DL, GRLenVT),
1462 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1463 Op.getOperand(3)));
1464 }
1465 case Intrinsic::loongarch_syscall: {
1466 unsigned Imm = Op2->getAsZExtVal();
1467 return !isUInt<15>(Imm)
1468 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1469 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1470 DAG.getConstant(Imm, DL, GRLenVT));
1471 }
1472#define IOCSRWR_CASE(NAME, NODE) \
1473 case Intrinsic::loongarch_##NAME: { \
1474 SDValue Op3 = Op.getOperand(3); \
1475 return Subtarget.is64Bit() \
1476 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
1477 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1478 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
1479 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
1480 Op3); \
1481 }
1482 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1483 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1484 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1485#undef IOCSRWR_CASE
1486 case Intrinsic::loongarch_iocsrwr_d: {
1487 return !Subtarget.is64Bit()
1488 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1489 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1490 Op2,
1491 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1492 Op.getOperand(3)));
1493 }
1494#define ASRT_LE_GT_CASE(NAME) \
1495 case Intrinsic::loongarch_##NAME: { \
1496 return !Subtarget.is64Bit() \
1497 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
1498 : Op; \
1499 }
1500 ASRT_LE_GT_CASE(asrtle_d)
1501 ASRT_LE_GT_CASE(asrtgt_d)
1502#undef ASRT_LE_GT_CASE
1503 case Intrinsic::loongarch_ldpte_d: {
1504 unsigned Imm = Op.getConstantOperandVal(3);
1505 return !Subtarget.is64Bit()
1506 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1507 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1508 : Op;
1509 }
1510 case Intrinsic::loongarch_lsx_vst:
1511 case Intrinsic::loongarch_lasx_xvst:
1512 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1513 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1514 : SDValue();
1515 case Intrinsic::loongarch_lasx_xvstelm_b:
1516 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1517 !isUInt<5>(Op.getConstantOperandVal(5)))
1518 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1519 : SDValue();
1520 case Intrinsic::loongarch_lsx_vstelm_b:
1521 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1522 !isUInt<4>(Op.getConstantOperandVal(5)))
1523 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1524 : SDValue();
1525 case Intrinsic::loongarch_lasx_xvstelm_h:
1526 return (!isShiftedInt<8, 1>(
1527 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1528 !isUInt<4>(Op.getConstantOperandVal(5)))
1529 ? emitIntrinsicErrorMessage(
1530 Op, "argument out of range or not a multiple of 2", DAG)
1531 : SDValue();
1532 case Intrinsic::loongarch_lsx_vstelm_h:
1533 return (!isShiftedInt<8, 1>(
1534 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1535 !isUInt<3>(Op.getConstantOperandVal(5)))
1536 ? emitIntrinsicErrorMessage(
1537 Op, "argument out of range or not a multiple of 2", DAG)
1538 : SDValue();
1539 case Intrinsic::loongarch_lasx_xvstelm_w:
1540 return (!isShiftedInt<8, 2>(
1541 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1542 !isUInt<3>(Op.getConstantOperandVal(5)))
1543 ? emitIntrinsicErrorMessage(
1544 Op, "argument out of range or not a multiple of 4", DAG)
1545 : SDValue();
1546 case Intrinsic::loongarch_lsx_vstelm_w:
1547 return (!isShiftedInt<8, 2>(
1548 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1549 !isUInt<2>(Op.getConstantOperandVal(5)))
1550 ? emitIntrinsicErrorMessage(
1551 Op, "argument out of range or not a multiple of 4", DAG)
1552 : SDValue();
1553 case Intrinsic::loongarch_lasx_xvstelm_d:
1554 return (!isShiftedInt<8, 3>(
1555 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1556 !isUInt<2>(Op.getConstantOperandVal(5)))
1557 ? emitIntrinsicErrorMessage(
1558 Op, "argument out of range or not a multiple of 8", DAG)
1559 : SDValue();
1560 case Intrinsic::loongarch_lsx_vstelm_d:
1561 return (!isShiftedInt<8, 3>(
1562 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1563 !isUInt<1>(Op.getConstantOperandVal(5)))
1564 ? emitIntrinsicErrorMessage(
1565 Op, "argument out of range or not a multiple of 8", DAG)
1566 : SDValue();
1567 }
1568}
1569
1570SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1571 SelectionDAG &DAG) const {
1572 SDLoc DL(Op);
1573 SDValue Lo = Op.getOperand(0);
1574 SDValue Hi = Op.getOperand(1);
1575 SDValue Shamt = Op.getOperand(2);
1576 EVT VT = Lo.getValueType();
1577
1578 // if Shamt-GRLen < 0: // Shamt < GRLen
1579 // Lo = Lo << Shamt
1580 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1581 // else:
1582 // Lo = 0
1583 // Hi = Lo << (Shamt-GRLen)
1584
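// Worked example (GRLen=32): for Shamt = 40, Shamt-GRLen = 8 >= 0, so
// Lo = 0 and Hi = Lo << 8. For Shamt = 4, Hi = (Hi << 4) | (Lo >> 28); the
// (Lo >>u 1) >>u (31 ^ 4) form computes Lo >> 28 without ever using a
// shift amount of GRLen, which would be undefined.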
1585 SDValue Zero = DAG.getConstant(0, DL, VT);
1586 SDValue One = DAG.getConstant(1, DL, VT);
1587 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1588 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1589 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1590 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1591
1592 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1593 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1594 SDValue ShiftRightLo =
1595 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1596 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1597 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1598 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1599
1600 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1601
1602 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1603 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1604
1605 SDValue Parts[2] = {Lo, Hi};
1606 return DAG.getMergeValues(Parts, DL);
1607}
1608
1609SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1610 SelectionDAG &DAG,
1611 bool IsSRA) const {
1612 SDLoc DL(Op);
1613 SDValue Lo = Op.getOperand(0);
1614 SDValue Hi = Op.getOperand(1);
1615 SDValue Shamt = Op.getOperand(2);
1616 EVT VT = Lo.getValueType();
1617
1618 // SRA expansion:
1619 // if Shamt-GRLen < 0: // Shamt < GRLen
1620 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1621 // Hi = Hi >>s Shamt
1622 // else:
1623 // Lo = Hi >>s (Shamt-GRLen);
1624 // Hi = Hi >>s (GRLen-1)
1625 //
1626 // SRL expansion:
1627 // if Shamt-GRLen < 0: // Shamt < GRLen
1628 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1629 // Hi = Hi >>u Shamt
1630 // else:
1631 // Lo = Hi >>u (Shamt-GRLen);
1632 // Hi = 0;
1633
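// Worked example (GRLen=32, SRL): for Shamt = 4,
// Lo = (Lo >>u 4) | ((Hi << 1) << 27), i.e. Hi's low four bits enter Lo's
// high end; for Shamt = 40, Lo = Hi >>u 8 and Hi = 0.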
1634 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1635
1636 SDValue Zero = DAG.getConstant(0, DL, VT);
1637 SDValue One = DAG.getConstant(1, DL, VT);
1638 SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1639 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1640 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1641 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1642
1643 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1644 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1645 SDValue ShiftLeftHi =
1646 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1647 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1648 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1649 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1650 SDValue HiFalse =
1651 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1652
1653 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1654
1655 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1656 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1657
1658 SDValue Parts[2] = {Lo, Hi};
1659 return DAG.getMergeValues(Parts, DL);
1660}
1661
1662// Returns the opcode of the target-specific SDNode that implements the 32-bit
1663// form of the given Opcode.
1664static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1665 switch (Opcode) {
1666 default:
1667 llvm_unreachable("Unexpected opcode");
1668 case ISD::SHL:
1669 return LoongArchISD::SLL_W;
1670 case ISD::SRA:
1671 return LoongArchISD::SRA_W;
1672 case ISD::SRL:
1673 return LoongArchISD::SRL_W;
1674 case ISD::ROTR:
1675 return LoongArchISD::ROTR_W;
1676 case ISD::ROTL:
1677 return LoongArchISD::ROTL_W;
1678 case ISD::CTTZ:
1679 return LoongArchISD::CTZ_W;
1680 case ISD::CTLZ:
1681 return LoongArchISD::CLZ_W;
1682 }
1683}
1684
1685// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1686// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1687// otherwise be promoted to i64, making it difficult to select the
1688// SLL_W/.../*W instructions later, because the fact that the operation was
1689// originally of type i8/i16/i32 is lost.
1690static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1691 unsigned ExtOpc = ISD::ANY_EXTEND) {
1692 SDLoc DL(N);
1693 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1694 SDValue NewOp0, NewRes;
1695
1696 switch (NumOp) {
1697 default:
1698 llvm_unreachable("Unexpected NumOp");
1699 case 1: {
1700 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1701 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1702 break;
1703 }
1704 case 2: {
1705 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1706 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1707 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1708 break;
1709 }
1710 // TODO: Handle more values of NumOp.
1711 }
1712
1713 // ReplaceNodeResults requires we maintain the same type for the return
1714 // value.
1715 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1716}
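// For example (illustrative): on LA64, (shl i32 %a, %b) is rebuilt here as
// (trunc (SLL_W (any_extend %a), (any_extend %b))), which selects to sll.w
// and preserves the fact that the shift was 32-bit, something plain
// promotion to i64 would lose.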
1717
1718// Helper function that emits an error message for intrinsics with or without
1719// a chain, then pushes a UNDEF and (when there is a chain) the chain as the
1720// results.
1721static void emitErrorAndReplaceIntrinsicResults(
1722 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1723 StringRef ErrorMsg, bool WithChain = true) {
1723 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1724 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1725 if (!WithChain)
1726 return;
1727 Results.push_back(N->getOperand(0));
1728}
1729
1730template <unsigned N>
1731static void
1732replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1733 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1734 unsigned ResOp) {
1735 const StringRef ErrorMsgOOR = "argument out of range";
1736 unsigned Imm = Node->getConstantOperandVal(2);
1737 if (!isUInt<N>(Imm)) {
1738 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1739 /*WithChain=*/false);
1740 return;
1741 }
1742 SDLoc DL(Node);
1743 SDValue Vec = Node->getOperand(1);
1744
1745 SDValue PickElt =
1746 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1747 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1748 DAG.getValueType(Vec.getValueType().getVectorElementType()));
1749 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1750 PickElt.getValue(0)));
1751}
1752
1753static void replaceVecCondBranchResults(SDNode *N,
1754 SmallVectorImpl<SDValue> &Results,
1755 SelectionDAG &DAG,
1756 const LoongArchSubtarget &Subtarget,
1757 unsigned ResOp) {
1758 SDLoc DL(N);
1759 SDValue Vec = N->getOperand(1);
1760
1761 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1762 Results.push_back(
1763 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1764}
1765
1766static void
1767replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1768 SelectionDAG &DAG,
1769 const LoongArchSubtarget &Subtarget) {
1770 switch (N->getConstantOperandVal(0)) {
1771 default:
1772 llvm_unreachable("Unexpected Intrinsic.");
1773 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1774 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1775 LoongArchISD::VPICK_SEXT_ELT);
1776 break;
1777 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1778 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1779 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1780 LoongArchISD::VPICK_SEXT_ELT);
1781 break;
1782 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1783 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1784 LoongArchISD::VPICK_SEXT_ELT);
1785 break;
1786 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1787 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1788 LoongArchISD::VPICK_ZEXT_ELT);
1789 break;
1790 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1791 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1792 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1793 LoongArchISD::VPICK_ZEXT_ELT);
1794 break;
1795 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1796 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1797 LoongArchISD::VPICK_ZEXT_ELT);
1798 break;
1799 case Intrinsic::loongarch_lsx_bz_b:
1800 case Intrinsic::loongarch_lsx_bz_h:
1801 case Intrinsic::loongarch_lsx_bz_w:
1802 case Intrinsic::loongarch_lsx_bz_d:
1803 case Intrinsic::loongarch_lasx_xbz_b:
1804 case Intrinsic::loongarch_lasx_xbz_h:
1805 case Intrinsic::loongarch_lasx_xbz_w:
1806 case Intrinsic::loongarch_lasx_xbz_d:
1807 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1808 LoongArchISD::VANY_ZERO);
1809 break;
1810 case Intrinsic::loongarch_lsx_bz_v:
1811 case Intrinsic::loongarch_lasx_xbz_v:
1812 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1813 LoongArchISD::VALL_ZERO);
1814 break;
1815 case Intrinsic::loongarch_lsx_bnz_b:
1816 case Intrinsic::loongarch_lsx_bnz_h:
1817 case Intrinsic::loongarch_lsx_bnz_w:
1818 case Intrinsic::loongarch_lsx_bnz_d:
1819 case Intrinsic::loongarch_lasx_xbnz_b:
1820 case Intrinsic::loongarch_lasx_xbnz_h:
1821 case Intrinsic::loongarch_lasx_xbnz_w:
1822 case Intrinsic::loongarch_lasx_xbnz_d:
1823 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1824 LoongArchISD::VALL_NONZERO);
1825 break;
1826 case Intrinsic::loongarch_lsx_bnz_v:
1827 case Intrinsic::loongarch_lasx_xbnz_v:
1828 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1829 LoongArchISD::VANY_NONZERO);
1830 break;
1831 }
1832}
1833
1834void LoongArchTargetLowering::ReplaceNodeResults(
1835 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1836 SDLoc DL(N);
1837 EVT VT = N->getValueType(0);
1838 switch (N->getOpcode()) {
1839 default:
1840 llvm_unreachable("Don't know how to legalize this operation");
1841 case ISD::SHL:
1842 case ISD::SRA:
1843 case ISD::SRL:
1844 case ISD::ROTR:
1845 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1846 "Unexpected custom legalisation");
1847 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1848 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1849 break;
1850 }
1851 break;
1852 case ISD::ROTL:
1853 ConstantSDNode *CN;
1854 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1855 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1856 break;
1857 }
1858 break;
1859 case ISD::FP_TO_SINT: {
1860 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1861 "Unexpected custom legalisation");
1862 SDValue Src = N->getOperand(0);
1863 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1864 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1865 TargetLowering::TypeSoftenFloat) {
1866 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1867 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1868 return;
1869 }
1870 // If the FP type needs to be softened, emit a library call using the 'si'
1871 // version. If we left it to default legalization we'd end up with 'di'.
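// E.g. for an f128 source this emits a call to __fixtfsi (i32 result)
// rather than __fixtfdi.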
1872 RTLIB::Libcall LC;
1873 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1874 MakeLibCallOptions CallOptions;
1875 EVT OpVT = Src.getValueType();
1876 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1877 SDValue Chain = SDValue();
1878 SDValue Result;
1879 std::tie(Result, Chain) =
1880 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1881 Results.push_back(Result);
1882 break;
1883 }
1884 case ISD::BITCAST: {
1885 SDValue Src = N->getOperand(0);
1886 EVT SrcVT = Src.getValueType();
1887 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1888 Subtarget.hasBasicF()) {
1889 SDValue Dst =
1890 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1891 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1892 }
1893 break;
1894 }
1895 case ISD::FP_TO_UINT: {
1896 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1897 "Unexpected custom legalisation");
1898 auto &TLI = DAG.getTargetLoweringInfo();
1899 SDValue Tmp1, Tmp2;
1900 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1902 break;
1903 }
1904 case ISD::BSWAP: {
1905 SDValue Src = N->getOperand(0);
1906 assert((VT == MVT::i16 || VT == MVT::i32) &&
1907 "Unexpected custom legalization");
1908 MVT GRLenVT = Subtarget.getGRLenVT();
1909 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1910 SDValue Tmp;
1911 switch (VT.getSizeInBits()) {
1912 default:
1913 llvm_unreachable("Unexpected operand width");
1914 case 16:
1915 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1916 break;
1917 case 32:
1918 // Only LA64 will get here due to the size mismatch between VT and
1919 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
1920 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1921 break;
1922 }
1923 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1924 break;
1925 }
1926 case ISD::BITREVERSE: {
1927 SDValue Src = N->getOperand(0);
1928 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1929 "Unexpected custom legalization");
1930 MVT GRLenVT = Subtarget.getGRLenVT();
1931 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1932 SDValue Tmp;
1933 switch (VT.getSizeInBits()) {
1934 default:
1935 llvm_unreachable("Unexpected operand width");
1936 case 8:
1937 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1938 break;
1939 case 32:
1940 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1941 break;
1942 }
1943 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1944 break;
1945 }
1946 case ISD::CTLZ:
1947 case ISD::CTTZ: {
1948 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1949 "Unexpected custom legalisation");
1950 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1951 break;
1952 }
1953 case ISD::INTRINSIC_W_CHAIN: {
1954 SDValue Chain = N->getOperand(0);
1955 SDValue Op2 = N->getOperand(2);
1956 MVT GRLenVT = Subtarget.getGRLenVT();
1957 const StringRef ErrorMsgOOR = "argument out of range";
1958 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1959 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1960
1961 switch (N->getConstantOperandVal(1)) {
1962 default:
1963 llvm_unreachable("Unexpected Intrinsic.");
1964 case Intrinsic::loongarch_movfcsr2gr: {
1965 if (!Subtarget.hasBasicF()) {
1966 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1967 return;
1968 }
1969 unsigned Imm = Op2->getAsZExtVal();
1970 if (!isUInt<2>(Imm)) {
1971 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1972 return;
1973 }
1974 SDValue MOVFCSR2GRResults = DAG.getNode(
1975 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1976 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1977 Results.push_back(
1978 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1979 Results.push_back(MOVFCSR2GRResults.getValue(1));
1980 break;
1981 }
1982#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1983 case Intrinsic::loongarch_##NAME: { \
1984 SDValue NODE = DAG.getNode( \
1985 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1986 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1987 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1988 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1989 Results.push_back(NODE.getValue(1)); \
1990 break; \
1991 }
1992 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1993 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1994 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1995 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1996 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1997 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1998#undef CRC_CASE_EXT_BINARYOP
1999
2000#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2001 case Intrinsic::loongarch_##NAME: { \
2002 SDValue NODE = DAG.getNode( \
2003 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2004 {Chain, Op2, \
2005 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2006 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2007 Results.push_back(NODE.getValue(1)); \
2008 break; \
2009 }
2010 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2011 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2012#undef CRC_CASE_EXT_UNARYOP
2013#define CSR_CASE(ID) \
2014 case Intrinsic::loongarch_##ID: { \
2015 if (!Subtarget.is64Bit()) \
2016 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
2017 break; \
2018 }
2019 CSR_CASE(csrrd_d);
2020 CSR_CASE(csrwr_d);
2021 CSR_CASE(csrxchg_d);
2022 CSR_CASE(iocsrrd_d);
2023#undef CSR_CASE
2024 case Intrinsic::loongarch_csrrd_w: {
2025 unsigned Imm = Op2->getAsZExtVal();
2026 if (!isUInt<14>(Imm)) {
2027 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2028 return;
2029 }
2030 SDValue CSRRDResults =
2031 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2032 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2033 Results.push_back(
2034 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2035 Results.push_back(CSRRDResults.getValue(1));
2036 break;
2037 }
2038 case Intrinsic::loongarch_csrwr_w: {
2039 unsigned Imm = N->getConstantOperandVal(3);
2040 if (!isUInt<14>(Imm)) {
2041 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2042 return;
2043 }
2044 SDValue CSRWRResults =
2045 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2046 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2047 DAG.getConstant(Imm, DL, GRLenVT)});
2048 Results.push_back(
2049 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2050 Results.push_back(CSRWRResults.getValue(1));
2051 break;
2052 }
2053 case Intrinsic::loongarch_csrxchg_w: {
2054 unsigned Imm = N->getConstantOperandVal(4);
2055 if (!isUInt<14>(Imm)) {
2056 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2057 return;
2058 }
2059 SDValue CSRXCHGResults = DAG.getNode(
2060 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2061 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2062 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2063 DAG.getConstant(Imm, DL, GRLenVT)});
2064 Results.push_back(
2065 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2066 Results.push_back(CSRXCHGResults.getValue(1));
2067 break;
2068 }
2069#define IOCSRRD_CASE(NAME, NODE) \
2070 case Intrinsic::loongarch_##NAME: { \
2071 SDValue IOCSRRDResults = \
2072 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2073 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2074 Results.push_back( \
2075 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2076 Results.push_back(IOCSRRDResults.getValue(1)); \
2077 break; \
2078 }
2079 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2080 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2081 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2082#undef IOCSRRD_CASE
2083 case Intrinsic::loongarch_cpucfg: {
2084 SDValue CPUCFGResults =
2085 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2086 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2087 Results.push_back(
2088 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2089 Results.push_back(CPUCFGResults.getValue(1));
2090 break;
2091 }
2092 case Intrinsic::loongarch_lddir_d: {
2093 if (!Subtarget.is64Bit()) {
2094 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2095 return;
2096 }
2097 break;
2098 }
2099 }
2100 break;
2101 }
2102 case ISD::READ_REGISTER: {
2103 if (Subtarget.is64Bit())
2104 DAG.getContext()->emitError(
2105 "On LA64, only 64-bit registers can be read.");
2106 else
2107 DAG.getContext()->emitError(
2108 "On LA32, only 32-bit registers can be read.");
2109 Results.push_back(DAG.getUNDEF(VT));
2110 Results.push_back(N->getOperand(0));
2111 break;
2112 }
2113 case ISD::INTRINSIC_WO_CHAIN: {
2114 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2115 break;
2116 }
2117 }
2118}
2119
2120static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2121 TargetLowering::DAGCombinerInfo &DCI,
2122 const LoongArchSubtarget &Subtarget) {
2123 if (DCI.isBeforeLegalizeOps())
2124 return SDValue();
2125
2126 SDValue FirstOperand = N->getOperand(0);
2127 SDValue SecondOperand = N->getOperand(1);
2128 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2129 EVT ValTy = N->getValueType(0);
2130 SDLoc DL(N);
2131 uint64_t lsb, msb;
2132 unsigned SMIdx, SMLen;
2133 ConstantSDNode *CN;
2134 SDValue NewOperand;
2135 MVT GRLenVT = Subtarget.getGRLenVT();
2136
2137 // Op's second operand must be a shifted mask.
2138 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2139 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2140 return SDValue();
2141
2142 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2143 // Pattern match BSTRPICK.
2144 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
2145 // => BSTRPICK $dst, $src, msb, lsb
2146 // where msb = lsb + len - 1
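// For example, with lsb = 8 and len = 16 on i32:
//   $dst = and (srl $src, 8), 0xffff
//   => BSTRPICK.W $dst, $src, 23, 8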
2147
2148 // The second operand of the shift must be an immediate.
2149 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2150 return SDValue();
2151
2152 lsb = CN->getZExtValue();
2153
2154 // Return if the shifted mask does not start at bit 0 or the sum of its
2155 // length and lsb exceeds the word's size.
2156 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2157 return SDValue();
2158
2159 NewOperand = FirstOperand.getOperand(0);
2160 } else {
2161 // Pattern match BSTRPICK.
2162 // $dst = and $src, (2**len - 1), if len > 12
2163 // => BSTRPICK $dst, $src, msb, lsb
2164 // where lsb = 0 and msb = len - 1
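// For example, on i64:
//   $dst = and $src, 0xffff ; len = 16 > 12
//   => BSTRPICK.D $dst, $src, 15, 0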
2165
2166 // If the mask is <= 0xfff, andi can be used instead.
2167 if (CN->getZExtValue() <= 0xfff)
2168 return SDValue();
2169
2170 // Return if the shifted mask's MSB position exceeds the word's size.
2171 if (SMIdx + SMLen > ValTy.getSizeInBits())
2172 return SDValue();
2173
2174 if (SMIdx > 0) {
2175 // Omit if the constant has more than 2 uses. This is a conservative
2176 // decision. Whether it is a win depends on the HW microarchitecture.
2177 // However, it should always be better for 1 and 2 uses.
2178 if (CN->use_size() > 2)
2179 return SDValue();
2180 // Return if the constant can be composed by a single LU12I.W.
2181 if ((CN->getZExtValue() & 0xfff) == 0)
2182 return SDValue();
2183 // Return if the constant can be composed by a single ADDI with
2184 // the zero register.
2185 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2186 return SDValue();
2187 }
2188
2189 lsb = SMIdx;
2190 NewOperand = FirstOperand;
2191 }
2192
2193 msb = lsb + SMLen - 1;
2194 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2195 DAG.getConstant(msb, DL, GRLenVT),
2196 DAG.getConstant(lsb, DL, GRLenVT));
2197 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2198 return NR0;
2199 // Try to optimize to
2200 // bstrpick $Rd, $Rs, msb, lsb
2201 // slli $Rd, $Rd, lsb
2202 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2203 DAG.getConstant(lsb, DL, GRLenVT));
2204}
2205
2206static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2207 TargetLowering::DAGCombinerInfo &DCI,
2208 const LoongArchSubtarget &Subtarget) {
2209 if (DCI.isBeforeLegalizeOps())
2210 return SDValue();
2211
2212 // $dst = srl (and $src, Mask), Shamt
2213 // =>
2214 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2215 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2216 //
2217
2218 SDValue FirstOperand = N->getOperand(0);
2219 ConstantSDNode *CN;
2220 EVT ValTy = N->getValueType(0);
2221 SDLoc DL(N);
2222 MVT GRLenVT = Subtarget.getGRLenVT();
2223 unsigned MaskIdx, MaskLen;
2224 uint64_t Shamt;
2225
2226 // The first operand must be an AND and the second operand of the AND must be
2227 // a shifted mask.
2228 if (FirstOperand.getOpcode() != ISD::AND ||
2229 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2230 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2231 return SDValue();
2232
2233 // The second operand (shift amount) must be an immediate.
2234 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2235 return SDValue();
2236
2237 Shamt = CN->getZExtValue();
2238 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2239 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2240 FirstOperand->getOperand(0),
2241 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2242 DAG.getConstant(Shamt, DL, GRLenVT));
2243
2244 return SDValue();
2245}
2246
2247static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2248 TargetLowering::DAGCombinerInfo &DCI,
2249 const LoongArchSubtarget &Subtarget) {
2250 MVT GRLenVT = Subtarget.getGRLenVT();
2251 EVT ValTy = N->getValueType(0);
2252 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2253 ConstantSDNode *CN0, *CN1;
2254 SDLoc DL(N);
2255 unsigned ValBits = ValTy.getSizeInBits();
2256 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2257 unsigned Shamt;
2258 bool SwapAndRetried = false;
2259
2260 if (DCI.isBeforeLegalizeOps())
2261 return SDValue();
2262
2263 if (ValBits != 32 && ValBits != 64)
2264 return SDValue();
2265
2266Retry:
2267 // 1st pattern to match BSTRINS:
2268 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2269 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2270 // =>
2271 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
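// For example, with size = 8 and lsb = 4 on i32:
//   R = or (and X, 0xfffff00f), (and (shl Y, 4), 0xff0)
//   => R = BSTRINS.W X, Y, 11, 4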
2272 if (N0.getOpcode() == ISD::AND &&
2273 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2274 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2275 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2276 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2277 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2278 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2279 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2280 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2281 (MaskIdx0 + MaskLen0 <= ValBits)) {
2282 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2283 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2284 N1.getOperand(0).getOperand(0),
2285 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2286 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2287 }
2288
2289 // 2nd pattern to match BSTRINS:
2290 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2291 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2292 // =>
2293 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
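// For example, with size = 8 and lsb = 4 on i32:
//   R = or (and X, 0xfffff00f), (shl (and Y, 0xff), 4)
//   => R = BSTRINS.W X, Y, 11, 4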
2294 if (N0.getOpcode() == ISD::AND &&
2295 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2296 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2297 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2298 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2299 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2300 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2301 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2302 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2303 (MaskIdx0 + MaskLen0 <= ValBits)) {
2304 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2305 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2306 N1.getOperand(0).getOperand(0),
2307 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2308 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2309 }
2310
2311 // 3rd pattern to match BSTRINS:
2312 // R = or (and X, mask0), (and Y, mask1)
2313 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2314 // =>
2315 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2316 // where msb = lsb + size - 1
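// For example, on i32:
//   R = or (and X, 0xffff00ff), (and Y, 0xff00)
//   => R = BSTRINS.W X, (srl (and Y, 0xff00), 8), 15, 8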
2317 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2318 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2319 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2320 (MaskIdx0 + MaskLen0 <= 64) &&
2321 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2322 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2323 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2324 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2325 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2326 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2327 DAG.getConstant(ValBits == 32
2328 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2329 : (MaskIdx0 + MaskLen0 - 1),
2330 DL, GRLenVT),
2331 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2332 }
2333
2334 // 4th pattern to match BSTRINS:
2335 // R = or (and X, mask), (shl Y, shamt)
2336 // where mask = (2**shamt - 1)
2337 // =>
2338 // R = BSTRINS X, Y, ValBits - 1, shamt
2339 // where ValBits = 32 or 64
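// For example, on i32:
//   R = or (and X, 0xff), (shl Y, 8) ; mask = 2**8 - 1, shamt = 8
//   => R = BSTRINS.W X, Y, 31, 8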
2340 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2341 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2342 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2343 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2344 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2345 (MaskIdx0 + MaskLen0 <= ValBits)) {
2346 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2347 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2348 N1.getOperand(0),
2349 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2350 DAG.getConstant(Shamt, DL, GRLenVT));
2351 }
2352
2353 // 5th pattern to match BSTRINS:
2354 // R = or (and X, mask), const
2355 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2356 // =>
2357 // R = BSTRINS X, (const >> lsb), msb, lsb
2358 // where msb = lsb + size - 1
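// For example, on i32:
//   R = or (and X, 0xffff00ff), 0x4400 ; ~mask = 0xff00, const = 0x4400
//   => R = BSTRINS.W X, 0x44, 15, 8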
2359 if (N0.getOpcode() == ISD::AND &&
2360 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2361 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2362 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2363 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2364 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2365 return DAG.getNode(
2366 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2367 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2368 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2369 : (MaskIdx0 + MaskLen0 - 1),
2370 DL, GRLenVT),
2371 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2372 }
2373
2374 // 6th pattern.
2375 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2376 // by the incoming bits are known to be zero.
2377 // =>
2378 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2379 //
2380 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2381 // pattern is more common than the 1st. So we put the 1st before the 6th in
2382 // order to match as many nodes as possible.
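// For example, if bits [11:4] of b are known to be zero:
//   a = b | ((c & 0xff) << 4)
//   => a = BSTRINS b, c, 11, 4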
2383 ConstantSDNode *CNMask, *CNShamt;
2384 unsigned MaskIdx, MaskLen;
2385 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2386 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2387 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2388 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2389 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2390 Shamt = CNShamt->getZExtValue();
2391 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2392 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2393 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2394 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2395 N1.getOperand(0).getOperand(0),
2396 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2397 DAG.getConstant(Shamt, DL, GRLenVT));
2398 }
2399 }
2400
2401 // 7th pattern.
2402 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2403 // overwritten by the incoming bits are known to be zero.
2404 // =>
2405 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2406 //
2407 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2408 // before the 7th in order to match as many nodes as possible.
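// For example, if bits [11:4] of b are known to be zero:
//   a = b | ((c << 4) & 0xff0)
//   => a = BSTRINS b, c, 11, 4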
2409 if (N1.getOpcode() == ISD::AND &&
2410 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2411 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2412 N1.getOperand(0).getOpcode() == ISD::SHL &&
2413 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2414 CNShamt->getZExtValue() == MaskIdx) {
2415 APInt ShMask(ValBits, CNMask->getZExtValue());
2416 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2417 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2418 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2419 N1.getOperand(0).getOperand(0),
2420 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2421 DAG.getConstant(MaskIdx, DL, GRLenVT));
2422 }
2423 }
2424
2425 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2426 if (!SwapAndRetried) {
2427 std::swap(N0, N1);
2428 SwapAndRetried = true;
2429 goto Retry;
2430 }
2431
2432 SwapAndRetried = false;
2433Retry2:
2434 // 8th pattern.
2435 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2436 // the incoming bits are known to be zero.
2437 // =>
2438 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2439 //
2440 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2441 // we put it here in order to match as many nodes as possible or generate
2442 // fewer instructions.
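// For example, if bits [15:8] of b are known to be zero:
//   a = b | (c & 0xff00)
//   => a = BSTRINS b, (srl c, 8), 15, 8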
2443 if (N1.getOpcode() == ISD::AND &&
2444 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2445 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2446 APInt ShMask(ValBits, CNMask->getZExtValue());
2447 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2448 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2449 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2450 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2451 N1->getOperand(0),
2452 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2453 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2454 DAG.getConstant(MaskIdx, DL, GRLenVT));
2455 }
2456 }
2457 // Swap N0/N1 and retry.
2458 if (!SwapAndRetried) {
2459 std::swap(N0, N1);
2460 SwapAndRetried = true;
2461 goto Retry2;
2462 }
2463
2464 return SDValue();
2465}
2466
2467// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
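// This holds because byte-reversing the 32-bit word and then bit-reversing the
// whole word leaves each byte bit-reversed in its original position, which is
// exactly what BITREV_4B computes.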
2468static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2469 TargetLowering::DAGCombinerInfo &DCI,
2470 const LoongArchSubtarget &Subtarget) {
2471 if (DCI.isBeforeLegalizeOps())
2472 return SDValue();
2473
2474 SDValue Src = N->getOperand(0);
2475 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2476 return SDValue();
2477
2478 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2479 Src.getOperand(0));
2480}
2481
2482template <unsigned N>
2483static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2484 SelectionDAG &DAG,
2485 const LoongArchSubtarget &Subtarget,
2486 bool IsSigned = false) {
2487 SDLoc DL(Node);
2488 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2489 // Check the ImmArg.
2490 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2491 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2492 DAG.getContext()->emitError(Node->getOperationName(0) +
2493 ": argument out of range.");
2494 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2495 }
2496 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2497}
2498
2499template <unsigned N>
2500static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2501 SelectionDAG &DAG, bool IsSigned = false) {
2502 SDLoc DL(Node);
2503 EVT ResTy = Node->getValueType(0);
2504 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2505
2506 // Check the ImmArg.
2507 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2508 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2509 DAG.getContext()->emitError(Node->getOperationName(0) +
2510 ": argument out of range.");
2511 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2512 }
2513 return DAG.getConstant(
2514 APInt(ResTy.getScalarType().getSizeInBits(),
2515 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2516 DL, ResTy);
2517}
2518
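// Reduces each per-element shift amount modulo the element width by masking
// with EltBits - 1, matching the modulo semantics of the vector shift
// instructions.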
2519static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2520 SDLoc DL(Node);
2521 EVT ResTy = Node->getValueType(0);
2522 SDValue Vec = Node->getOperand(2);
2523 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2524 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2525}
2526
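// Lowers vbitclr.{b/h/w/d}: clears, in each element of the first vector
// operand, the bit selected by the corresponding element of the second:
//   res = vec1 & ~(1 << (vec2 % EltBits))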
2527static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2528 SDLoc DL(Node);
2529 EVT ResTy = Node->getValueType(0);
2530 SDValue One = DAG.getConstant(1, DL, ResTy);
2531 SDValue Bit =
2532 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2533
2534 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2535 DAG.getNOT(DL, Bit, ResTy));
2536}
2537
2538template <unsigned N>
2539static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2540 SDLoc DL(Node);
2541 EVT ResTy = Node->getValueType(0);
2542 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2543 // Check the unsigned ImmArg.
2544 if (!isUInt<N>(CImm->getZExtValue())) {
2545 DAG.getContext()->emitError(Node->getOperationName(0) +
2546 ": argument out of range.");
2547 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2548 }
2549
2550 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2551 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2552
2553 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2554}
2555
2556template <unsigned N>
2557static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2558 SDLoc DL(Node);
2559 EVT ResTy = Node->getValueType(0);
2560 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2561 // Check the unsigned ImmArg.
2562 if (!isUInt<N>(CImm->getZExtValue())) {
2563 DAG.getContext()->emitError(Node->getOperationName(0) +
2564 ": argument out of range.");
2565 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2566 }
2567
2568 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2569 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2570 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2571}
2572
2573template <unsigned N>
2574static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2575 SDLoc DL(Node);
2576 EVT ResTy = Node->getValueType(0);
2577 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2578 // Check the unsigned ImmArg.
2579 if (!isUInt<N>(CImm->getZExtValue())) {
2580 DAG.getContext()->emitError(Node->getOperationName(0) +
2581 ": argument out of range.");
2582 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2583 }
2584
2585 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2586 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2587 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2588}
2589
2590static SDValue
2591performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2592 TargetLowering::DAGCombinerInfo &DCI,
2593 const LoongArchSubtarget &Subtarget) {
2594 SDLoc DL(N);
2595 switch (N->getConstantOperandVal(0)) {
2596 default:
2597 break;
2598 case Intrinsic::loongarch_lsx_vadd_b:
2599 case Intrinsic::loongarch_lsx_vadd_h:
2600 case Intrinsic::loongarch_lsx_vadd_w:
2601 case Intrinsic::loongarch_lsx_vadd_d:
2602 case Intrinsic::loongarch_lasx_xvadd_b:
2603 case Intrinsic::loongarch_lasx_xvadd_h:
2604 case Intrinsic::loongarch_lasx_xvadd_w:
2605 case Intrinsic::loongarch_lasx_xvadd_d:
2606 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2607 N->getOperand(2));
2608 case Intrinsic::loongarch_lsx_vaddi_bu:
2609 case Intrinsic::loongarch_lsx_vaddi_hu:
2610 case Intrinsic::loongarch_lsx_vaddi_wu:
2611 case Intrinsic::loongarch_lsx_vaddi_du:
2612 case Intrinsic::loongarch_lasx_xvaddi_bu:
2613 case Intrinsic::loongarch_lasx_xvaddi_hu:
2614 case Intrinsic::loongarch_lasx_xvaddi_wu:
2615 case Intrinsic::loongarch_lasx_xvaddi_du:
2616 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2617 lowerVectorSplatImm<5>(N, 2, DAG));
2618 case Intrinsic::loongarch_lsx_vsub_b:
2619 case Intrinsic::loongarch_lsx_vsub_h:
2620 case Intrinsic::loongarch_lsx_vsub_w:
2621 case Intrinsic::loongarch_lsx_vsub_d:
2622 case Intrinsic::loongarch_lasx_xvsub_b:
2623 case Intrinsic::loongarch_lasx_xvsub_h:
2624 case Intrinsic::loongarch_lasx_xvsub_w:
2625 case Intrinsic::loongarch_lasx_xvsub_d:
2626 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2627 N->getOperand(2));
2628 case Intrinsic::loongarch_lsx_vsubi_bu:
2629 case Intrinsic::loongarch_lsx_vsubi_hu:
2630 case Intrinsic::loongarch_lsx_vsubi_wu:
2631 case Intrinsic::loongarch_lsx_vsubi_du:
2632 case Intrinsic::loongarch_lasx_xvsubi_bu:
2633 case Intrinsic::loongarch_lasx_xvsubi_hu:
2634 case Intrinsic::loongarch_lasx_xvsubi_wu:
2635 case Intrinsic::loongarch_lasx_xvsubi_du:
2636 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2637 lowerVectorSplatImm<5>(N, 2, DAG));
2638 case Intrinsic::loongarch_lsx_vneg_b:
2639 case Intrinsic::loongarch_lsx_vneg_h:
2640 case Intrinsic::loongarch_lsx_vneg_w:
2641 case Intrinsic::loongarch_lsx_vneg_d:
2642 case Intrinsic::loongarch_lasx_xvneg_b:
2643 case Intrinsic::loongarch_lasx_xvneg_h:
2644 case Intrinsic::loongarch_lasx_xvneg_w:
2645 case Intrinsic::loongarch_lasx_xvneg_d:
2646 return DAG.getNode(
2647 ISD::SUB, DL, N->getValueType(0),
2648 DAG.getConstant(
2649 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2650 /*isSigned=*/true),
2651 SDLoc(N), N->getValueType(0)),
2652 N->getOperand(1));
2653 case Intrinsic::loongarch_lsx_vmax_b:
2654 case Intrinsic::loongarch_lsx_vmax_h:
2655 case Intrinsic::loongarch_lsx_vmax_w:
2656 case Intrinsic::loongarch_lsx_vmax_d:
2657 case Intrinsic::loongarch_lasx_xvmax_b:
2658 case Intrinsic::loongarch_lasx_xvmax_h:
2659 case Intrinsic::loongarch_lasx_xvmax_w:
2660 case Intrinsic::loongarch_lasx_xvmax_d:
2661 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2662 N->getOperand(2));
2663 case Intrinsic::loongarch_lsx_vmax_bu:
2664 case Intrinsic::loongarch_lsx_vmax_hu:
2665 case Intrinsic::loongarch_lsx_vmax_wu:
2666 case Intrinsic::loongarch_lsx_vmax_du:
2667 case Intrinsic::loongarch_lasx_xvmax_bu:
2668 case Intrinsic::loongarch_lasx_xvmax_hu:
2669 case Intrinsic::loongarch_lasx_xvmax_wu:
2670 case Intrinsic::loongarch_lasx_xvmax_du:
2671 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2672 N->getOperand(2));
2673 case Intrinsic::loongarch_lsx_vmaxi_b:
2674 case Intrinsic::loongarch_lsx_vmaxi_h:
2675 case Intrinsic::loongarch_lsx_vmaxi_w:
2676 case Intrinsic::loongarch_lsx_vmaxi_d:
2677 case Intrinsic::loongarch_lasx_xvmaxi_b:
2678 case Intrinsic::loongarch_lasx_xvmaxi_h:
2679 case Intrinsic::loongarch_lasx_xvmaxi_w:
2680 case Intrinsic::loongarch_lasx_xvmaxi_d:
2681 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2682 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2683 case Intrinsic::loongarch_lsx_vmaxi_bu:
2684 case Intrinsic::loongarch_lsx_vmaxi_hu:
2685 case Intrinsic::loongarch_lsx_vmaxi_wu:
2686 case Intrinsic::loongarch_lsx_vmaxi_du:
2687 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2688 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2689 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2690 case Intrinsic::loongarch_lasx_xvmaxi_du:
2691 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2692 lowerVectorSplatImm<5>(N, 2, DAG));
2693 case Intrinsic::loongarch_lsx_vmin_b:
2694 case Intrinsic::loongarch_lsx_vmin_h:
2695 case Intrinsic::loongarch_lsx_vmin_w:
2696 case Intrinsic::loongarch_lsx_vmin_d:
2697 case Intrinsic::loongarch_lasx_xvmin_b:
2698 case Intrinsic::loongarch_lasx_xvmin_h:
2699 case Intrinsic::loongarch_lasx_xvmin_w:
2700 case Intrinsic::loongarch_lasx_xvmin_d:
2701 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2702 N->getOperand(2));
2703 case Intrinsic::loongarch_lsx_vmin_bu:
2704 case Intrinsic::loongarch_lsx_vmin_hu:
2705 case Intrinsic::loongarch_lsx_vmin_wu:
2706 case Intrinsic::loongarch_lsx_vmin_du:
2707 case Intrinsic::loongarch_lasx_xvmin_bu:
2708 case Intrinsic::loongarch_lasx_xvmin_hu:
2709 case Intrinsic::loongarch_lasx_xvmin_wu:
2710 case Intrinsic::loongarch_lasx_xvmin_du:
2711 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2712 N->getOperand(2));
2713 case Intrinsic::loongarch_lsx_vmini_b:
2714 case Intrinsic::loongarch_lsx_vmini_h:
2715 case Intrinsic::loongarch_lsx_vmini_w:
2716 case Intrinsic::loongarch_lsx_vmini_d:
2717 case Intrinsic::loongarch_lasx_xvmini_b:
2718 case Intrinsic::loongarch_lasx_xvmini_h:
2719 case Intrinsic::loongarch_lasx_xvmini_w:
2720 case Intrinsic::loongarch_lasx_xvmini_d:
2721 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2722 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2723 case Intrinsic::loongarch_lsx_vmini_bu:
2724 case Intrinsic::loongarch_lsx_vmini_hu:
2725 case Intrinsic::loongarch_lsx_vmini_wu:
2726 case Intrinsic::loongarch_lsx_vmini_du:
2727 case Intrinsic::loongarch_lasx_xvmini_bu:
2728 case Intrinsic::loongarch_lasx_xvmini_hu:
2729 case Intrinsic::loongarch_lasx_xvmini_wu:
2730 case Intrinsic::loongarch_lasx_xvmini_du:
2731 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2732 lowerVectorSplatImm<5>(N, 2, DAG));
2733 case Intrinsic::loongarch_lsx_vmul_b:
2734 case Intrinsic::loongarch_lsx_vmul_h:
2735 case Intrinsic::loongarch_lsx_vmul_w:
2736 case Intrinsic::loongarch_lsx_vmul_d:
2737 case Intrinsic::loongarch_lasx_xvmul_b:
2738 case Intrinsic::loongarch_lasx_xvmul_h:
2739 case Intrinsic::loongarch_lasx_xvmul_w:
2740 case Intrinsic::loongarch_lasx_xvmul_d:
2741 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2742 N->getOperand(2));
2743 case Intrinsic::loongarch_lsx_vmadd_b:
2744 case Intrinsic::loongarch_lsx_vmadd_h:
2745 case Intrinsic::loongarch_lsx_vmadd_w:
2746 case Intrinsic::loongarch_lsx_vmadd_d:
2747 case Intrinsic::loongarch_lasx_xvmadd_b:
2748 case Intrinsic::loongarch_lasx_xvmadd_h:
2749 case Intrinsic::loongarch_lasx_xvmadd_w:
2750 case Intrinsic::loongarch_lasx_xvmadd_d: {
2751 EVT ResTy = N->getValueType(0);
2752 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2753 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2754 N->getOperand(3)));
2755 }
2756 case Intrinsic::loongarch_lsx_vmsub_b:
2757 case Intrinsic::loongarch_lsx_vmsub_h:
2758 case Intrinsic::loongarch_lsx_vmsub_w:
2759 case Intrinsic::loongarch_lsx_vmsub_d:
2760 case Intrinsic::loongarch_lasx_xvmsub_b:
2761 case Intrinsic::loongarch_lasx_xvmsub_h:
2762 case Intrinsic::loongarch_lasx_xvmsub_w:
2763 case Intrinsic::loongarch_lasx_xvmsub_d: {
2764 EVT ResTy = N->getValueType(0);
2765 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2766 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2767 N->getOperand(3)));
2768 }
2769 case Intrinsic::loongarch_lsx_vdiv_b:
2770 case Intrinsic::loongarch_lsx_vdiv_h:
2771 case Intrinsic::loongarch_lsx_vdiv_w:
2772 case Intrinsic::loongarch_lsx_vdiv_d:
2773 case Intrinsic::loongarch_lasx_xvdiv_b:
2774 case Intrinsic::loongarch_lasx_xvdiv_h:
2775 case Intrinsic::loongarch_lasx_xvdiv_w:
2776 case Intrinsic::loongarch_lasx_xvdiv_d:
2777 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2778 N->getOperand(2));
2779 case Intrinsic::loongarch_lsx_vdiv_bu:
2780 case Intrinsic::loongarch_lsx_vdiv_hu:
2781 case Intrinsic::loongarch_lsx_vdiv_wu:
2782 case Intrinsic::loongarch_lsx_vdiv_du:
2783 case Intrinsic::loongarch_lasx_xvdiv_bu:
2784 case Intrinsic::loongarch_lasx_xvdiv_hu:
2785 case Intrinsic::loongarch_lasx_xvdiv_wu:
2786 case Intrinsic::loongarch_lasx_xvdiv_du:
2787 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2788 N->getOperand(2));
2789 case Intrinsic::loongarch_lsx_vmod_b:
2790 case Intrinsic::loongarch_lsx_vmod_h:
2791 case Intrinsic::loongarch_lsx_vmod_w:
2792 case Intrinsic::loongarch_lsx_vmod_d:
2793 case Intrinsic::loongarch_lasx_xvmod_b:
2794 case Intrinsic::loongarch_lasx_xvmod_h:
2795 case Intrinsic::loongarch_lasx_xvmod_w:
2796 case Intrinsic::loongarch_lasx_xvmod_d:
2797 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2798 N->getOperand(2));
2799 case Intrinsic::loongarch_lsx_vmod_bu:
2800 case Intrinsic::loongarch_lsx_vmod_hu:
2801 case Intrinsic::loongarch_lsx_vmod_wu:
2802 case Intrinsic::loongarch_lsx_vmod_du:
2803 case Intrinsic::loongarch_lasx_xvmod_bu:
2804 case Intrinsic::loongarch_lasx_xvmod_hu:
2805 case Intrinsic::loongarch_lasx_xvmod_wu:
2806 case Intrinsic::loongarch_lasx_xvmod_du:
2807 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2808 N->getOperand(2));
2809 case Intrinsic::loongarch_lsx_vand_v:
2810 case Intrinsic::loongarch_lasx_xvand_v:
2811 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2812 N->getOperand(2));
2813 case Intrinsic::loongarch_lsx_vor_v:
2814 case Intrinsic::loongarch_lasx_xvor_v:
2815 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2816 N->getOperand(2));
2817 case Intrinsic::loongarch_lsx_vxor_v:
2818 case Intrinsic::loongarch_lasx_xvxor_v:
2819 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2820 N->getOperand(2));
2821 case Intrinsic::loongarch_lsx_vnor_v:
2822 case Intrinsic::loongarch_lasx_xvnor_v: {
2823 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2824 N->getOperand(2));
2825 return DAG.getNOT(DL, Res, Res->getValueType(0));
2826 }
2827 case Intrinsic::loongarch_lsx_vandi_b:
2828 case Intrinsic::loongarch_lasx_xvandi_b:
2829 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2830 lowerVectorSplatImm<8>(N, 2, DAG));
2831 case Intrinsic::loongarch_lsx_vori_b:
2832 case Intrinsic::loongarch_lasx_xvori_b:
2833 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2834 lowerVectorSplatImm<8>(N, 2, DAG));
2835 case Intrinsic::loongarch_lsx_vxori_b:
2836 case Intrinsic::loongarch_lasx_xvxori_b:
2837 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2838 lowerVectorSplatImm<8>(N, 2, DAG));
2839 case Intrinsic::loongarch_lsx_vsll_b:
2840 case Intrinsic::loongarch_lsx_vsll_h:
2841 case Intrinsic::loongarch_lsx_vsll_w:
2842 case Intrinsic::loongarch_lsx_vsll_d:
2843 case Intrinsic::loongarch_lasx_xvsll_b:
2844 case Intrinsic::loongarch_lasx_xvsll_h:
2845 case Intrinsic::loongarch_lasx_xvsll_w:
2846 case Intrinsic::loongarch_lasx_xvsll_d:
2847 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2848 truncateVecElts(N, DAG));
2849 case Intrinsic::loongarch_lsx_vslli_b:
2850 case Intrinsic::loongarch_lasx_xvslli_b:
2851 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2852 lowerVectorSplatImm<3>(N, 2, DAG));
2853 case Intrinsic::loongarch_lsx_vslli_h:
2854 case Intrinsic::loongarch_lasx_xvslli_h:
2855 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2856 lowerVectorSplatImm<4>(N, 2, DAG));
2857 case Intrinsic::loongarch_lsx_vslli_w:
2858 case Intrinsic::loongarch_lasx_xvslli_w:
2859 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2860 lowerVectorSplatImm<5>(N, 2, DAG));
2861 case Intrinsic::loongarch_lsx_vslli_d:
2862 case Intrinsic::loongarch_lasx_xvslli_d:
2863 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2864 lowerVectorSplatImm<6>(N, 2, DAG));
2865 case Intrinsic::loongarch_lsx_vsrl_b:
2866 case Intrinsic::loongarch_lsx_vsrl_h:
2867 case Intrinsic::loongarch_lsx_vsrl_w:
2868 case Intrinsic::loongarch_lsx_vsrl_d:
2869 case Intrinsic::loongarch_lasx_xvsrl_b:
2870 case Intrinsic::loongarch_lasx_xvsrl_h:
2871 case Intrinsic::loongarch_lasx_xvsrl_w:
2872 case Intrinsic::loongarch_lasx_xvsrl_d:
2873 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2874 truncateVecElts(N, DAG));
2875 case Intrinsic::loongarch_lsx_vsrli_b:
2876 case Intrinsic::loongarch_lasx_xvsrli_b:
2877 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2878 lowerVectorSplatImm<3>(N, 2, DAG));
2879 case Intrinsic::loongarch_lsx_vsrli_h:
2880 case Intrinsic::loongarch_lasx_xvsrli_h:
2881 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2882 lowerVectorSplatImm<4>(N, 2, DAG));
2883 case Intrinsic::loongarch_lsx_vsrli_w:
2884 case Intrinsic::loongarch_lasx_xvsrli_w:
2885 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2886 lowerVectorSplatImm<5>(N, 2, DAG));
2887 case Intrinsic::loongarch_lsx_vsrli_d:
2888 case Intrinsic::loongarch_lasx_xvsrli_d:
2889 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2890 lowerVectorSplatImm<6>(N, 2, DAG));
2891 case Intrinsic::loongarch_lsx_vsra_b:
2892 case Intrinsic::loongarch_lsx_vsra_h:
2893 case Intrinsic::loongarch_lsx_vsra_w:
2894 case Intrinsic::loongarch_lsx_vsra_d:
2895 case Intrinsic::loongarch_lasx_xvsra_b:
2896 case Intrinsic::loongarch_lasx_xvsra_h:
2897 case Intrinsic::loongarch_lasx_xvsra_w:
2898 case Intrinsic::loongarch_lasx_xvsra_d:
2899 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2900 truncateVecElts(N, DAG));
2901 case Intrinsic::loongarch_lsx_vsrai_b:
2902 case Intrinsic::loongarch_lasx_xvsrai_b:
2903 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2904 lowerVectorSplatImm<3>(N, 2, DAG));
2905 case Intrinsic::loongarch_lsx_vsrai_h:
2906 case Intrinsic::loongarch_lasx_xvsrai_h:
2907 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2908 lowerVectorSplatImm<4>(N, 2, DAG));
2909 case Intrinsic::loongarch_lsx_vsrai_w:
2910 case Intrinsic::loongarch_lasx_xvsrai_w:
2911 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2912 lowerVectorSplatImm<5>(N, 2, DAG));
2913 case Intrinsic::loongarch_lsx_vsrai_d:
2914 case Intrinsic::loongarch_lasx_xvsrai_d:
2915 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2916 lowerVectorSplatImm<6>(N, 2, DAG));
2917 case Intrinsic::loongarch_lsx_vclz_b:
2918 case Intrinsic::loongarch_lsx_vclz_h:
2919 case Intrinsic::loongarch_lsx_vclz_w:
2920 case Intrinsic::loongarch_lsx_vclz_d:
2921 case Intrinsic::loongarch_lasx_xvclz_b:
2922 case Intrinsic::loongarch_lasx_xvclz_h:
2923 case Intrinsic::loongarch_lasx_xvclz_w:
2924 case Intrinsic::loongarch_lasx_xvclz_d:
2925 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2926 case Intrinsic::loongarch_lsx_vpcnt_b:
2927 case Intrinsic::loongarch_lsx_vpcnt_h:
2928 case Intrinsic::loongarch_lsx_vpcnt_w:
2929 case Intrinsic::loongarch_lsx_vpcnt_d:
2930 case Intrinsic::loongarch_lasx_xvpcnt_b:
2931 case Intrinsic::loongarch_lasx_xvpcnt_h:
2932 case Intrinsic::loongarch_lasx_xvpcnt_w:
2933 case Intrinsic::loongarch_lasx_xvpcnt_d:
2934 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2935 case Intrinsic::loongarch_lsx_vbitclr_b:
2936 case Intrinsic::loongarch_lsx_vbitclr_h:
2937 case Intrinsic::loongarch_lsx_vbitclr_w:
2938 case Intrinsic::loongarch_lsx_vbitclr_d:
2939 case Intrinsic::loongarch_lasx_xvbitclr_b:
2940 case Intrinsic::loongarch_lasx_xvbitclr_h:
2941 case Intrinsic::loongarch_lasx_xvbitclr_w:
2942 case Intrinsic::loongarch_lasx_xvbitclr_d:
2943 return lowerVectorBitClear(N, DAG);
2944 case Intrinsic::loongarch_lsx_vbitclri_b:
2945 case Intrinsic::loongarch_lasx_xvbitclri_b:
2946 return lowerVectorBitClearImm<3>(N, DAG);
2947 case Intrinsic::loongarch_lsx_vbitclri_h:
2948 case Intrinsic::loongarch_lasx_xvbitclri_h:
2949 return lowerVectorBitClearImm<4>(N, DAG);
2950 case Intrinsic::loongarch_lsx_vbitclri_w:
2951 case Intrinsic::loongarch_lasx_xvbitclri_w:
2952 return lowerVectorBitClearImm<5>(N, DAG);
2953 case Intrinsic::loongarch_lsx_vbitclri_d:
2954 case Intrinsic::loongarch_lasx_xvbitclri_d:
2955 return lowerVectorBitClearImm<6>(N, DAG);
2956 case Intrinsic::loongarch_lsx_vbitset_b:
2957 case Intrinsic::loongarch_lsx_vbitset_h:
2958 case Intrinsic::loongarch_lsx_vbitset_w:
2959 case Intrinsic::loongarch_lsx_vbitset_d:
2960 case Intrinsic::loongarch_lasx_xvbitset_b:
2961 case Intrinsic::loongarch_lasx_xvbitset_h:
2962 case Intrinsic::loongarch_lasx_xvbitset_w:
2963 case Intrinsic::loongarch_lasx_xvbitset_d: {
2964 EVT VecTy = N->getValueType(0);
2965 SDValue One = DAG.getConstant(1, DL, VecTy);
2966 return DAG.getNode(
2967 ISD::OR, DL, VecTy, N->getOperand(1),
2968 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2969 }
2970 case Intrinsic::loongarch_lsx_vbitseti_b:
2971 case Intrinsic::loongarch_lasx_xvbitseti_b:
2972 return lowerVectorBitSetImm<3>(N, DAG);
2973 case Intrinsic::loongarch_lsx_vbitseti_h:
2974 case Intrinsic::loongarch_lasx_xvbitseti_h:
2975 return lowerVectorBitSetImm<4>(N, DAG);
2976 case Intrinsic::loongarch_lsx_vbitseti_w:
2977 case Intrinsic::loongarch_lasx_xvbitseti_w:
2978 return lowerVectorBitSetImm<5>(N, DAG);
2979 case Intrinsic::loongarch_lsx_vbitseti_d:
2980 case Intrinsic::loongarch_lasx_xvbitseti_d:
2981 return lowerVectorBitSetImm<6>(N, DAG);
2982 case Intrinsic::loongarch_lsx_vbitrev_b:
2983 case Intrinsic::loongarch_lsx_vbitrev_h:
2984 case Intrinsic::loongarch_lsx_vbitrev_w:
2985 case Intrinsic::loongarch_lsx_vbitrev_d:
2986 case Intrinsic::loongarch_lasx_xvbitrev_b:
2987 case Intrinsic::loongarch_lasx_xvbitrev_h:
2988 case Intrinsic::loongarch_lasx_xvbitrev_w:
2989 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2990 EVT VecTy = N->getValueType(0);
2991 SDValue One = DAG.getConstant(1, DL, VecTy);
2992 return DAG.getNode(
2993 ISD::XOR, DL, VecTy, N->getOperand(1),
2994 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2995 }
2996 case Intrinsic::loongarch_lsx_vbitrevi_b:
2997 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2998 return lowerVectorBitRevImm<3>(N, DAG);
2999 case Intrinsic::loongarch_lsx_vbitrevi_h:
3000 case Intrinsic::loongarch_lasx_xvbitrevi_h:
3001 return lowerVectorBitRevImm<4>(N, DAG);
3002 case Intrinsic::loongarch_lsx_vbitrevi_w:
3003 case Intrinsic::loongarch_lasx_xvbitrevi_w:
3004 return lowerVectorBitRevImm<5>(N, DAG);
3005 case Intrinsic::loongarch_lsx_vbitrevi_d:
3006 case Intrinsic::loongarch_lasx_xvbitrevi_d:
3007 return lowerVectorBitRevImm<6>(N, DAG);
3008 case Intrinsic::loongarch_lsx_vfadd_s:
3009 case Intrinsic::loongarch_lsx_vfadd_d:
3010 case Intrinsic::loongarch_lasx_xvfadd_s:
3011 case Intrinsic::loongarch_lasx_xvfadd_d:
3012 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
3013 N->getOperand(2));
3014 case Intrinsic::loongarch_lsx_vfsub_s:
3015 case Intrinsic::loongarch_lsx_vfsub_d:
3016 case Intrinsic::loongarch_lasx_xvfsub_s:
3017 case Intrinsic::loongarch_lasx_xvfsub_d:
3018 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
3019 N->getOperand(2));
3020 case Intrinsic::loongarch_lsx_vfmul_s:
3021 case Intrinsic::loongarch_lsx_vfmul_d:
3022 case Intrinsic::loongarch_lasx_xvfmul_s:
3023 case Intrinsic::loongarch_lasx_xvfmul_d:
3024 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3025 N->getOperand(2));
3026 case Intrinsic::loongarch_lsx_vfdiv_s:
3027 case Intrinsic::loongarch_lsx_vfdiv_d:
3028 case Intrinsic::loongarch_lasx_xvfdiv_s:
3029 case Intrinsic::loongarch_lasx_xvfdiv_d:
3030 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3031 N->getOperand(2));
3032 case Intrinsic::loongarch_lsx_vfmadd_s:
3033 case Intrinsic::loongarch_lsx_vfmadd_d:
3034 case Intrinsic::loongarch_lasx_xvfmadd_s:
3035 case Intrinsic::loongarch_lasx_xvfmadd_d:
3036 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3037 N->getOperand(2), N->getOperand(3));
3038 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3039 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3040 N->getOperand(1), N->getOperand(2),
3041 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3042 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3043 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3044 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3045 N->getOperand(1), N->getOperand(2),
3046 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3047 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3048 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3049 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3050 N->getOperand(1), N->getOperand(2),
3051 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3052 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3053 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3054 N->getOperand(1), N->getOperand(2),
3055 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3056 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3057 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3058 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3059 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3060 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3061 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3062 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3063 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3064 EVT ResTy = N->getValueType(0);
3065 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3066 return DAG.getBuildVector(ResTy, DL, Ops);
3067 }
3068 case Intrinsic::loongarch_lsx_vreplve_b:
3069 case Intrinsic::loongarch_lsx_vreplve_h:
3070 case Intrinsic::loongarch_lsx_vreplve_w:
3071 case Intrinsic::loongarch_lsx_vreplve_d:
3072 case Intrinsic::loongarch_lasx_xvreplve_b:
3073 case Intrinsic::loongarch_lasx_xvreplve_h:
3074 case Intrinsic::loongarch_lasx_xvreplve_w:
3075 case Intrinsic::loongarch_lasx_xvreplve_d:
3076 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3077 N->getOperand(1),
3078 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3079 N->getOperand(2)));
3080 }
3081 return SDValue();
3082}
3083
3084SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3085 DAGCombinerInfo &DCI) const {
3086 SelectionDAG &DAG = DCI.DAG;
3087 switch (N->getOpcode()) {
3088 default:
3089 break;
3090 case ISD::AND:
3091 return performANDCombine(N, DAG, DCI, Subtarget);
3092 case ISD::OR:
3093 return performORCombine(N, DAG, DCI, Subtarget);
3094 case ISD::SRL:
3095 return performSRLCombine(N, DAG, DCI, Subtarget);
3096 case LoongArchISD::BITREV_W:
3097 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3098 case ISD::INTRINSIC_WO_CHAIN:
3099 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3100 }
3101 return SDValue();
3102}
3103
3104static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3105 MachineBasicBlock *MBB) {
3106 if (!ZeroDivCheck)
3107 return MBB;
3108
3109 // Build instructions:
3110 // MBB:
3111 // div(or mod) $dst, $dividend, $divisor
3112 // bnez $divisor, SinkMBB
3113 // BreakMBB:
3114 // break 7 // BRK_DIVZERO
3115 // SinkMBB:
3116 // fallthrough
3117 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3118 MachineFunction::iterator It = ++MBB->getIterator();
3119 MachineFunction *MF = MBB->getParent();
3120 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3121 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3122 MF->insert(It, BreakMBB);
3123 MF->insert(It, SinkMBB);
3124
3125 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3126 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3127 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3128
3129 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3130 DebugLoc DL = MI.getDebugLoc();
3131 MachineOperand &Divisor = MI.getOperand(2);
3132 Register DivisorReg = Divisor.getReg();
3133
3134 // MBB:
3135 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3136 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3137 .addMBB(SinkMBB);
3138 MBB->addSuccessor(BreakMBB);
3139 MBB->addSuccessor(SinkMBB);
3140
3141 // BreakMBB:
3142 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3143 // definition of BRK_DIVZERO.
3144 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3145 BreakMBB->addSuccessor(SinkMBB);
3146
3147 // Clear Divisor's kill flag.
3148 Divisor.setIsKill(false);
3149
3150 return SinkMBB;
3151}
3152
3153static MachineBasicBlock *
3154emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3155 const LoongArchSubtarget &Subtarget) {
3156 unsigned CondOpc;
3157 switch (MI.getOpcode()) {
3158 default:
3159 llvm_unreachable("Unexpected opcode");
3160 case LoongArch::PseudoVBZ:
3161 CondOpc = LoongArch::VSETEQZ_V;
3162 break;
3163 case LoongArch::PseudoVBZ_B:
3164 CondOpc = LoongArch::VSETANYEQZ_B;
3165 break;
3166 case LoongArch::PseudoVBZ_H:
3167 CondOpc = LoongArch::VSETANYEQZ_H;
3168 break;
3169 case LoongArch::PseudoVBZ_W:
3170 CondOpc = LoongArch::VSETANYEQZ_W;
3171 break;
3172 case LoongArch::PseudoVBZ_D:
3173 CondOpc = LoongArch::VSETANYEQZ_D;
3174 break;
3175 case LoongArch::PseudoVBNZ:
3176 CondOpc = LoongArch::VSETNEZ_V;
3177 break;
3178 case LoongArch::PseudoVBNZ_B:
3179 CondOpc = LoongArch::VSETALLNEZ_B;
3180 break;
3181 case LoongArch::PseudoVBNZ_H:
3182 CondOpc = LoongArch::VSETALLNEZ_H;
3183 break;
3184 case LoongArch::PseudoVBNZ_W:
3185 CondOpc = LoongArch::VSETALLNEZ_W;
3186 break;
3187 case LoongArch::PseudoVBNZ_D:
3188 CondOpc = LoongArch::VSETALLNEZ_D;
3189 break;
3190 case LoongArch::PseudoXVBZ:
3191 CondOpc = LoongArch::XVSETEQZ_V;
3192 break;
3193 case LoongArch::PseudoXVBZ_B:
3194 CondOpc = LoongArch::XVSETANYEQZ_B;
3195 break;
3196 case LoongArch::PseudoXVBZ_H:
3197 CondOpc = LoongArch::XVSETANYEQZ_H;
3198 break;
3199 case LoongArch::PseudoXVBZ_W:
3200 CondOpc = LoongArch::XVSETANYEQZ_W;
3201 break;
3202 case LoongArch::PseudoXVBZ_D:
3203 CondOpc = LoongArch::XVSETANYEQZ_D;
3204 break;
3205 case LoongArch::PseudoXVBNZ:
3206 CondOpc = LoongArch::XVSETNEZ_V;
3207 break;
3208 case LoongArch::PseudoXVBNZ_B:
3209 CondOpc = LoongArch::XVSETALLNEZ_B;
3210 break;
3211 case LoongArch::PseudoXVBNZ_H:
3212 CondOpc = LoongArch::XVSETALLNEZ_H;
3213 break;
3214 case LoongArch::PseudoXVBNZ_W:
3215 CondOpc = LoongArch::XVSETALLNEZ_W;
3216 break;
3217 case LoongArch::PseudoXVBNZ_D:
3218 CondOpc = LoongArch::XVSETALLNEZ_D;
3219 break;
3220 }
3221
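// Materialize the vector condition into a GPR with a small diamond:
//   BB:      $fcc = vset<cond> $vec ; bcnez $fcc, TrueBB
//   FalseBB: $r1 = addi.w $zero, 0 ; b SinkBB
//   TrueBB:  $r2 = addi.w $zero, 1
//   SinkBB:  $dst = phi [$r1, FalseBB], [$r2, TrueBB]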
3222 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3223 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3224 DebugLoc DL = MI.getDebugLoc();
3225 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3226 MachineFunction::iterator It = ++BB->getIterator();
3227
3228 MachineFunction *F = BB->getParent();
3229 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3230 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3231 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3232
3233 F->insert(It, FalseBB);
3234 F->insert(It, TrueBB);
3235 F->insert(It, SinkBB);
3236
3237 // Transfer the remainder of MBB and its successor edges to Sink.
3238 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3239 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3240
3241 // Insert the real instruction to BB.
3242 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3243 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3244
3245 // Insert branch.
3246 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3247 BB->addSuccessor(FalseBB);
3248 BB->addSuccessor(TrueBB);
3249
3250 // FalseBB.
3251 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3252 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3253 .addReg(LoongArch::R0)
3254 .addImm(0);
3255 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3256 FalseBB->addSuccessor(SinkBB);
3257
3258 // TrueBB.
3259 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3260 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3261 .addReg(LoongArch::R0)
3262 .addImm(1);
3263 TrueBB->addSuccessor(SinkBB);
3264
3265 // SinkBB: merge the results.
3266 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3267 MI.getOperand(0).getReg())
3268 .addReg(RD1)
3269 .addMBB(FalseBB)
3270 .addReg(RD2)
3271 .addMBB(TrueBB);
3272
3273 // The pseudo instruction is gone now.
3274 MI.eraseFromParent();
3275 return SinkBB;
3276}
3277
3278static MachineBasicBlock *
3279emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3280 const LoongArchSubtarget &Subtarget) {
3281 unsigned InsOp;
3282 unsigned HalfSize;
3283 switch (MI.getOpcode()) {
3284 default:
3285 llvm_unreachable("Unexpected opcode");
3286 case LoongArch::PseudoXVINSGR2VR_B:
3287 HalfSize = 16;
3288 InsOp = LoongArch::VINSGR2VR_B;
3289 break;
3290 case LoongArch::PseudoXVINSGR2VR_H:
3291 HalfSize = 8;
3292 InsOp = LoongArch::VINSGR2VR_H;
3293 break;
3294 }
3295 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3296 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3297 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3298 DebugLoc DL = MI.getDebugLoc();
3299 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3300 // XDst = vector_insert XSrc, Elt, Idx
3301 Register XDst = MI.getOperand(0).getReg();
3302 Register XSrc = MI.getOperand(1).getReg();
3303 Register Elt = MI.getOperand(2).getReg();
3304 unsigned Idx = MI.getOperand(3).getImm();
3305
3306 Register ScratchReg1 = XSrc;
3307 if (Idx >= HalfSize) {
3308 ScratchReg1 = MRI.createVirtualRegister(RC);
3309 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3310 .addReg(XSrc)
3311 .addReg(XSrc)
3312 .addImm(1);
3313 }
3314
3315 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3316 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3317 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3318 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3319 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3320 .addReg(ScratchSubReg1)
3321 .addReg(Elt)
3322 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3323
3324 Register ScratchReg2 = XDst;
3325 if (Idx >= HalfSize)
3326 ScratchReg2 = MRI.createVirtualRegister(RC);
3327
3328 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3329 .addImm(0)
3330 .addReg(ScratchSubReg2)
3331 .addImm(LoongArch::sub_128);
3332
3333 if (Idx >= HalfSize)
3334 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3335 .addReg(XSrc)
3336 .addReg(ScratchReg2)
3337 .addImm(2);
3338
3339 MI.eraseFromParent();
3340 return BB;
3341}
3342
3343MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3344 MachineInstr &MI, MachineBasicBlock *BB) const {
3345 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3346 DebugLoc DL = MI.getDebugLoc();
3347
3348 switch (MI.getOpcode()) {
3349 default:
3350 llvm_unreachable("Unexpected instr type to insert");
3351 case LoongArch::DIV_W:
3352 case LoongArch::DIV_WU:
3353 case LoongArch::MOD_W:
3354 case LoongArch::MOD_WU:
3355 case LoongArch::DIV_D:
3356 case LoongArch::DIV_DU:
3357 case LoongArch::MOD_D:
3358 case LoongArch::MOD_DU:
3359 return insertDivByZeroTrap(MI, BB);
3360 break;
3361 case LoongArch::WRFCSR: {
3362 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3363 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3364 .addReg(MI.getOperand(1).getReg());
3365 MI.eraseFromParent();
3366 return BB;
3367 }
3368 case LoongArch::RDFCSR: {
3369 MachineInstr *ReadFCSR =
3370 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3371 MI.getOperand(0).getReg())
3372 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3373 ReadFCSR->getOperand(1).setIsUndef();
3374 MI.eraseFromParent();
3375 return BB;
3376 }
3377 case LoongArch::PseudoVBZ:
3378 case LoongArch::PseudoVBZ_B:
3379 case LoongArch::PseudoVBZ_H:
3380 case LoongArch::PseudoVBZ_W:
3381 case LoongArch::PseudoVBZ_D:
3382 case LoongArch::PseudoVBNZ:
3383 case LoongArch::PseudoVBNZ_B:
3384 case LoongArch::PseudoVBNZ_H:
3385 case LoongArch::PseudoVBNZ_W:
3386 case LoongArch::PseudoVBNZ_D:
3387 case LoongArch::PseudoXVBZ:
3388 case LoongArch::PseudoXVBZ_B:
3389 case LoongArch::PseudoXVBZ_H:
3390 case LoongArch::PseudoXVBZ_W:
3391 case LoongArch::PseudoXVBZ_D:
3392 case LoongArch::PseudoXVBNZ:
3393 case LoongArch::PseudoXVBNZ_B:
3394 case LoongArch::PseudoXVBNZ_H:
3395 case LoongArch::PseudoXVBNZ_W:
3396 case LoongArch::PseudoXVBNZ_D:
3397 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3398 case LoongArch::PseudoXVINSGR2VR_B:
3399 case LoongArch::PseudoXVINSGR2VR_H:
3400 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3401 }
3402}
3403
3404bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3405 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3406 unsigned *Fast) const {
3407 if (!Subtarget.hasUAL())
3408 return false;
3409
3410 // TODO: set reasonable speed number.
3411 if (Fast)
3412 *Fast = 1;
3413 return true;
3414}
3415
3416const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3417 switch ((LoongArchISD::NodeType)Opcode) {
3418 case LoongArchISD::FIRST_NUMBER:
3419 break;
3420
3421#define NODE_NAME_CASE(node) \
3422 case LoongArchISD::node: \
3423 return "LoongArchISD::" #node;
3424
3425 // TODO: Add more target-dependent nodes later.
3426 NODE_NAME_CASE(CALL)
3427 NODE_NAME_CASE(CALL_MEDIUM)
3428 NODE_NAME_CASE(CALL_LARGE)
3429 NODE_NAME_CASE(RET)
3430 NODE_NAME_CASE(TAIL)
3431 NODE_NAME_CASE(TAIL_MEDIUM)
3432 NODE_NAME_CASE(TAIL_LARGE)
3433 NODE_NAME_CASE(SLL_W)
3434 NODE_NAME_CASE(SRA_W)
3435 NODE_NAME_CASE(SRL_W)
3436 NODE_NAME_CASE(BSTRINS)
3437 NODE_NAME_CASE(BSTRPICK)
3438 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3439 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3440 NODE_NAME_CASE(FTINT)
3441 NODE_NAME_CASE(REVB_2H)
3442 NODE_NAME_CASE(REVB_2W)
3443 NODE_NAME_CASE(BITREV_4B)
3444 NODE_NAME_CASE(BITREV_W)
3445 NODE_NAME_CASE(ROTR_W)
3446 NODE_NAME_CASE(ROTL_W)
3447 NODE_NAME_CASE(CLZ_W)
3448 NODE_NAME_CASE(CTZ_W)
3449 NODE_NAME_CASE(DBAR)
3450 NODE_NAME_CASE(IBAR)
3451 NODE_NAME_CASE(BREAK)
3452 NODE_NAME_CASE(SYSCALL)
3453 NODE_NAME_CASE(CRC_W_B_W)
3454 NODE_NAME_CASE(CRC_W_H_W)
3455 NODE_NAME_CASE(CRC_W_W_W)
3456 NODE_NAME_CASE(CRC_W_D_W)
3457 NODE_NAME_CASE(CRCC_W_B_W)
3458 NODE_NAME_CASE(CRCC_W_H_W)
3459 NODE_NAME_CASE(CRCC_W_W_W)
3460 NODE_NAME_CASE(CRCC_W_D_W)
3461 NODE_NAME_CASE(CSRRD)
3462 NODE_NAME_CASE(CSRWR)
3463 NODE_NAME_CASE(CSRXCHG)
3464 NODE_NAME_CASE(IOCSRRD_B)
3465 NODE_NAME_CASE(IOCSRRD_H)
3466 NODE_NAME_CASE(IOCSRRD_W)
3467 NODE_NAME_CASE(IOCSRRD_D)
3468 NODE_NAME_CASE(IOCSRWR_B)
3469 NODE_NAME_CASE(IOCSRWR_H)
3470 NODE_NAME_CASE(IOCSRWR_W)
3471 NODE_NAME_CASE(IOCSRWR_D)
3472 NODE_NAME_CASE(CPUCFG)
3473 NODE_NAME_CASE(MOVGR2FCSR)
3474 NODE_NAME_CASE(MOVFCSR2GR)
3475 NODE_NAME_CASE(CACOP_D)
3476 NODE_NAME_CASE(CACOP_W)
3477 NODE_NAME_CASE(VPICK_SEXT_ELT)
3478 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3479 NODE_NAME_CASE(VREPLVE)
3480 NODE_NAME_CASE(VALL_ZERO)
3481 NODE_NAME_CASE(VANY_ZERO)
3482 NODE_NAME_CASE(VALL_NONZERO)
3483 NODE_NAME_CASE(VANY_NONZERO)
3484 }
3485#undef NODE_NAME_CASE
3486 return nullptr;
3487}
3488
3489//===----------------------------------------------------------------------===//
3490// Calling Convention Implementation
3491//===----------------------------------------------------------------------===//
3492
3493// Eight general-purpose registers a0-a7 are used for passing integer
3494// arguments, with a0-a1 reused to return values. Generally, the GPRs are used
3495// to pass fixed-point arguments, and floating-point arguments when no FPR is
3496// available or with the soft float ABI.
3497const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3498 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3499 LoongArch::R10, LoongArch::R11};
3500// Eight floating-point registers fa0-fa7 are used for passing floating-point
3501// arguments, and fa0-fa1 are also used to return values.
3502const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3503 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3504 LoongArch::F6, LoongArch::F7};
3505// FPR32 and FPR64 alias each other.
3506const MCPhysReg ArgFPR64s[] = {
3507 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3508 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3509
3510const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3511 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3512 LoongArch::VR6, LoongArch::VR7};
3513
3514const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3515 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3516 LoongArch::XR6, LoongArch::XR7};
3517
3518// Pass a 2*GRLen argument that has been split into two GRLen values through
3519// registers or the stack as necessary.
3520static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3521 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3522 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3523 ISD::ArgFlagsTy ArgFlags2) {
3524 unsigned GRLenInBytes = GRLen / 8;
3525 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3526 // At least one half can be passed via register.
3527 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3528 VA1.getLocVT(), CCValAssign::Full));
3529 } else {
3530 // Both halves must be passed on the stack, with proper alignment.
3531 Align StackAlign =
3532 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3533 State.addLoc(
3534 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3535 State.AllocateStack(GRLenInBytes, StackAlign),
3536 VA1.getLocVT(), CCValAssign::Full));
3537 State.addLoc(CCValAssign::getMem(
3538 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3539 LocVT2, CCValAssign::Full));
3540 return false;
3541 }
3542 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3543 // The second half can also be passed via register.
3544 State.addLoc(
3545 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3546 } else {
3547 // The second half is passed via the stack, without additional alignment.
3548 State.addLoc(CCValAssign::getMem(
3549 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3550 LocVT2, CCValAssign::Full));
3551 }
3552 return false;
3553}
3554
3555// Implements the LoongArch calling convention. Returns true upon failure.
3556static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3557 unsigned ValNo, MVT ValVT,
3558 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3559 CCState &State, bool IsFixed, bool IsRet,
3560 Type *OrigTy) {
3561 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3562 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3563 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3564 MVT LocVT = ValVT;
3565
3566 // Any return value split into more than two values can't be returned
3567 // directly.
3568 if (IsRet && ValNo > 1)
3569 return true;
3570
3571 // Use a GPR for floats if passing a variadic argument or no FPR is available.
3572 bool UseGPRForFloat = true;
3573
3574 switch (ABI) {
3575 default:
3576 llvm_unreachable("Unexpected ABI");
3577 case LoongArchABI::ABI_ILP32S:
3578 case LoongArchABI::ABI_ILP32F:
3579 case LoongArchABI::ABI_LP64F:
3580 report_fatal_error("Unimplemented ABI");
3581 break;
3582 case LoongArchABI::ABI_ILP32D:
3583 case LoongArchABI::ABI_LP64D:
3584 UseGPRForFloat = !IsFixed;
3585 break;
3586 case LoongArchABI::ABI_LP64S:
3587 break;
3588 }
3589
3590 // FPR32 and FPR64 alias each other.
3591 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3592 UseGPRForFloat = true;
3593
3594 if (UseGPRForFloat && ValVT == MVT::f32) {
3595 LocVT = GRLenVT;
3596 LocInfo = CCValAssign::BCvt;
3597 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3598 LocVT = MVT::i64;
3599 LocInfo = CCValAssign::BCvt;
3600 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3601 // TODO: Handle passing f64 on LA32 with D feature.
3602 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3603 }
3604
3605 // If this is a variadic argument, the LoongArch calling convention requires
3606 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3607 // byte alignment. An aligned register should be used regardless of whether
3608 // the original argument was split during legalisation or not. The argument
3609 // will not be passed by registers if the original type is larger than
3610 // 2*GRLen, so the register alignment rule does not apply.
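// Illustrative example (not from the source): on LA64 (GRLen = 64), a
// variadic i128 has 16-byte alignment, so if the next free GPR were a3 it
// would be skipped and the value would go in the aligned pair a4/a5.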
3611 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3612 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3613 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3614 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3615 // Skip 'odd' register if necessary.
3616 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3617 State.AllocateReg(ArgGPRs);
3618 }
3619
3620 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3621 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3622 State.getPendingArgFlags();
3623
3624 assert(PendingLocs.size() == PendingArgFlags.size() &&
3625 "PendingLocs and PendingArgFlags out of sync");
3626
3627 // Split arguments might be passed indirectly, so keep track of the pending
3628 // values.
3629 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3630 LocVT = GRLenVT;
3631 LocInfo = CCValAssign::Indirect;
3632 PendingLocs.push_back(
3633 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3634 PendingArgFlags.push_back(ArgFlags);
3635 if (!ArgFlags.isSplitEnd()) {
3636 return false;
3637 }
3638 }
3639
3640 // If the split argument only had two elements, it should be passed directly
3641 // in registers or on the stack.
3642 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3643 PendingLocs.size() <= 2) {
3644 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3645 // Apply the normal calling convention rules to the first half of the
3646 // split argument.
3647 CCValAssign VA = PendingLocs[0];
3648 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3649 PendingLocs.clear();
3650 PendingArgFlags.clear();
3651 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3652 ArgFlags);
3653 }
3654
3655 // Allocate to a register if possible, or else a stack slot.
3656 Register Reg;
3657 unsigned StoreSizeBytes = GRLen / 8;
3658 Align StackAlign = Align(GRLen / 8);
3659
3660 if (ValVT == MVT::f32 && !UseGPRForFloat)
3661 Reg = State.AllocateReg(ArgFPR32s);
3662 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3663 Reg = State.AllocateReg(ArgFPR64s);
3664 else if (ValVT.is128BitVector())
3665 Reg = State.AllocateReg(ArgVRs);
3666 else if (ValVT.is256BitVector())
3667 Reg = State.AllocateReg(ArgXRs);
3668 else
3669 Reg = State.AllocateReg(ArgGPRs);
3670
3671 unsigned StackOffset =
3672 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3673
3674 // If we reach this point and PendingLocs is non-empty, we must be at the
3675 // end of a split argument that must be passed indirectly.
3676 if (!PendingLocs.empty()) {
3677 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3678 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3679 for (auto &It : PendingLocs) {
3680 if (Reg)
3681 It.convertToReg(Reg);
3682 else
3683 It.convertToMem(StackOffset);
3684 State.addLoc(It);
3685 }
3686 PendingLocs.clear();
3687 PendingArgFlags.clear();
3688 return false;
3689 }
3690 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3691 "Expected an GRLenVT at this stage");
3692
3693 if (Reg) {
3694 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3695 return false;
3696 }
3697
3698 // When a floating-point value is passed on the stack, no bit-cast is needed.
3699 if (ValVT.isFloatingPoint()) {
3700 LocVT = ValVT;
3701 LocInfo = CCValAssign::Full;
3702 }
3703
3704 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3705 return false;
3706}
3707
3708void LoongArchTargetLowering::analyzeInputArgs(
3709 MachineFunction &MF, CCState &CCInfo,
3710 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3711 LoongArchCCAssignFn Fn) const {
3712 FunctionType *FType = MF.getFunction().getFunctionType();
3713 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3714 MVT ArgVT = Ins[i].VT;
3715 Type *ArgTy = nullptr;
3716 if (IsRet)
3717 ArgTy = FType->getReturnType();
3718 else if (Ins[i].isOrigArg())
3719 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3720 LoongArchABI::ABI ABI =
3721 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3722 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3723 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3724 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3725 << '\n');
3726 llvm_unreachable("");
3727 }
3728 }
3729}
3730
3731void LoongArchTargetLowering::analyzeOutputArgs(
3732 MachineFunction &MF, CCState &CCInfo,
3733 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3734 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3735 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3736 MVT ArgVT = Outs[i].VT;
3737 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3738 LoongArchABI::ABI ABI =
3739 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3740 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3741 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3742 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3743 << "\n");
3744 llvm_unreachable("");
3745 }
3746 }
3747}
3748
3749// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3750// values.
3751static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3752 const CCValAssign &VA, const SDLoc &DL) {
3753 switch (VA.getLocInfo()) {
3754 default:
3755 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3756 case CCValAssign::Full:
3758 break;
3759 case CCValAssign::BCvt:
3760 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3761 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3762 else
3763 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3764 break;
3765 }
3766 return Val;
3767}
3768
3769static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3770 const CCValAssign &VA, const SDLoc &DL,
3771 const LoongArchTargetLowering &TLI) {
3772 MachineFunction &MF = DAG.getMachineFunction();
3773 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3774 EVT LocVT = VA.getLocVT();
3775 SDValue Val;
3776 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3777 Register VReg = RegInfo.createVirtualRegister(RC);
3778 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3779 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3780
3781 return convertLocVTToValVT(DAG, Val, VA, DL);
3782}
3783
3784// The caller is responsible for loading the full value if the argument is
3785// passed with CCValAssign::Indirect.
3786static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3787 const CCValAssign &VA, const SDLoc &DL) {
3788 MachineFunction &MF = DAG.getMachineFunction();
3789 MachineFrameInfo &MFI = MF.getFrameInfo();
3790 EVT ValVT = VA.getValVT();
3791 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3792 /*IsImmutable=*/true);
3793 SDValue FIN = DAG.getFrameIndex(
3794 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3795
3796 ISD::LoadExtType ExtType;
3797 switch (VA.getLocInfo()) {
3798 default:
3799 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3800 case CCValAssign::Full:
3801 case CCValAssign::Indirect:
3802 case CCValAssign::BCvt:
3803 ExtType = ISD::NON_EXTLOAD;
3804 break;
3805 }
3806 return DAG.getExtLoad(
3807 ExtType, DL, VA.getLocVT(), Chain, FIN,
3808 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3809}
3810
3811static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3812 const CCValAssign &VA, const SDLoc &DL) {
3813 EVT LocVT = VA.getLocVT();
3814
3815 switch (VA.getLocInfo()) {
3816 default:
3817 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3818 case CCValAssign::Full:
3819 break;
3820 case CCValAssign::BCvt:
3821 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3822 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3823 else
3824 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3825 break;
3826 }
3827 return Val;
3828}
3829
3830static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3831 CCValAssign::LocInfo LocInfo,
3832 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3833 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3834 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3835 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3836 static const MCPhysReg GPRList[] = {
3837 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3838 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3839 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3840 if (unsigned Reg = State.AllocateReg(GPRList)) {
3841 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3842 return false;
3843 }
3844 }
3845
3846 if (LocVT == MVT::f32) {
3847 // Pass in STG registers: F1, F2, F3, F4
3848 // fs0,fs1,fs2,fs3
3849 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3850 LoongArch::F26, LoongArch::F27};
3851 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3852 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3853 return false;
3854 }
3855 }
3856
3857 if (LocVT == MVT::f64) {
3858 // Pass in STG registers: D1, D2, D3, D4
3859 // fs4,fs5,fs6,fs7
3860 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3861 LoongArch::F30_64, LoongArch::F31_64};
3862 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3863 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3864 return false;
3865 }
3866 }
3867
3868 report_fatal_error("No registers left in GHC calling convention");
3869 return true;
3870}
3871
3872// Transform physical registers into virtual registers.
3873SDValue LoongArchTargetLowering::LowerFormalArguments(
3874 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3875 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3876 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3877
3878 MachineFunction &MF = DAG.getMachineFunction();
3879
3880 switch (CallConv) {
3881 default:
3882 llvm_unreachable("Unsupported calling convention");
3883 case CallingConv::C:
3884 case CallingConv::Fast:
3885 break;
3886 case CallingConv::GHC:
3887 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3888 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3889 report_fatal_error(
3890 "GHC calling convention requires the F and D extensions");
3891 }
3892
3893 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3894 MVT GRLenVT = Subtarget.getGRLenVT();
3895 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3896 // Used with varargs to accumulate store chains.
3897 std::vector<SDValue> OutChains;
3898
3899 // Assign locations to all of the incoming arguments.
3900 SmallVector<CCValAssign> ArgLocs;
3901 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3902
3903 if (CallConv == CallingConv::GHC)
3904 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3905 else
3906 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3907
3908 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3909 CCValAssign &VA = ArgLocs[i];
3910 SDValue ArgValue;
3911 if (VA.isRegLoc())
3912 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3913 else
3914 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3915 if (VA.getLocInfo() == CCValAssign::Indirect) {
3916 // If the original argument was split and passed by reference, we need to
3917 // load all parts of it here (using the same address).
3918 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3919 MachinePointerInfo()));
3920 unsigned ArgIndex = Ins[i].OrigArgIndex;
3921 unsigned ArgPartOffset = Ins[i].PartOffset;
3922 assert(ArgPartOffset == 0);
3923 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3924 CCValAssign &PartVA = ArgLocs[i + 1];
3925 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3926 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3927 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3928 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3929 MachinePointerInfo()));
3930 ++i;
3931 }
3932 continue;
3933 }
3934 InVals.push_back(ArgValue);
3935 }
3936
3937 if (IsVarArg) {
3938 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3939 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3940 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3941 MachineFrameInfo &MFI = MF.getFrameInfo();
3942 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3943 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3944
3945 // Offset of the first variable argument from stack pointer, and size of
3946 // the vararg save area. For now, the varargs save area is either zero or
3947 // large enough to hold a0-a7.
3948 int VaArgOffset, VarArgsSaveSize;
3949
3950 // If all registers are allocated, then all varargs must be passed on the
3951 // stack and we don't need to save any argregs.
3952 if (ArgRegs.size() == Idx) {
3953 VaArgOffset = CCInfo.getStackSize();
3954 VarArgsSaveSize = 0;
3955 } else {
3956 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3957 VaArgOffset = -VarArgsSaveSize;
3958 }
3959
3960 // Record the frame index of the first variable argument
3961 // which is a value necessary to VASTART.
3962 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3963 LoongArchFI->setVarArgsFrameIndex(FI);
3964
3965 // If saving an odd number of registers then create an extra stack slot to
3966 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3967 // offsets to even-numbered registers remain 2*GRLen-aligned.
3968 if (Idx % 2) {
3969 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3970 true);
3971 VarArgsSaveSize += GRLenInBytes;
3972 }
3973
3974 // Copy the integer registers that may have been used for passing varargs
3975 // to the vararg save area.
3976 for (unsigned I = Idx; I < ArgRegs.size();
3977 ++I, VaArgOffset += GRLenInBytes) {
3978 const Register Reg = RegInfo.createVirtualRegister(RC);
3979 RegInfo.addLiveIn(ArgRegs[I], Reg);
3980 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3981 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3982 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3983 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3984 MachinePointerInfo::getFixedStack(MF, FI));
3985 cast<StoreSDNode>(Store.getNode())
3986 ->getMemOperand()
3987 ->setValue((Value *)nullptr);
3988 OutChains.push_back(Store);
3989 }
3990 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3991 }
3992
3993 // All stores are grouped in one node to allow the matching between
3994 // the size of Ins and InVals. This only happens for vararg functions.
3995 if (!OutChains.empty()) {
3996 OutChains.push_back(Chain);
3997 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3998 }
3999
4000 return Chain;
4001}
4002
4003bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4004 return CI->isTailCall();
4005}
4006
4007// Check if the return value is used only as a return value, as otherwise
4008// we can't perform a tail-call.
4009bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
4010 SDValue &Chain) const {
4011 if (N->getNumValues() != 1)
4012 return false;
4013 if (!N->hasNUsesOfValue(1, 0))
4014 return false;
4015
4016 SDNode *Copy = *N->use_begin();
4017 if (Copy->getOpcode() != ISD::CopyToReg)
4018 return false;
4019
4020 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
4021 // isn't safe to perform a tail call.
4022 if (Copy->getGluedNode())
4023 return false;
4024
4025 // The copy must be used by a LoongArchISD::RET, and nothing else.
4026 bool HasRet = false;
4027 for (SDNode *Node : Copy->uses()) {
4028 if (Node->getOpcode() != LoongArchISD::RET)
4029 return false;
4030 HasRet = true;
4031 }
4032
4033 if (!HasRet)
4034 return false;
4035
4036 Chain = Copy->getOperand(0);
4037 return true;
4038}
4039
4040// Check whether the call is eligible for tail call optimization.
4041bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4042 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4043 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4044
4045 auto CalleeCC = CLI.CallConv;
4046 auto &Outs = CLI.Outs;
4047 auto &Caller = MF.getFunction();
4048 auto CallerCC = Caller.getCallingConv();
4049
4050 // Do not tail call opt if the stack is used to pass parameters.
4051 if (CCInfo.getStackSize() != 0)
4052 return false;
4053
4054 // Do not tail call opt if any parameters need to be passed indirectly.
4055 for (auto &VA : ArgLocs)
4056 if (VA.getLocInfo() == CCValAssign::Indirect)
4057 return false;
4058
4059 // Do not tail call opt if either caller or callee uses struct return
4060 // semantics.
4061 auto IsCallerStructRet = Caller.hasStructRetAttr();
4062 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4063 if (IsCallerStructRet || IsCalleeStructRet)
4064 return false;
4065
4066 // Do not tail call opt if either the callee or caller has a byval argument.
4067 for (auto &Arg : Outs)
4068 if (Arg.Flags.isByVal())
4069 return false;
4070
4071 // The callee has to preserve all registers the caller needs to preserve.
4072 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4073 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4074 if (CalleeCC != CallerCC) {
4075 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4076 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4077 return false;
4078 }
4079 return true;
4080}
4081
4082static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4083 return DAG.getDataLayout().getPrefTypeAlign(
4084 VT.getTypeForEVT(*DAG.getContext()));
4085}
4086
4087// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4088// and output parameter nodes.
4089SDValue
4090LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4091 SmallVectorImpl<SDValue> &InVals) const {
4092 SelectionDAG &DAG = CLI.DAG;
4093 SDLoc &DL = CLI.DL;
4094 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4095 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4096 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4097 SDValue Chain = CLI.Chain;
4098 SDValue Callee = CLI.Callee;
4099 CallingConv::ID CallConv = CLI.CallConv;
4100 bool IsVarArg = CLI.IsVarArg;
4101 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4102 MVT GRLenVT = Subtarget.getGRLenVT();
4103 bool &IsTailCall = CLI.IsTailCall;
4104
4105 MachineFunction &MF = DAG.getMachineFunction();
4106
4107 // Analyze the operands of the call, assigning locations to each operand.
4108 SmallVector<CCValAssign> ArgLocs;
4109 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4110
4111 if (CallConv == CallingConv::GHC)
4112 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4113 else
4114 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4115
4116 // Check if it's really possible to do a tail call.
4117 if (IsTailCall)
4118 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4119
4120 if (IsTailCall)
4121 ++NumTailCalls;
4122 else if (CLI.CB && CLI.CB->isMustTailCall())
4123 report_fatal_error("failed to perform tail call elimination on a call "
4124 "site marked musttail");
4125
4126 // Get a count of how many bytes are to be pushed on the stack.
4127 unsigned NumBytes = ArgCCInfo.getStackSize();
4128
4129 // Create local copies for byval args.
4130 SmallVector<SDValue> ByValArgs;
4131 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4132 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4133 if (!Flags.isByVal())
4134 continue;
4135
4136 SDValue Arg = OutVals[i];
4137 unsigned Size = Flags.getByValSize();
4138 Align Alignment = Flags.getNonZeroByValAlign();
4139
4140 int FI =
4141 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4142 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4143 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4144
4145 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4146 /*IsVolatile=*/false,
4147 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4148 MachinePointerInfo(), MachinePointerInfo());
4149 ByValArgs.push_back(FIPtr);
4150 }
4151
4152 if (!IsTailCall)
4153 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4154
4155 // Copy argument values to their designated locations.
4156 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4157 SmallVector<SDValue> MemOpChains;
4158 SDValue StackPtr;
4159 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4160 CCValAssign &VA = ArgLocs[i];
4161 SDValue ArgValue = OutVals[i];
4162 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4163
4164 // Promote the value if needed.
4165 // For now, only handle fully promoted and indirect arguments.
4166 if (VA.getLocInfo() == CCValAssign::Indirect) {
4167 // Store the argument in a stack slot and pass its address.
4168 Align StackAlign =
4169 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4170 getPrefTypeAlign(ArgValue.getValueType(), DAG));
4171 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4172 // If the original argument was split and passed by reference, we need to
4173 // store the required parts of it here (and pass just one address).
4174 unsigned ArgIndex = Outs[i].OrigArgIndex;
4175 unsigned ArgPartOffset = Outs[i].PartOffset;
4176 assert(ArgPartOffset == 0);
4177 // Calculate the total size to store. We don't have access to what we're
4178 // actually storing other than performing the loop and collecting the
4179 // info.
4180 SmallVector<std::pair<SDValue, SDValue>> Parts;
4181 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4182 SDValue PartValue = OutVals[i + 1];
4183 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4184 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4185 EVT PartVT = PartValue.getValueType();
4186
4187 StoredSize += PartVT.getStoreSize();
4188 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4189 Parts.push_back(std::make_pair(PartValue, Offset));
4190 ++i;
4191 }
4192 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4193 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4194 MemOpChains.push_back(
4195 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4196 MachinePointerInfo::getFixedStack(MF, FI)));
4197 for (const auto &Part : Parts) {
4198 SDValue PartValue = Part.first;
4199 SDValue PartOffset = Part.second;
4200 SDValue Address =
4201 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4202 MemOpChains.push_back(
4203 DAG.getStore(Chain, DL, PartValue, Address,
4204 MachinePointerInfo::getFixedStack(MF, FI)));
4205 }
4206 ArgValue = SpillSlot;
4207 } else {
4208 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4209 }
4210
4211 // Use local copy if it is a byval arg.
4212 if (Flags.isByVal())
4213 ArgValue = ByValArgs[j++];
4214
4215 if (VA.isRegLoc()) {
4216 // Queue up the argument copies and emit them at the end.
4217 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4218 } else {
4219 assert(VA.isMemLoc() && "Argument not register or memory");
4220 assert(!IsTailCall && "Tail call not allowed if stack is used "
4221 "for passing parameters");
4222
4223 // Work out the address of the stack slot.
4224 if (!StackPtr.getNode())
4225 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4226 SDValue Address =
4227 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4228 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4229
4230 // Emit the store.
4231 MemOpChains.push_back(
4232 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4233 }
4234 }
4235
4236 // Join the stores, which are independent of one another.
4237 if (!MemOpChains.empty())
4238 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4239
4240 SDValue Glue;
4241
4242 // Build a sequence of copy-to-reg nodes, chained and glued together.
4243 for (auto &Reg : RegsToPass) {
4244 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4245 Glue = Chain.getValue(1);
4246 }
4247
4248 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4249 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4250 // split it and then direct call can be matched by PseudoCALL.
4251 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4252 const GlobalValue *GV = S->getGlobal();
4253 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
4254 ? LoongArchII::MO_CALL
4255 : LoongArchII::MO_CALL_PLT;
4256 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4257 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4258 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
4259 ? LoongArchII::MO_CALL
4260 : LoongArchII::MO_CALL_PLT;
4261 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4262 }
4263
4264 // The first call operand is the chain and the second is the target address.
4265 SmallVector<SDValue> Ops;
4266 Ops.push_back(Chain);
4267 Ops.push_back(Callee);
4268
4269 // Add argument registers to the end of the list so that they are
4270 // known live into the call.
4271 for (auto &Reg : RegsToPass)
4272 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4273
4274 if (!IsTailCall) {
4275 // Add a register mask operand representing the call-preserved registers.
4276 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4277 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4278 assert(Mask && "Missing call preserved mask for calling convention");
4279 Ops.push_back(DAG.getRegisterMask(Mask));
4280 }
4281
4282 // Glue the call to the argument copies, if any.
4283 if (Glue.getNode())
4284 Ops.push_back(Glue);
4285
4286 // Emit the call.
4287 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4288 unsigned Op;
4289 switch (DAG.getTarget().getCodeModel()) {
4290 default:
4291 report_fatal_error("Unsupported code model");
4292 case CodeModel::Small:
4293 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4294 break;
4295 case CodeModel::Medium:
4296 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4297 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4298 break;
4299 case CodeModel::Large:
4300 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4301 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4302 break;
4303 }
4304
4305 if (IsTailCall) {
4306 MF.getFrameInfo().setHasTailCall();
4307 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4308 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4309 return Ret;
4310 }
4311
4312 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4313 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4314 Glue = Chain.getValue(1);
4315
4316 // Mark the end of the call, which is glued to the call itself.
4317 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4318 Glue = Chain.getValue(1);
4319
4320 // Assign locations to each value returned by this call.
4321 SmallVector<CCValAssign> RVLocs;
4322 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4323 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4324
4325 // Copy all of the result registers out of their specified physreg.
4326 for (auto &VA : RVLocs) {
4327 // Copy the value out.
4328 SDValue RetValue =
4329 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4330 // Glue the RetValue to the end of the call sequence.
4331 Chain = RetValue.getValue(1);
4332 Glue = RetValue.getValue(2);
4333
4334 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4335
4336 InVals.push_back(RetValue);
4337 }
4338
4339 return Chain;
4340}
4341
4342bool LoongArchTargetLowering::CanLowerReturn(
4343 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4344 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4345 SmallVector<CCValAssign> RVLocs;
4346 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4347
4348 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4349 LoongArchABI::ABI ABI =
4350 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4351 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4352 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4353 nullptr))
4354 return false;
4355 }
4356 return true;
4357}
4358
4359SDValue LoongArchTargetLowering::LowerReturn(
4360 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4361 const SmallVectorImpl<ISD::OutputArg> &Outs,
4362 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4363 SelectionDAG &DAG) const {
4364 // Stores the assignment of the return value to a location.
4365 SmallVector<CCValAssign> RVLocs;
4366
4367 // Info about the registers and stack slot.
4368 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4369 *DAG.getContext());
4370
4371 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4372 nullptr, CC_LoongArch);
4373 if (CallConv == CallingConv::GHC && !RVLocs.empty())
4374 report_fatal_error("GHC functions return void only");
4375 SDValue Glue;
4376 SmallVector<SDValue, 4> RetOps(1, Chain);
4377
4378 // Copy the result values into the output registers.
4379 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4380 CCValAssign &VA = RVLocs[i];
4381 assert(VA.isRegLoc() && "Can only return in registers!");
4382
4383 // Handle a 'normal' return.
4384 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4385 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4386
4387 // Guarantee that all emitted copies are stuck together.
4388 Glue = Chain.getValue(1);
4389 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4390 }
4391
4392 RetOps[0] = Chain; // Update chain.
4393
4394 // Add the glue node if we have it.
4395 if (Glue.getNode())
4396 RetOps.push_back(Glue);
4397
4398 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4399}
4400
4401bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4402 bool ForCodeSize) const {
4403 // TODO: Maybe need more checks here after vector extension is supported.
4404 if (VT == MVT::f32 && !Subtarget.hasBasicF())
4405 return false;
4406 if (VT == MVT::f64 && !Subtarget.hasBasicD())
4407 return false;
4408 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4409}
4410
4411bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4412 return true;
4413}
4414
4415bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4416 return true;
4417}
4418
4419bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4420 const Instruction *I) const {
4421 if (!Subtarget.is64Bit())
4422 return isa<LoadInst>(I) || isa<StoreInst>(I);
4423
4424 if (isa<LoadInst>(I))
4425 return true;
4426
4427 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4428 // require fences because we can use amswap_db.[w/d].
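// Illustrative note (not from the source): a seq_cst `store atomic i32`
// on LA64 can therefore lower to a single amswap_db.w with no explicit
// dbar barriers, while i8/i16 atomic stores still get surrounding fences.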
4429 if (isa<StoreInst>(I)) {
4430 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4431 return (Size == 8 || Size == 16);
4432 }
4433
4434 return false;
4435}
4436
4437EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4438 LLVMContext &Context,
4439 EVT VT) const {
4440 if (!VT.isVector())
4441 return getPointerTy(DL);
4442 return VT.changeVectorElementTypeToInteger();
4443}
4444
4445bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4446 // TODO: Support vectors.
4447 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4448}
4449
4450bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4451 const CallInst &I,
4452 MachineFunction &MF,
4453 unsigned Intrinsic) const {
4454 switch (Intrinsic) {
4455 default:
4456 return false;
4457 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4458 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4459 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4460 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4461 Info.opc = ISD::INTRINSIC_W_CHAIN;
4462 Info.memVT = MVT::i32;
4463 Info.ptrVal = I.getArgOperand(0);
4464 Info.offset = 0;
4465 Info.align = Align(4);
4466 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4467 MachineMemOperand::MOVolatile;
4468 return true;
4469 // TODO: Add more Intrinsics later.
4470 }
4471}
4472
4473TargetLowering::AtomicExpansionKind
4474LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4475 // TODO: Add more AtomicRMWInst that need to be extended.
4476
4477 // Since floating-point operation requires a non-trivial set of data
4478 // operations, use CmpXChg to expand.
4479 if (AI->isFloatingPointOperation() ||
4480 AI->getOperation() == AtomicRMWInst::UIncWrap ||
4481 AI->getOperation() == AtomicRMWInst::UDecWrap)
4482 return AtomicExpansionKind::CmpXChg;
4483
4484 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4485 if (Size == 8 || Size == 16)
4486 return AtomicExpansionKind::MaskedIntrinsic;
4487 return AtomicExpansionKind::None;
4488}
4489
4490static Intrinsic::ID
4491getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4492 AtomicRMWInst::BinOp BinOp) {
4493 if (GRLen == 64) {
4494 switch (BinOp) {
4495 default:
4496 llvm_unreachable("Unexpected AtomicRMW BinOp");
4497 case AtomicRMWInst::Xchg:
4498 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4499 case AtomicRMWInst::Add:
4500 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4501 case AtomicRMWInst::Sub:
4502 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4503 case AtomicRMWInst::Nand:
4504 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4505 case AtomicRMWInst::UMax:
4506 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4507 case AtomicRMWInst::UMin:
4508 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4509 case AtomicRMWInst::Max:
4510 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4511 case AtomicRMWInst::Min:
4512 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4513 // TODO: support other AtomicRMWInst.
4514 }
4515 }
4516
4517 if (GRLen == 32) {
4518 switch (BinOp) {
4519 default:
4520 llvm_unreachable("Unexpected AtomicRMW BinOp");
4521 case AtomicRMWInst::Xchg:
4522 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4523 case AtomicRMWInst::Add:
4524 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4525 case AtomicRMWInst::Sub:
4526 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4527 case AtomicRMWInst::Nand:
4528 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4529 // TODO: support other AtomicRMWInst.
4530 }
4531 }
4532
4533 llvm_unreachable("Unexpected GRLen\n");
4534}
4535
4536TargetLowering::AtomicExpansionKind
4537LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4538 AtomicCmpXchgInst *CI) const {
4539 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4540 if (Size == 8 || Size == 16)
4541 return AtomicExpansionKind::MaskedIntrinsic;
4542 return AtomicExpansionKind::None;
4543}
4544
4545Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4546 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4547 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4548 AtomicOrdering FailOrd = CI->getFailureOrdering();
4549 Value *FailureOrdering =
4550 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4551
4552 // TODO: Support cmpxchg on LA32.
4553 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4554 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4555 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4556 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4557 Type *Tys[] = {AlignedAddr->getType()};
4558 Function *MaskedCmpXchg =
4559 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4560 Value *Result = Builder.CreateCall(
4561 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4562 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4563 return Result;
4564}
4565
4566Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4567 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4568 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4569 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4570 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4571 // mask, as this produces better code than the LL/SC loop emitted by
4572 // int_loongarch_masked_atomicrmw_xchg.
4573 if (AI->getOperation() == AtomicRMWInst::Xchg &&
4574 isa<ConstantInt>(AI->getValOperand())) {
4575 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4576 if (CVal->isZero())
4577 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4578 Builder.CreateNot(Mask, "Inv_Mask"),
4579 AI->getAlign(), Ord);
4580 if (CVal->isMinusOne())
4581 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4582 AI->getAlign(), Ord);
4583 }
4584
4585 unsigned GRLen = Subtarget.getGRLen();
4586 Value *Ordering =
4587 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4588 Type *Tys[] = {AlignedAddr->getType()};
4589 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4590 AI->getModule(),
4591 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4592
4593 if (GRLen == 64) {
4594 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4595 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4596 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4597 }
4598
4599 Value *Result;
4600
4601 // Must pass the shift amount needed to sign extend the loaded value prior
4602 // to performing a signed comparison for min/max. ShiftAmt is the number of
4603 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4604 // is the number of bits to left+right shift the value in order to
4605 // sign-extend.
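// Worked example (illustrative, not from the source): an i8 value on LA64
// whose field starts at bit 8 has ShiftAmt = 8 and ValWidth = 8, so the
// intrinsic receives SextShamt = 64 - 8 - 8 = 48, i.e. shift left then
// arithmetic-shift right by 48 to sign-extend before the comparison.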
4606 if (AI->getOperation() == AtomicRMWInst::Min ||
4607 AI->getOperation() == AtomicRMWInst::Max) {
4608 const DataLayout &DL = AI->getModule()->getDataLayout();
4609 unsigned ValWidth =
4610 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4611 Value *SextShamt =
4612 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4613 Result = Builder.CreateCall(LlwOpScwLoop,
4614 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4615 } else {
4616 Result =
4617 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4618 }
4619
4620 if (GRLen == 64)
4621 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4622 return Result;
4623}
4624
4625bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4626 const MachineFunction &MF, EVT VT) const {
4627 VT = VT.getScalarType();
4628
4629 if (!VT.isSimple())
4630 return false;
4631
4632 switch (VT.getSimpleVT().SimpleTy) {
4633 case MVT::f32:
4634 case MVT::f64:
4635 return true;
4636 default:
4637 break;
4638 }
4639
4640 return false;
4641}
4642
4643Register LoongArchTargetLowering::getExceptionPointerRegister(
4644 const Constant *PersonalityFn) const {
4645 return LoongArch::R4;
4646}
4647
4648Register LoongArchTargetLowering::getExceptionSelectorRegister(
4649 const Constant *PersonalityFn) const {
4650 return LoongArch::R5;
4651}
4652
4653//===----------------------------------------------------------------------===//
4654// LoongArch Inline Assembly Support
4655//===----------------------------------------------------------------------===//
4656
4657LoongArchTargetLowering::ConstraintType
4658LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4659 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4660 //
4661 // 'f': A floating-point register (if available).
4662 // 'k': A memory operand whose address is formed by a base register and
4663 // (optionally scaled) index register.
4664 // 'l': A signed 16-bit constant.
4665 // 'm': A memory operand whose address is formed by a base register and
4666 // offset that is suitable for use in instructions with the same
4667 // addressing mode as st.w and ld.w.
4668 // 'I': A signed 12-bit constant (for arithmetic instructions).
4669 // 'J': Integer zero.
4670 // 'K': An unsigned 12-bit constant (for logic instructions).
4671 // "ZB": An address that is held in a general-purpose register. The offset is
4672 // zero.
4673 // "ZC": A memory operand whose address is formed by a base register and
4674 // offset that is suitable for use in instructions with the same
4675 // addressing mode as ll.w and sc.w.
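// Example of how these constraints appear in user code (illustrative only;
// the instruction and values are assumptions, not from this file):
//   int Res;
//   asm("addi.w %0, %1, %2" : "=r"(Res) : "r"(In), "I"(2047));
// Here 'I' admits 2047 but rejects anything outside [-2048, 2047], while
// 'K' would accept only unsigned 12-bit constants such as 4095.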
4676 if (Constraint.size() == 1) {
4677 switch (Constraint[0]) {
4678 default:
4679 break;
4680 case 'f':
4681 return C_RegisterClass;
4682 case 'l':
4683 case 'I':
4684 case 'J':
4685 case 'K':
4686 return C_Immediate;
4687 case 'k':
4688 return C_Memory;
4689 }
4690 }
4691
4692 if (Constraint == "ZC" || Constraint == "ZB")
4693 return C_Memory;
4694
4695 // 'm' is handled here.
4696 return TargetLowering::getConstraintType(Constraint);
4697}
4698
4699InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4700 StringRef ConstraintCode) const {
4701 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4702 .Case("k", InlineAsm::ConstraintCode::k)
4703 .Case("ZB", InlineAsm::ConstraintCode::ZB)
4704 .Case("ZC", InlineAsm::ConstraintCode::ZC)
4705 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4706}
4707
4708std::pair<unsigned, const TargetRegisterClass *>
4709LoongArchTargetLowering::getRegForInlineAsmConstraint(
4710 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4711 // First, see if this is a constraint that directly corresponds to a LoongArch
4712 // register class.
4713 if (Constraint.size() == 1) {
4714 switch (Constraint[0]) {
4715 case 'r':
4716 // TODO: Support fixed vectors up to GRLen?
4717 if (VT.isVector())
4718 break;
4719 return std::make_pair(0U, &LoongArch::GPRRegClass);
4720 case 'f':
4721 if (Subtarget.hasBasicF() && VT == MVT::f32)
4722 return std::make_pair(0U, &LoongArch::FPR32RegClass);
4723 if (Subtarget.hasBasicD() && VT == MVT::f64)
4724 return std::make_pair(0U, &LoongArch::FPR64RegClass);
4725 if (Subtarget.hasExtLSX() &&
4726 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4727 return std::make_pair(0U, &LoongArch::LSX128RegClass);
4728 if (Subtarget.hasExtLASX() &&
4729 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4730 return std::make_pair(0U, &LoongArch::LASX256RegClass);
4731 break;
4732 default:
4733 break;
4734 }
4735 }
4736
4737 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4738 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4739 // constraints while the official register name is prefixed with a '$'. So we
4740 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
4741// before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4742 // case insensitive, so no need to convert the constraint to upper case here.
4743 //
4744 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4745 // decode the usage of register name aliases into their official names. And
4746 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4747 // official register names.
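// e.g. (illustrative, not from the source): the constraint "{$f10}" is
// clipped to "{f10}" below, and with the D feature present it is widened
// to F10_64 in FPR64RegClass.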
4748 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4749 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4750 bool IsFP = Constraint[2] == 'f';
4751 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4752 std::pair<unsigned, const TargetRegisterClass *> R;
4753 R = TargetLowering::getRegForInlineAsmConstraint(
4754 TRI, join_items("", Temp.first, Temp.second), VT);
4755 // Match those names to the widest floating point register type available.
4756 if (IsFP) {
4757 unsigned RegNo = R.first;
4758 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4759 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4760 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4761 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4762 }
4763 }
4764 }
4765 return R;
4766 }
4767
4768 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4769}
4770
4771void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4772 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4773 SelectionDAG &DAG) const {
4774 // Currently only support length 1 constraints.
4775 if (Constraint.size() == 1) {
4776 switch (Constraint[0]) {
4777 case 'l':
4778 // Validate & create a 16-bit signed immediate operand.
4779 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4780 uint64_t CVal = C->getSExtValue();
4781 if (isInt<16>(CVal))
4782 Ops.push_back(
4783 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4784 }
4785 return;
4786 case 'I':
4787 // Validate & create a 12-bit signed immediate operand.
4788 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4789 uint64_t CVal = C->getSExtValue();
4790 if (isInt<12>(CVal))
4791 Ops.push_back(
4792 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4793 }
4794 return;
4795 case 'J':
4796 // Validate & create an integer zero operand.
4797 if (auto *C = dyn_cast<ConstantSDNode>(Op))
4798 if (C->getZExtValue() == 0)
4799 Ops.push_back(
4800 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4801 return;
4802 case 'K':
4803 // Validate & create a 12-bit unsigned immediate operand.
4804 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4805 uint64_t CVal = C->getZExtValue();
4806 if (isUInt<12>(CVal))
4807 Ops.push_back(
4808 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4809 }
4810 return;
4811 default:
4812 break;
4813 }
4814 }
4815 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4816}
4817
4818#define GET_REGISTER_MATCHER
4819#include "LoongArchGenAsmMatcher.inc"
4820
4821Register
4822LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4823 const MachineFunction &MF) const {
4824 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4825 std::string NewRegName = Name.second.str();
4826 Register Reg = MatchRegisterAltName(NewRegName);
4827 if (Reg == LoongArch::NoRegister)
4828 Reg = MatchRegisterName(NewRegName);
4829 if (Reg == LoongArch::NoRegister)
4830 report_fatal_error(
4831 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4832 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4833 if (!ReservedRegs.test(Reg))
4834 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4835 StringRef(RegName) + "\"."));
4836 return Reg;
4837}
4838
4839bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4840 EVT VT, SDValue C) const {
4841 // TODO: Support vectors.
4842 if (!VT.isScalarInteger())
4843 return false;
4844
4845 // Omit the optimization if the data size exceeds GRLen.
4846 if (VT.getSizeInBits() > Subtarget.getGRLen())
4847 return false;
4848
4849 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4850 const APInt &Imm = ConstNode->getAPIntValue();
4851 // Break MUL into (SLLI + ADD/SUB) or ALSL.
4852 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4853 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4854 return true;
4855 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4856 if (ConstNode->hasOneUse() &&
4857 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4858 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4859 return true;
4860 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4861 // in which the immediate has two set bits. Or break (MUL x, imm)
4862 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4863 // equals (1 << s0) - (1 << s1).
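// Worked example (illustrative, not from the source): Imm = 4112 has
// countr_zero = 4 (< 12) and ImmPop = 257, and 4112 - 16 = 4096 is a
// power of two, so x * 4112 becomes (ADD (SLLI x, 12), (SLLI x, 4)).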
4864 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4865 unsigned Shifts = Imm.countr_zero();
4866 // Reject immediates which can be composed via a single LUI.
4867 if (Shifts >= 12)
4868 return false;
4869 // Reject multiplications that can be optimized to
4870 // (SLLI (ALSL x, x, 1/2/3/4), s).
4871 APInt ImmPop = Imm.ashr(Shifts);
4872 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4873 return false;
4874 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4875 // since it needs one more instruction than the other 3 cases.
4876 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4877 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4878 (ImmSmall - Imm).isPowerOf2())
4879 return true;
4880 }
4881 }
4882
4883 return false;
4884}
4885
4886bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4887 const AddrMode &AM,
4888 Type *Ty, unsigned AS,
4889 Instruction *I) const {
4890 // LoongArch has four basic addressing modes:
4891 // 1. reg
4892 // 2. reg + 12-bit signed offset
4893 // 3. reg + 14-bit signed offset left-shifted by 2
4894 // 4. reg1 + reg2
4895 // TODO: Add more checks after supporting the vector extension.
4896
4897 // No global is ever allowed as a base.
4898 if (AM.BaseGV)
4899 return false;
4900
4901 // Require a 12-bit signed offset, or a 14-bit signed offset left-shifted
4902 // by 2 when the `UAL` feature is available.
4903 if (!isInt<12>(AM.BaseOffs) &&
4904 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
4905 return false;
4906
4907 switch (AM.Scale) {
4908 case 0:
4909 // "r+i" or just "i", depending on HasBaseReg.
4910 break;
4911 case 1:
4912 // "r+r+i" is not allowed.
4913 if (AM.HasBaseReg && AM.BaseOffs)
4914 return false;
4915 // Otherwise we have "r+r" or "r+i".
4916 break;
4917 case 2:
4918 // "2*r+r" or "2*r+i" is not allowed.
4919 if (AM.HasBaseReg || AM.BaseOffs)
4920 return false;
4921 // Allow "2*r" as "r+r".
4922 break;
4923 default:
4924 return false;
4925 }
4926
4927 return true;
4928}
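// Illustrative outcomes under the rules above (offsets in bytes):
//
//   reg + 2040      -> legal (12-bit signed offset)
//   reg + 16380     -> legal only with UAL (14-bit signed offset << 2)
//   reg1 + reg2     -> legal (Scale == 1, no offset)
//   reg1 + reg2 + 8 -> illegal ("r+r+i")
//   2*reg           -> legal, treated as reg + reg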
4929
4930 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4931 return isInt<12>(Imm);
4932}
4933
4934 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4935 return isInt<12>(Imm);
4936}
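// Both hooks above mirror the signed 12-bit immediate field used by
// instructions such as addi.{w,d} and slti/sltui: values in [-2048, 2047]
// are legal, so e.g. isInt<12>(2047) holds while isInt<12>(2048) does not.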
4937
4938 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4939 // Zexts are free if they can be combined with a load.
4940 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4941 // poorly with type legalization of compares preferring sext.
4942 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4943 EVT MemVT = LD->getMemoryVT();
4944 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4945 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4946 LD->getExtensionType() == ISD::ZEXTLOAD))
4947 return true;
4948 }
4949
4950 return TargetLowering::isZExtFree(Val, VT2);
4951}
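// For example, (zext (load i8)) costs nothing here because it can be selected
// as a single zero-extending byte load (ld.bu); an i32 -> i64 zext is
// deliberately not reported as free, for the reason given above.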
4952
4953 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
4954 EVT DstVT) const {
4955 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4956}
4957
4958 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4959 // TODO: Support vectors.
4960 if (Y.getValueType().isVector())
4961 return false;
4962
4963 return !isa<ConstantSDNode>(Y);
4964}
4965
4966 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4967 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4968 return ISD::SIGN_EXTEND;
4969}
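// Illustrative rationale: the LL.W/SC.W loop that expands a 32-bit cmpxchg on
// LA64 compares against the sign-extended value produced by ll.w, so the
// expected operand must be sign-extended to match.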