//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                   MVT::i1, Promote);

  setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, until
  // we know which of sll and revb.2h is faster.

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {

  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {

    // Set libcalls.
    setLibcallName(RTLIB::MUL_I128, nullptr);
    // The MULO libcall is not part of libgcc, only compiler-rt.
    setLibcallName(RTLIB::MULO_I64, nullptr);
  }

  // The MULO libcall is not part of libgcc, only compiler-rt.
  setLibcallName(RTLIB::MULO_I128, nullptr);

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    if (Subtarget.is64Bit())

    if (!Subtarget.hasBasicD()) {

      if (Subtarget.is64Bit()) {

      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    if (Subtarget.is64Bit())

  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {

    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {

    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
                       VT, Expand);
    }
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {

    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {

    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
                       VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Function alignments.
  setMinFunctionAlignment(Align(4));
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  }
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  // TODO: custom shuffle.
  return SDValue();
}

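// Return true if the given operand is an undef value or a constant integer
// or floating-point node.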
static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

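// Return true if at least one operand of the given BUILD_VECTOR node is a
// constant or an undef value.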
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

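// Custom lowering for BUILD_VECTOR on LSX/LASX: constant splats are
// materialized as a splat constant of a suitably sized integer vector type,
// and fully variable vectors are assembled with a chain of INSERT_VECTOR_ELT
// nodes instead of being expanded to stores.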
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

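// EXTRACT_VECTOR_ELT can be matched directly only when the index is a
// constant, and for i8/i16 elements only when the index lies in the low half
// of the vector; everything else is left to the default expansion.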
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

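// INSERT_VECTOR_ELT can be matched directly only when the insertion index is
// a constant; a variable index is left to the default expansion.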
SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

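// __builtin_frame_address lowering: walk up the frame-pointer chain Depth
// times; each frame keeps the previous frame pointer two GRLen-sized slots
// below the current frame pointer.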
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently we only support lowering the return address for the current
  // frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

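// Custom uint-to-fp lowering for LA64 with 'F' but without 'D': when the
// operand is provably a zero-extended 32-bit (or narrower) value the node is
// kept as-is and selected directly; otherwise a soft-float libcall is emitted.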
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

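// Custom lowering for i32->f32 bitcasts on LA64 with 'F': any-extend the i32
// value to i64 first so it can be moved into an FPR32 register with the
// MOVGR2FR_W_LA64 node.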
SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

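// Custom fp-to-sint lowering: perform the conversion with the target FTINT
// node and then move or bitcast the floating-point result back into an
// integer register.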
SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst =
        DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

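// Compute the address of a symbol (global, block address, jump table, or
// constant-pool entry) under the given code model, using the PseudoLA_*
// pseudo instructions that later expand to pcalau12i-based sequences.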
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);

    // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
    // becomes the desired 5-insn code sequence.
    return SDValue(
        DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
        0);
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal)
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      return SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);

    // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
    // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
    return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
                   0);
  }
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

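// Lower a TLS address in the Initial Exec or Local Exec model: materialize
// the thread-relative offset (loaded from the GOT for IE, computed directly
// for LE) and add the thread pointer register ($tp, i.e. R2).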
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Offset = Large
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}

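// Lower a TLS address in the General or Local Dynamic model: compute the
// address of the symbol's GOT entry and call __tls_get_addr to resolve the
// final address at runtime.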
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  SDValue Addr;
  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
  case TLSModel::GeneralDynamic:
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                   : LoongArch::PseudoLA_TLS_GD,
                             Large);
    break;
  case TLSModel::LocalDynamic:
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    Addr = getDynamicTLSAddr(N, DAG,
                             Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                   : LoongArch::PseudoLA_TLS_LD,
                             Large);
    break;
  case TLSModel::InitialExec:
    // This model uses the GOT to resolve TLS offsets.
    Addr = getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            Large);
    break;
  case TLSModel::LocalExec:
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
    break;
  }

  return Addr;
}

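// Check that the immediate operand ImmOp of an intrinsic fits in N (signed or
// unsigned) bits: emit a diagnostic and return UNDEF when it does not, and an
// empty SDValue when it is in range.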
template <unsigned N>
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
                                    SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}

// Helper function that emits an error message for intrinsics with a chain and
// returns a merge of an UNDEF value and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
                                                  StringRef ErrorMsg,
                                                  SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
                            SDLoc(Op));
}

SDValue
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT VT = Op.getValueType();
  SDValue Chain = Op.getOperand(0);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (Op.getConstantOperandVal(1)) {
  default:
    return Op;
  case Intrinsic::loongarch_crc_w_b_w:
  case Intrinsic::loongarch_crc_w_h_w:
  case Intrinsic::loongarch_crc_w_w_w:
  case Intrinsic::loongarch_crc_w_d_w:
  case Intrinsic::loongarch_crcc_w_b_w:
  case Intrinsic::loongarch_crcc_w_h_w:
  case Intrinsic::loongarch_crcc_w_w_w:
  case Intrinsic::loongarch_crcc_w_d_w:
    return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
  case Intrinsic::loongarch_csrrd_w:
  case Intrinsic::loongarch_csrrd_d: {
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrwr_w:
  case Intrinsic::loongarch_csrwr_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_csrxchg_w:
  case Intrinsic::loongarch_csrxchg_d: {
    unsigned Imm = Op.getConstantOperandVal(4);
    return !isUInt<14>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
                             {Chain, Op.getOperand(2), Op.getOperand(3),
                              DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_iocsrrd_d: {
    return DAG.getNode(
        LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
  }
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
                       {Chain, Op.getOperand(2)});                             \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
  case Intrinsic::loongarch_cpucfg: {
    return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                       {Chain, Op.getOperand(2)});
  }
  case Intrinsic::loongarch_lddir_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !isUInt<8>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : Op;
  }
  case Intrinsic::loongarch_movfcsr2gr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op.getConstantOperandVal(2);
    return !isUInt<2>(Imm)
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
                             {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
  }
  case Intrinsic::loongarch_lsx_vld:
  case Intrinsic::loongarch_lsx_vldrepl_b:
  case Intrinsic::loongarch_lasx_xvld:
  case Intrinsic::loongarch_lasx_xvldrepl_b:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_h:
  case Intrinsic::loongarch_lasx_xvldrepl_h:
    return !isShiftedInt<11, 1>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_w:
  case Intrinsic::loongarch_lasx_xvldrepl_w:
    return !isShiftedInt<10, 2>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vldrepl_d:
  case Intrinsic::loongarch_lasx_xvldrepl_d:
    return !isShiftedInt<9, 3>(
               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
               ? emitIntrinsicWithChainErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

// Helper function that emits an error message for intrinsics with a void
// return value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
                                         SelectionDAG &DAG) {
  DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
  return Op.getOperand(0);
}

SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();
  SDValue Chain = Op.getOperand(0);
  uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
  SDValue Op2 = Op.getOperand(2);
  const StringRef ErrorMsgOOR = "argument out of range";
  const StringRef ErrorMsgReqLA64 = "requires loongarch64";
  const StringRef ErrorMsgReqLA32 = "requires loongarch32";
  const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

  switch (IntrinsicEnum) {
  default:
    // TODO: Add more Intrinsics.
    return SDValue();
  case Intrinsic::loongarch_cacop_d:
  case Intrinsic::loongarch_cacop_w: {
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
    if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
    // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
    unsigned Imm1 = Op2->getAsZExtVal();
    int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
    if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
      return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
    return Op;
  }
  case Intrinsic::loongarch_dbar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_ibar: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_break: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
  case Intrinsic::loongarch_movgr2fcsr: {
    if (!Subtarget.hasBasicF())
      return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<2>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT),
                             DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
                                         Op.getOperand(3)));
  }
  case Intrinsic::loongarch_syscall: {
    unsigned Imm = Op2->getAsZExtVal();
    return !isUInt<15>(Imm)
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
                             DAG.getConstant(Imm, DL, GRLenVT));
  }
#define IOCSRWR_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue Op3 = Op.getOperand(3);                                            \
    return Subtarget.is64Bit()                                                 \
               ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
               : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
                             Op3);                                             \
  }
    IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
    IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
    IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
  case Intrinsic::loongarch_iocsrwr_d: {
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
               : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
                             Op2,
                             DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                         Op.getOperand(3)));
  }
#define ASRT_LE_GT_CASE(NAME)                                                  \
  case Intrinsic::loongarch_##NAME: {                                          \
    return !Subtarget.is64Bit()                                                \
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
               : Op;                                                           \
  }
    ASRT_LE_GT_CASE(asrtle_d)
    ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
  case Intrinsic::loongarch_ldpte_d: {
    unsigned Imm = Op.getConstantOperandVal(3);
    return !Subtarget.is64Bit()
               ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
           : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
                             : Op;
  }
  case Intrinsic::loongarch_lsx_vst:
  case Intrinsic::loongarch_lasx_xvst:
    return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<5>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_b:
    return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<4>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_h:
    return (!isShiftedInt<8, 1>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 2", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<3>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_w:
    return (!isShiftedInt<8, 2>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 4", DAG)
               : SDValue();
  case Intrinsic::loongarch_lasx_xvstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<2>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  case Intrinsic::loongarch_lsx_vstelm_d:
    return (!isShiftedInt<8, 3>(
                cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
            !isUInt<1>(Op.getConstantOperandVal(5)))
               ? emitIntrinsicErrorMessage(
                     Op, "argument out of range or not a multiple of 8", DAG)
               : SDValue();
  }
}

SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-GRLen < 0: // Shamt < GRLen
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-GRLen)

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
                                                      SelectionDAG &DAG,
                                                      bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-GRLen);
  //     Hi = Hi >>s (GRLen-1)
  //
  // SRL expansion:
  //   if Shamt-GRLen < 0: // Shamt < GRLen
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-GRLen);
  //     Hi = 0;

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
  SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
  SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
  SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected opcode");
  case ISD::SHL:
    return LoongArchISD::SLL_W;
  case ISD::SRA:
    return LoongArchISD::SRA_W;
  case ISD::SRL:
    return LoongArchISD::SRL_W;
  case ISD::ROTR:
    return LoongArchISD::ROTR_W;
  case ISD::ROTL:
    return LoongArchISD::ROTL_W;
  case ISD::CTTZ:
    return LoongArchISD::CTZ_W;
  case ISD::CTLZ:
    return LoongArchISD::CLZ_W;
  }
}

// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*_W nodes later, because the fact that the operation was
// originally of type i8/i16/i32 would be lost.
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
  SDLoc DL(N);
  LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
  SDValue NewOp0, NewRes;

  switch (NumOp) {
  default:
    llvm_unreachable("Unexpected NumOp");
  case 1: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
    break;
  }
  case 2: {
    NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
    NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    break;
  }
  // TODO: Handle more NumOp values.
  }

  // ReplaceNodeResults requires we maintain the same type for the return
  // value.
  return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}

// Helper function that emits an error message for intrinsics with or without
// a chain, and replaces the results with an UNDEF value (followed by the
// chain, if present).
static void emitErrorAndReplaceIntrinsicResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
    StringRef ErrorMsg, bool WithChain = true) {
  DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
  Results.push_back(DAG.getUNDEF(N->getValueType(0)));
  if (!WithChain)
    return;
  Results.push_back(N->getOperand(0));
}

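// Replace an LSX/LASX vpickve2gr intrinsic node with the target element-pick
// node ResOp after range-checking the lane immediate against N bits; the
// GRLen-sized result is truncated back to the node's original result type.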
template <unsigned N>
static void
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
                         unsigned ResOp) {
  const StringRef ErrorMsgOOR = "argument out of range";
  unsigned Imm = Node->getConstantOperandVal(2);
  if (!isUInt<N>(Imm)) {
    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
                                        /*WithChain=*/false);
    return;
  }
  SDLoc DL(Node);
  SDValue Vec = Node->getOperand(1);

  SDValue PickElt =
      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
                                PickElt.getValue(0)));
}

static void replaceVecCondBranchResults(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
                                        SelectionDAG &DAG,
                                        const LoongArchSubtarget &Subtarget,
                                        unsigned ResOp) {
  SDLoc DL(N);
  SDValue Vec = N->getOperand(1);

  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
  Results.push_back(
      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
}

1744static void
1746 SelectionDAG &DAG,
1747 const LoongArchSubtarget &Subtarget) {
1748 switch (N->getConstantOperandVal(0)) {
1749 default:
1750 llvm_unreachable("Unexpected Intrinsic.");
1751 case Intrinsic::loongarch_lsx_vpickve2gr_b:
1752 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1754 break;
1755 case Intrinsic::loongarch_lsx_vpickve2gr_h:
1756 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1757 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1759 break;
1760 case Intrinsic::loongarch_lsx_vpickve2gr_w:
1761 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1763 break;
1764 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1765 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1767 break;
1768 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1769 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1770 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1772 break;
1773 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1774 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1776 break;
1777 case Intrinsic::loongarch_lsx_bz_b:
1778 case Intrinsic::loongarch_lsx_bz_h:
1779 case Intrinsic::loongarch_lsx_bz_w:
1780 case Intrinsic::loongarch_lsx_bz_d:
1781 case Intrinsic::loongarch_lasx_xbz_b:
1782 case Intrinsic::loongarch_lasx_xbz_h:
1783 case Intrinsic::loongarch_lasx_xbz_w:
1784 case Intrinsic::loongarch_lasx_xbz_d:
1785 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1786 LoongArchISD::VANY_ZERO);
1787 break;
1788 case Intrinsic::loongarch_lsx_bz_v:
1789 case Intrinsic::loongarch_lasx_xbz_v:
1790 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1791 LoongArchISD::VALL_ZERO);
1792 break;
1793 case Intrinsic::loongarch_lsx_bnz_b:
1794 case Intrinsic::loongarch_lsx_bnz_h:
1795 case Intrinsic::loongarch_lsx_bnz_w:
1796 case Intrinsic::loongarch_lsx_bnz_d:
1797 case Intrinsic::loongarch_lasx_xbnz_b:
1798 case Intrinsic::loongarch_lasx_xbnz_h:
1799 case Intrinsic::loongarch_lasx_xbnz_w:
1800 case Intrinsic::loongarch_lasx_xbnz_d:
1801 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1802 LoongArchISD::VALL_NONZERO);
1803 break;
1804 case Intrinsic::loongarch_lsx_bnz_v:
1805 case Intrinsic::loongarch_lasx_xbnz_v:
1806 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1807 LoongArchISD::VANY_NONZERO);
1808 break;
1809 }
1810}
1811
1812void LoongArchTargetLowering::ReplaceNodeResults(
1813 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1814 SDLoc DL(N);
1815 EVT VT = N->getValueType(0);
1816 switch (N->getOpcode()) {
1817 default:
1818 llvm_unreachable("Don't know how to legalize this operation");
1819 case ISD::SHL:
1820 case ISD::SRA:
1821 case ISD::SRL:
1822 case ISD::ROTR:
1823 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1824 "Unexpected custom legalisation");
1825 if (N->getOperand(1).getOpcode() != ISD::Constant) {
1826 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1827 break;
1828 }
1829 break;
1830 case ISD::ROTL:
1831 ConstantSDNode *CN;
1832 if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1833 Results.push_back(customLegalizeToWOp(N, DAG, 2));
1834 break;
1835 }
1836 break;
1837 case ISD::FP_TO_SINT: {
1838 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1839 "Unexpected custom legalisation");
1840 SDValue Src = N->getOperand(0);
1841 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1842 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1843 TargetLowering::TypeSoftenFloat) {
1844 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1845 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1846 return;
1847 }
1848 // If the FP type needs to be softened, emit a library call using the 'si'
1849 // version. If we left it to default legalization we'd end up with 'di'.
1850 RTLIB::Libcall LC;
1851 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1852 MakeLibCallOptions CallOptions;
1853 EVT OpVT = Src.getValueType();
1854 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1855 SDValue Chain = SDValue();
1856 SDValue Result;
1857 std::tie(Result, Chain) =
1858 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1859 Results.push_back(Result);
1860 break;
1861 }
1862 case ISD::BITCAST: {
1863 SDValue Src = N->getOperand(0);
1864 EVT SrcVT = Src.getValueType();
1865 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1866 Subtarget.hasBasicF()) {
1867 SDValue Dst =
1868 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1869 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1870 }
1871 break;
1872 }
1873 case ISD::FP_TO_UINT: {
1874 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1875 "Unexpected custom legalisation");
1876 auto &TLI = DAG.getTargetLoweringInfo();
1877 SDValue Tmp1, Tmp2;
1878 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1879 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1880 break;
1881 }
1882 case ISD::BSWAP: {
1883 SDValue Src = N->getOperand(0);
1884 assert((VT == MVT::i16 || VT == MVT::i32) &&
1885 "Unexpected custom legalization");
1886 MVT GRLenVT = Subtarget.getGRLenVT();
1887 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1888 SDValue Tmp;
1889 switch (VT.getSizeInBits()) {
1890 default:
1891 llvm_unreachable("Unexpected operand width");
1892 case 16:
1893 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1894 break;
1895 case 32:
1896 // Only LA64 will get here, due to the size mismatch between VT and
1897 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
1898 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1899 break;
1900 }
1901 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1902 break;
1903 }
1904 case ISD::BITREVERSE: {
1905 SDValue Src = N->getOperand(0);
1906 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1907 "Unexpected custom legalization");
1908 MVT GRLenVT = Subtarget.getGRLenVT();
1909 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1910 SDValue Tmp;
1911 switch (VT.getSizeInBits()) {
1912 default:
1913 llvm_unreachable("Unexpected operand width");
1914 case 8:
1915 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1916 break;
1917 case 32:
1918 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1919 break;
1920 }
1921 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1922 break;
1923 }
1924 case ISD::CTLZ:
1925 case ISD::CTTZ: {
1926 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1927 "Unexpected custom legalisation");
1928 Results.push_back(customLegalizeToWOp(N, DAG, 1));
1929 break;
1930 }
1931 case ISD::INTRINSIC_W_CHAIN: {
1932 SDValue Chain = N->getOperand(0);
1933 SDValue Op2 = N->getOperand(2);
1934 MVT GRLenVT = Subtarget.getGRLenVT();
1935 const StringRef ErrorMsgOOR = "argument out of range";
1936 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1937 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1938
1939 switch (N->getConstantOperandVal(1)) {
1940 default:
1941 llvm_unreachable("Unexpected Intrinsic.");
1942 case Intrinsic::loongarch_movfcsr2gr: {
1943 if (!Subtarget.hasBasicF()) {
1944 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1945 return;
1946 }
1947 unsigned Imm = Op2->getAsZExtVal();
1948 if (!isUInt<2>(Imm)) {
1949 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1950 return;
1951 }
1952 SDValue MOVFCSR2GRResults = DAG.getNode(
1953 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1954 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1955 Results.push_back(
1956 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1957 Results.push_back(MOVFCSR2GRResults.getValue(1));
1958 break;
1959 }
1960#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
1961 case Intrinsic::loongarch_##NAME: { \
1962 SDValue NODE = DAG.getNode( \
1963 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1964 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
1965 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1966 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1967 Results.push_back(NODE.getValue(1)); \
1968 break; \
1969 }
1970 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1971 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1972 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1973 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1974 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1975 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1976#undef CRC_CASE_EXT_BINARYOP
1977
1978#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
1979 case Intrinsic::loongarch_##NAME: { \
1980 SDValue NODE = DAG.getNode( \
1981 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
1982 {Chain, Op2, \
1983 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
1984 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
1985 Results.push_back(NODE.getValue(1)); \
1986 break; \
1987 }
1988 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1989 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1990#undef CRC_CASE_EXT_UNARYOP
1991#define CSR_CASE(ID) \
1992 case Intrinsic::loongarch_##ID: { \
1993 if (!Subtarget.is64Bit()) \
1994 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
1995 break; \
1996 }
1997 CSR_CASE(csrrd_d);
1998 CSR_CASE(csrwr_d);
1999 CSR_CASE(csrxchg_d);
2000 CSR_CASE(iocsrrd_d);
2001#undef CSR_CASE
2002 case Intrinsic::loongarch_csrrd_w: {
2003 unsigned Imm = Op2->getAsZExtVal();
2004 if (!isUInt<14>(Imm)) {
2005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2006 return;
2007 }
2008 SDValue CSRRDResults =
2009 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2011 Results.push_back(
2012 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2013 Results.push_back(CSRRDResults.getValue(1));
2014 break;
2015 }
2016 case Intrinsic::loongarch_csrwr_w: {
2017 unsigned Imm = N->getConstantOperandVal(3);
2018 if (!isUInt<14>(Imm)) {
2019 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2020 return;
2021 }
2022 SDValue CSRWRResults =
2023 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2024 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2025 DAG.getConstant(Imm, DL, GRLenVT)});
2026 Results.push_back(
2027 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2028 Results.push_back(CSRWRResults.getValue(1));
2029 break;
2030 }
2031 case Intrinsic::loongarch_csrxchg_w: {
2032 unsigned Imm = N->getConstantOperandVal(4);
2033 if (!isUInt<14>(Imm)) {
2034 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2035 return;
2036 }
2037 SDValue CSRXCHGResults = DAG.getNode(
2038 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2039 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2040 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2041 DAG.getConstant(Imm, DL, GRLenVT)});
2042 Results.push_back(
2043 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2044 Results.push_back(CSRXCHGResults.getValue(1));
2045 break;
2046 }
2047#define IOCSRRD_CASE(NAME, NODE) \
2048 case Intrinsic::loongarch_##NAME: { \
2049 SDValue IOCSRRDResults = \
2050 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2051 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2052 Results.push_back( \
2053 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
2054 Results.push_back(IOCSRRDResults.getValue(1)); \
2055 break; \
2056 }
2057 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2058 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2059 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2060#undef IOCSRRD_CASE
2061 case Intrinsic::loongarch_cpucfg: {
2062 SDValue CPUCFGResults =
2063 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2064 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2065 Results.push_back(
2066 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2067 Results.push_back(CPUCFGResults.getValue(1));
2068 break;
2069 }
2070 case Intrinsic::loongarch_lddir_d: {
2071 if (!Subtarget.is64Bit()) {
2072 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2073 return;
2074 }
2075 break;
2076 }
2077 }
2078 break;
2079 }
2080 case ISD::READ_REGISTER: {
2081 if (Subtarget.is64Bit())
2082 DAG.getContext()->emitError(
2083 "On LA64, only 64-bit registers can be read.");
2084 else
2085 DAG.getContext()->emitError(
2086 "On LA32, only 32-bit registers can be read.");
2087 Results.push_back(DAG.getUNDEF(VT));
2088 Results.push_back(N->getOperand(0));
2089 break;
2090 }
2091 case ISD::INTRINSIC_WO_CHAIN: {
2092 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2093 break;
2094 }
2095 }
2096}
2097
2098static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2099 TargetLowering::DAGCombinerInfo &DCI,
2100 const LoongArchSubtarget &Subtarget) {
2101 if (DCI.isBeforeLegalizeOps())
2102 return SDValue();
2103
2104 SDValue FirstOperand = N->getOperand(0);
2105 SDValue SecondOperand = N->getOperand(1);
2106 unsigned FirstOperandOpc = FirstOperand.getOpcode();
2107 EVT ValTy = N->getValueType(0);
2108 SDLoc DL(N);
2109 uint64_t lsb, msb;
2110 unsigned SMIdx, SMLen;
2111 ConstantSDNode *CN;
2112 SDValue NewOperand;
2113 MVT GRLenVT = Subtarget.getGRLenVT();
2114
2115 // Op's second operand must be a shifted mask.
2116 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2117 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2118 return SDValue();
2119
2120 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2121 // Pattern match BSTRPICK.
2122 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
2123 // => BSTRPICK $dst, $src, msb, lsb
2124 // where msb = lsb + len - 1
2125
2126 // The second operand of the shift must be an immediate.
2127 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2128 return SDValue();
2129
2130 lsb = CN->getZExtValue();
2131
2132 // Return if the shifted mask does not start at bit 0 or the sum of its
2133 // length and lsb exceeds the word's size.
2134 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2135 return SDValue();
2136
2137 NewOperand = FirstOperand.getOperand(0);
2138 } else {
2139 // Pattern match BSTRPICK.
2140 // $dst = and $src, (2**len - 1), if len > 12
2141 // => BSTRPICK $dst, $src, msb, lsb
2142 // where lsb = 0 and msb = len - 1
2143
2144 // If the mask is <= 0xfff, andi can be used instead.
2145 if (CN->getZExtValue() <= 0xfff)
2146 return SDValue();
2147
2148 // Return if the mask's MSB position exceeds the value's width.
2149 if (SMIdx + SMLen > ValTy.getSizeInBits())
2150 return SDValue();
2151
2152 if (SMIdx > 0) {
2153 // Omit if the constant has more than 2 uses. This is a conservative
2154 // decision: whether it is a win depends on the HW microarchitecture,
2155 // but it should always be profitable for 1 and 2 uses.
2156 if (CN->use_size() > 2)
2157 return SDValue();
2158 // Return if the constant can be composed by a single LU12I.W.
2159 if ((CN->getZExtValue() & 0xfff) == 0)
2160 return SDValue();
2161 // Return if the constant can be composed by a single ADDI with
2162 // the zero register.
2163 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2164 return SDValue();
2165 }
2166
2167 lsb = SMIdx;
2168 NewOperand = FirstOperand;
2169 }
2170
2171 msb = lsb + SMLen - 1;
2172 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2173 DAG.getConstant(msb, DL, GRLenVT),
2174 DAG.getConstant(lsb, DL, GRLenVT));
2175 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2176 return NR0;
2177 // Try to optimize to
2178 // bstrpick $Rd, $Rs, msb, lsb
2179 // slli $Rd, $Rd, lsb
2180 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2181 DAG.getConstant(lsb, DL, GRLenVT));
2182}
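// A concrete instance of the shift pattern above (values illustrative):
//   $dst = and (srl $src, 8), 0xfff
//   => BSTRPICK $dst, $src, 19, 8
// since lsb = 8, len = 12 and msb = lsb + len - 1 = 19.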
2183
2184static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2185 TargetLowering::DAGCombinerInfo &DCI,
2186 const LoongArchSubtarget &Subtarget) {
2187 if (DCI.isBeforeLegalizeOps())
2188 return SDValue();
2189
2190 // $dst = srl (and $src, Mask), Shamt
2191 // =>
2192 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2193 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2194 //
2195
2196 SDValue FirstOperand = N->getOperand(0);
2197 ConstantSDNode *CN;
2198 EVT ValTy = N->getValueType(0);
2199 SDLoc DL(N);
2200 MVT GRLenVT = Subtarget.getGRLenVT();
2201 unsigned MaskIdx, MaskLen;
2202 uint64_t Shamt;
2203
2204 // The first operand must be an AND and the second operand of the AND must be
2205 // a shifted mask.
2206 if (FirstOperand.getOpcode() != ISD::AND ||
2207 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2208 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2209 return SDValue();
2210
2211 // The second operand (shift amount) must be an immediate.
2212 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2213 return SDValue();
2214
2215 Shamt = CN->getZExtValue();
2216 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2217 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2218 FirstOperand->getOperand(0),
2219 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2220 DAG.getConstant(Shamt, DL, GRLenVT));
2221
2222 return SDValue();
2223}
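// Concrete instance (values illustrative): Mask = 0xff00 gives MaskIdx = 8
// and MaskLen = 8, so with Shamt = 8
//   $dst = srl (and $src, 0xff00), 8
//   => BSTRPICK $dst, $src, 15, 8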
2224
2225static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2226 TargetLowering::DAGCombinerInfo &DCI,
2227 const LoongArchSubtarget &Subtarget) {
2228 MVT GRLenVT = Subtarget.getGRLenVT();
2229 EVT ValTy = N->getValueType(0);
2230 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2231 ConstantSDNode *CN0, *CN1;
2232 SDLoc DL(N);
2233 unsigned ValBits = ValTy.getSizeInBits();
2234 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2235 unsigned Shamt;
2236 bool SwapAndRetried = false;
2237
2238 if (DCI.isBeforeLegalizeOps())
2239 return SDValue();
2240
2241 if (ValBits != 32 && ValBits != 64)
2242 return SDValue();
2243
2244Retry:
2245 // 1st pattern to match BSTRINS:
2246 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
2247 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2248 // =>
2249 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2250 if (N0.getOpcode() == ISD::AND &&
2251 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2252 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2253 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2254 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2255 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2256 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2257 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2258 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2259 (MaskIdx0 + MaskLen0 <= ValBits)) {
2260 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2261 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2262 N1.getOperand(0).getOperand(0),
2263 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2264 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2265 }
2266
2267 // 2nd pattern to match BSTRINS:
2268 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
2269 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2270 // =>
2271 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
2272 if (N0.getOpcode() == ISD::AND &&
2273 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2274 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2275 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2276 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2277 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2278 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2279 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2280 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2281 (MaskIdx0 + MaskLen0 <= ValBits)) {
2282 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2283 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2284 N1.getOperand(0).getOperand(0),
2285 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2286 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2287 }
2288
2289 // 3rd pattern to match BSTRINS:
2290 // R = or (and X, mask0), (and Y, mask1)
2291 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2292 // =>
2293 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2294 // where msb = lsb + size - 1
2295 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2296 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2297 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2298 (MaskIdx0 + MaskLen0 <= 64) &&
2299 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2300 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2301 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2302 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2303 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2304 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2305 DAG.getConstant(ValBits == 32
2306 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2307 : (MaskIdx0 + MaskLen0 - 1),
2308 DL, GRLenVT),
2309 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2310 }
2311
2312 // 4th pattern to match BSTRINS:
2313 // R = or (and X, mask), (shl Y, shamt)
2314 // where mask = (2**shamt - 1)
2315 // =>
2316 // R = BSTRINS X, Y, ValBits - 1, shamt
2317 // where ValBits = 32 or 64
2318 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2319 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2320 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2321 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2322 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2323 (MaskIdx0 + MaskLen0 <= ValBits)) {
2324 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2325 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2326 N1.getOperand(0),
2327 DAG.getConstant((ValBits - 1), DL, GRLenVT),
2328 DAG.getConstant(Shamt, DL, GRLenVT));
2329 }
2330
2331 // 5th pattern to match BSTRINS:
2332 // R = or (and X, mask), const
2333 // where ~mask = (2**size - 1) << lsb, mask & const = 0
2334 // =>
2335 // R = BSTRINS X, (const >> lsb), msb, lsb
2336 // where msb = lsb + size - 1
2337 if (N0.getOpcode() == ISD::AND &&
2338 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2339 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2340 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2341 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2342 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2343 return DAG.getNode(
2344 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2345 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2346 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2347 : (MaskIdx0 + MaskLen0 - 1),
2348 DL, GRLenVT),
2349 DAG.getConstant(MaskIdx0, DL, GRLenVT));
2350 }
2351
2352 // 6th pattern.
2353 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2354 // by the incoming bits are known to be zero.
2355 // =>
2356 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2357 //
2358 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
2359 // pattern is more common than the 1st. So we put the 1st before the 6th in
2360 // order to match as many nodes as possible.
2361 ConstantSDNode *CNMask, *CNShamt;
2362 unsigned MaskIdx, MaskLen;
2363 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2364 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2365 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2366 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2367 CNShamt->getZExtValue() + MaskLen <= ValBits) {
2368 Shamt = CNShamt->getZExtValue();
2369 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2370 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2371 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2372 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2373 N1.getOperand(0).getOperand(0),
2374 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2375 DAG.getConstant(Shamt, DL, GRLenVT));
2376 }
2377 }
2378
2379 // 7th pattern.
2380 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2381 // overwritten by the incoming bits are known to be zero.
2382 // =>
2383 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2384 //
2385 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2386 // before the 7th in order to match as many nodes as possible.
2387 if (N1.getOpcode() == ISD::AND &&
2388 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2389 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2390 N1.getOperand(0).getOpcode() == ISD::SHL &&
2391 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2392 CNShamt->getZExtValue() == MaskIdx) {
2393 APInt ShMask(ValBits, CNMask->getZExtValue());
2394 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2395 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2396 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2397 N1.getOperand(0).getOperand(0),
2398 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2399 DAG.getConstant(MaskIdx, DL, GRLenVT));
2400 }
2401 }
2402
2403 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2404 if (!SwapAndRetried) {
2405 std::swap(N0, N1);
2406 SwapAndRetried = true;
2407 goto Retry;
2408 }
2409
2410 SwapAndRetried = false;
2411Retry2:
2412 // 8th pattern.
2413 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2414 // the incoming bits are known to be zero.
2415 // =>
2416 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2417 //
2418 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2419 // we put it here in order to match as many nodes as possible and generate
2420 // fewer instructions.
2421 if (N1.getOpcode() == ISD::AND &&
2422 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2423 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2424 APInt ShMask(ValBits, CNMask->getZExtValue());
2425 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2426 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2427 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2428 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2429 N1->getOperand(0),
2430 DAG.getConstant(MaskIdx, DL, GRLenVT)),
2431 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2432 DAG.getConstant(MaskIdx, DL, GRLenVT));
2433 }
2434 }
2435 // Swap N0/N1 and retry.
2436 if (!SwapAndRetried) {
2437 std::swap(N0, N1);
2438 SwapAndRetried = true;
2439 goto Retry2;
2440 }
2441
2442 return SDValue();
2443}
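// A concrete instance of the 1st pattern above (i32, size = 8, lsb = 8, so
// mask1 = 0xff00 and mask0 = 0xffff00ff):
//   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00)
//   => R = BSTRINS X, Y, 15, 8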
2444
2445// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2446static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2447 TargetLowering::DAGCombinerInfo &DCI,
2448 const LoongArchSubtarget &Subtarget) {
2449 if (DCI.isBeforeLegalizeOps())
2450 return SDValue();
2451
2452 SDValue Src = N->getOperand(0);
2453 if (Src.getOpcode() != LoongArchISD::REVB_2W)
2454 return SDValue();
2455
2456 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2457 Src.getOperand(0));
2458}
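// The combine is sound because bit-reversing a byte-swapped word reverses the
// bits within each byte while leaving the bytes in place, i.e.
//   bitrev.w (revb.2w x) == bitrev.4b x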
2459
2460template <unsigned N>
2461static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2462 SelectionDAG &DAG,
2463 const LoongArchSubtarget &Subtarget,
2464 bool IsSigned = false) {
2465 SDLoc DL(Node);
2466 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2467 // Check the ImmArg.
2468 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2469 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2470 DAG.getContext()->emitError(Node->getOperationName(0) +
2471 ": argument out of range.");
2472 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2473 }
2474 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2475}
2476
2477template <unsigned N>
2478static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2479 SelectionDAG &DAG, bool IsSigned = false) {
2480 SDLoc DL(Node);
2481 EVT ResTy = Node->getValueType(0);
2482 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2483
2484 // Check the ImmArg.
2485 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2486 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2487 DAG.getContext()->emitError(Node->getOperationName(0) +
2488 ": argument out of range.");
2489 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2490 }
2491 return DAG.getConstant(
2492 APInt(ResTy.getScalarType().getSizeInBits(),
2493 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2494 DL, ResTy);
2495}
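// For example, @llvm.loongarch.lsx.vaddi.bu(v, 3) is lowered via this helper
// to an ISD::ADD whose second operand is a v16i8 splat of 3; an immediate
// that does not fit in N bits is diagnosed and yields UNDEF instead.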
2496
2497static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2498 SDLoc DL(Node);
2499 EVT ResTy = Node->getValueType(0);
2500 SDValue Vec = Node->getOperand(2);
2501 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2502 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2503}
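// This reproduces the hardware's modulo-width shift semantics: for e.g. a
// v4i32 operand, each per-lane shift amount is AND'ed with 31 before the
// generic shift node is built.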
2504
2505static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2506 SDLoc DL(Node);
2507 EVT ResTy = Node->getValueType(0);
2508 SDValue One = DAG.getConstant(1, DL, ResTy);
2509 SDValue Bit =
2510 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2511
2512 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2513 DAG.getNOT(DL, Bit, ResTy));
2514}
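// Each lane thus computes v1 & ~(1 << (v2 % EltBits)), which is exactly what
// vbitclr.{b/h/w/d} does; the modulo comes from truncateVecElts above.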
2515
2516template <unsigned N>
2517static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2518 SDLoc DL(Node);
2519 EVT ResTy = Node->getValueType(0);
2520 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2521 // Check the unsigned ImmArg.
2522 if (!isUInt<N>(CImm->getZExtValue())) {
2523 DAG.getContext()->emitError(Node->getOperationName(0) +
2524 ": argument out of range.");
2525 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2526 }
2527
2528 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2529 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2530
2531 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2532}
2533
2534template <unsigned N>
2535static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2536 SDLoc DL(Node);
2537 EVT ResTy = Node->getValueType(0);
2538 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2539 // Check the unsigned ImmArg.
2540 if (!isUInt<N>(CImm->getZExtValue())) {
2541 DAG.getContext()->emitError(Node->getOperationName(0) +
2542 ": argument out of range.");
2543 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2544 }
2545
2546 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2547 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2548 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2549}
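// Example: vbitseti.w with imm = 4 ORs each i32 lane with 0x10. The immediate
// bit-clear form above ANDs with the complement of the same one-hot constant,
// and the bit-reverse form below XORs with it.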
2550
2551template <unsigned N>
2552static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2553 SDLoc DL(Node);
2554 EVT ResTy = Node->getValueType(0);
2555 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2556 // Check the unsigned ImmArg.
2557 if (!isUInt<N>(CImm->getZExtValue())) {
2558 DAG.getContext()->emitError(Node->getOperationName(0) +
2559 ": argument out of range.");
2560 return DAG.getNode(ISD::UNDEF, DL, ResTy);
2561 }
2562
2563 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2564 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2565 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2566}
2567
2568static SDValue
2569performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2570 TargetLowering::DAGCombinerInfo &DCI,
2571 const LoongArchSubtarget &Subtarget) {
2572 SDLoc DL(N);
2573 switch (N->getConstantOperandVal(0)) {
2574 default:
2575 break;
2576 case Intrinsic::loongarch_lsx_vadd_b:
2577 case Intrinsic::loongarch_lsx_vadd_h:
2578 case Intrinsic::loongarch_lsx_vadd_w:
2579 case Intrinsic::loongarch_lsx_vadd_d:
2580 case Intrinsic::loongarch_lasx_xvadd_b:
2581 case Intrinsic::loongarch_lasx_xvadd_h:
2582 case Intrinsic::loongarch_lasx_xvadd_w:
2583 case Intrinsic::loongarch_lasx_xvadd_d:
2584 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2585 N->getOperand(2));
2586 case Intrinsic::loongarch_lsx_vaddi_bu:
2587 case Intrinsic::loongarch_lsx_vaddi_hu:
2588 case Intrinsic::loongarch_lsx_vaddi_wu:
2589 case Intrinsic::loongarch_lsx_vaddi_du:
2590 case Intrinsic::loongarch_lasx_xvaddi_bu:
2591 case Intrinsic::loongarch_lasx_xvaddi_hu:
2592 case Intrinsic::loongarch_lasx_xvaddi_wu:
2593 case Intrinsic::loongarch_lasx_xvaddi_du:
2594 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2595 lowerVectorSplatImm<5>(N, 2, DAG));
2596 case Intrinsic::loongarch_lsx_vsub_b:
2597 case Intrinsic::loongarch_lsx_vsub_h:
2598 case Intrinsic::loongarch_lsx_vsub_w:
2599 case Intrinsic::loongarch_lsx_vsub_d:
2600 case Intrinsic::loongarch_lasx_xvsub_b:
2601 case Intrinsic::loongarch_lasx_xvsub_h:
2602 case Intrinsic::loongarch_lasx_xvsub_w:
2603 case Intrinsic::loongarch_lasx_xvsub_d:
2604 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2605 N->getOperand(2));
2606 case Intrinsic::loongarch_lsx_vsubi_bu:
2607 case Intrinsic::loongarch_lsx_vsubi_hu:
2608 case Intrinsic::loongarch_lsx_vsubi_wu:
2609 case Intrinsic::loongarch_lsx_vsubi_du:
2610 case Intrinsic::loongarch_lasx_xvsubi_bu:
2611 case Intrinsic::loongarch_lasx_xvsubi_hu:
2612 case Intrinsic::loongarch_lasx_xvsubi_wu:
2613 case Intrinsic::loongarch_lasx_xvsubi_du:
2614 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2615 lowerVectorSplatImm<5>(N, 2, DAG));
2616 case Intrinsic::loongarch_lsx_vneg_b:
2617 case Intrinsic::loongarch_lsx_vneg_h:
2618 case Intrinsic::loongarch_lsx_vneg_w:
2619 case Intrinsic::loongarch_lsx_vneg_d:
2620 case Intrinsic::loongarch_lasx_xvneg_b:
2621 case Intrinsic::loongarch_lasx_xvneg_h:
2622 case Intrinsic::loongarch_lasx_xvneg_w:
2623 case Intrinsic::loongarch_lasx_xvneg_d:
2624 return DAG.getNode(
2625 ISD::SUB, DL, N->getValueType(0),
2626 DAG.getConstant(
2627 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2628 /*isSigned=*/true),
2629 SDLoc(N), N->getValueType(0)),
2630 N->getOperand(1));
2631 case Intrinsic::loongarch_lsx_vmax_b:
2632 case Intrinsic::loongarch_lsx_vmax_h:
2633 case Intrinsic::loongarch_lsx_vmax_w:
2634 case Intrinsic::loongarch_lsx_vmax_d:
2635 case Intrinsic::loongarch_lasx_xvmax_b:
2636 case Intrinsic::loongarch_lasx_xvmax_h:
2637 case Intrinsic::loongarch_lasx_xvmax_w:
2638 case Intrinsic::loongarch_lasx_xvmax_d:
2639 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2640 N->getOperand(2));
2641 case Intrinsic::loongarch_lsx_vmax_bu:
2642 case Intrinsic::loongarch_lsx_vmax_hu:
2643 case Intrinsic::loongarch_lsx_vmax_wu:
2644 case Intrinsic::loongarch_lsx_vmax_du:
2645 case Intrinsic::loongarch_lasx_xvmax_bu:
2646 case Intrinsic::loongarch_lasx_xvmax_hu:
2647 case Intrinsic::loongarch_lasx_xvmax_wu:
2648 case Intrinsic::loongarch_lasx_xvmax_du:
2649 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2650 N->getOperand(2));
2651 case Intrinsic::loongarch_lsx_vmaxi_b:
2652 case Intrinsic::loongarch_lsx_vmaxi_h:
2653 case Intrinsic::loongarch_lsx_vmaxi_w:
2654 case Intrinsic::loongarch_lsx_vmaxi_d:
2655 case Intrinsic::loongarch_lasx_xvmaxi_b:
2656 case Intrinsic::loongarch_lasx_xvmaxi_h:
2657 case Intrinsic::loongarch_lasx_xvmaxi_w:
2658 case Intrinsic::loongarch_lasx_xvmaxi_d:
2659 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2660 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2661 case Intrinsic::loongarch_lsx_vmaxi_bu:
2662 case Intrinsic::loongarch_lsx_vmaxi_hu:
2663 case Intrinsic::loongarch_lsx_vmaxi_wu:
2664 case Intrinsic::loongarch_lsx_vmaxi_du:
2665 case Intrinsic::loongarch_lasx_xvmaxi_bu:
2666 case Intrinsic::loongarch_lasx_xvmaxi_hu:
2667 case Intrinsic::loongarch_lasx_xvmaxi_wu:
2668 case Intrinsic::loongarch_lasx_xvmaxi_du:
2669 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2670 lowerVectorSplatImm<5>(N, 2, DAG));
2671 case Intrinsic::loongarch_lsx_vmin_b:
2672 case Intrinsic::loongarch_lsx_vmin_h:
2673 case Intrinsic::loongarch_lsx_vmin_w:
2674 case Intrinsic::loongarch_lsx_vmin_d:
2675 case Intrinsic::loongarch_lasx_xvmin_b:
2676 case Intrinsic::loongarch_lasx_xvmin_h:
2677 case Intrinsic::loongarch_lasx_xvmin_w:
2678 case Intrinsic::loongarch_lasx_xvmin_d:
2679 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2680 N->getOperand(2));
2681 case Intrinsic::loongarch_lsx_vmin_bu:
2682 case Intrinsic::loongarch_lsx_vmin_hu:
2683 case Intrinsic::loongarch_lsx_vmin_wu:
2684 case Intrinsic::loongarch_lsx_vmin_du:
2685 case Intrinsic::loongarch_lasx_xvmin_bu:
2686 case Intrinsic::loongarch_lasx_xvmin_hu:
2687 case Intrinsic::loongarch_lasx_xvmin_wu:
2688 case Intrinsic::loongarch_lasx_xvmin_du:
2689 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2690 N->getOperand(2));
2691 case Intrinsic::loongarch_lsx_vmini_b:
2692 case Intrinsic::loongarch_lsx_vmini_h:
2693 case Intrinsic::loongarch_lsx_vmini_w:
2694 case Intrinsic::loongarch_lsx_vmini_d:
2695 case Intrinsic::loongarch_lasx_xvmini_b:
2696 case Intrinsic::loongarch_lasx_xvmini_h:
2697 case Intrinsic::loongarch_lasx_xvmini_w:
2698 case Intrinsic::loongarch_lasx_xvmini_d:
2699 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2700 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2701 case Intrinsic::loongarch_lsx_vmini_bu:
2702 case Intrinsic::loongarch_lsx_vmini_hu:
2703 case Intrinsic::loongarch_lsx_vmini_wu:
2704 case Intrinsic::loongarch_lsx_vmini_du:
2705 case Intrinsic::loongarch_lasx_xvmini_bu:
2706 case Intrinsic::loongarch_lasx_xvmini_hu:
2707 case Intrinsic::loongarch_lasx_xvmini_wu:
2708 case Intrinsic::loongarch_lasx_xvmini_du:
2709 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2710 lowerVectorSplatImm<5>(N, 2, DAG));
2711 case Intrinsic::loongarch_lsx_vmul_b:
2712 case Intrinsic::loongarch_lsx_vmul_h:
2713 case Intrinsic::loongarch_lsx_vmul_w:
2714 case Intrinsic::loongarch_lsx_vmul_d:
2715 case Intrinsic::loongarch_lasx_xvmul_b:
2716 case Intrinsic::loongarch_lasx_xvmul_h:
2717 case Intrinsic::loongarch_lasx_xvmul_w:
2718 case Intrinsic::loongarch_lasx_xvmul_d:
2719 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2720 N->getOperand(2));
2721 case Intrinsic::loongarch_lsx_vmadd_b:
2722 case Intrinsic::loongarch_lsx_vmadd_h:
2723 case Intrinsic::loongarch_lsx_vmadd_w:
2724 case Intrinsic::loongarch_lsx_vmadd_d:
2725 case Intrinsic::loongarch_lasx_xvmadd_b:
2726 case Intrinsic::loongarch_lasx_xvmadd_h:
2727 case Intrinsic::loongarch_lasx_xvmadd_w:
2728 case Intrinsic::loongarch_lasx_xvmadd_d: {
2729 EVT ResTy = N->getValueType(0);
2730 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2731 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2732 N->getOperand(3)));
2733 }
2734 case Intrinsic::loongarch_lsx_vmsub_b:
2735 case Intrinsic::loongarch_lsx_vmsub_h:
2736 case Intrinsic::loongarch_lsx_vmsub_w:
2737 case Intrinsic::loongarch_lsx_vmsub_d:
2738 case Intrinsic::loongarch_lasx_xvmsub_b:
2739 case Intrinsic::loongarch_lasx_xvmsub_h:
2740 case Intrinsic::loongarch_lasx_xvmsub_w:
2741 case Intrinsic::loongarch_lasx_xvmsub_d: {
2742 EVT ResTy = N->getValueType(0);
2743 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2744 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2745 N->getOperand(3)));
2746 }
2747 case Intrinsic::loongarch_lsx_vdiv_b:
2748 case Intrinsic::loongarch_lsx_vdiv_h:
2749 case Intrinsic::loongarch_lsx_vdiv_w:
2750 case Intrinsic::loongarch_lsx_vdiv_d:
2751 case Intrinsic::loongarch_lasx_xvdiv_b:
2752 case Intrinsic::loongarch_lasx_xvdiv_h:
2753 case Intrinsic::loongarch_lasx_xvdiv_w:
2754 case Intrinsic::loongarch_lasx_xvdiv_d:
2755 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2756 N->getOperand(2));
2757 case Intrinsic::loongarch_lsx_vdiv_bu:
2758 case Intrinsic::loongarch_lsx_vdiv_hu:
2759 case Intrinsic::loongarch_lsx_vdiv_wu:
2760 case Intrinsic::loongarch_lsx_vdiv_du:
2761 case Intrinsic::loongarch_lasx_xvdiv_bu:
2762 case Intrinsic::loongarch_lasx_xvdiv_hu:
2763 case Intrinsic::loongarch_lasx_xvdiv_wu:
2764 case Intrinsic::loongarch_lasx_xvdiv_du:
2765 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2766 N->getOperand(2));
2767 case Intrinsic::loongarch_lsx_vmod_b:
2768 case Intrinsic::loongarch_lsx_vmod_h:
2769 case Intrinsic::loongarch_lsx_vmod_w:
2770 case Intrinsic::loongarch_lsx_vmod_d:
2771 case Intrinsic::loongarch_lasx_xvmod_b:
2772 case Intrinsic::loongarch_lasx_xvmod_h:
2773 case Intrinsic::loongarch_lasx_xvmod_w:
2774 case Intrinsic::loongarch_lasx_xvmod_d:
2775 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2776 N->getOperand(2));
2777 case Intrinsic::loongarch_lsx_vmod_bu:
2778 case Intrinsic::loongarch_lsx_vmod_hu:
2779 case Intrinsic::loongarch_lsx_vmod_wu:
2780 case Intrinsic::loongarch_lsx_vmod_du:
2781 case Intrinsic::loongarch_lasx_xvmod_bu:
2782 case Intrinsic::loongarch_lasx_xvmod_hu:
2783 case Intrinsic::loongarch_lasx_xvmod_wu:
2784 case Intrinsic::loongarch_lasx_xvmod_du:
2785 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2786 N->getOperand(2));
2787 case Intrinsic::loongarch_lsx_vand_v:
2788 case Intrinsic::loongarch_lasx_xvand_v:
2789 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2790 N->getOperand(2));
2791 case Intrinsic::loongarch_lsx_vor_v:
2792 case Intrinsic::loongarch_lasx_xvor_v:
2793 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2794 N->getOperand(2));
2795 case Intrinsic::loongarch_lsx_vxor_v:
2796 case Intrinsic::loongarch_lasx_xvxor_v:
2797 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2798 N->getOperand(2));
2799 case Intrinsic::loongarch_lsx_vnor_v:
2800 case Intrinsic::loongarch_lasx_xvnor_v: {
2801 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2802 N->getOperand(2));
2803 return DAG.getNOT(DL, Res, Res->getValueType(0));
2804 }
2805 case Intrinsic::loongarch_lsx_vandi_b:
2806 case Intrinsic::loongarch_lasx_xvandi_b:
2807 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2808 lowerVectorSplatImm<8>(N, 2, DAG));
2809 case Intrinsic::loongarch_lsx_vori_b:
2810 case Intrinsic::loongarch_lasx_xvori_b:
2811 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2812 lowerVectorSplatImm<8>(N, 2, DAG));
2813 case Intrinsic::loongarch_lsx_vxori_b:
2814 case Intrinsic::loongarch_lasx_xvxori_b:
2815 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2816 lowerVectorSplatImm<8>(N, 2, DAG));
2817 case Intrinsic::loongarch_lsx_vsll_b:
2818 case Intrinsic::loongarch_lsx_vsll_h:
2819 case Intrinsic::loongarch_lsx_vsll_w:
2820 case Intrinsic::loongarch_lsx_vsll_d:
2821 case Intrinsic::loongarch_lasx_xvsll_b:
2822 case Intrinsic::loongarch_lasx_xvsll_h:
2823 case Intrinsic::loongarch_lasx_xvsll_w:
2824 case Intrinsic::loongarch_lasx_xvsll_d:
2825 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2826 truncateVecElts(N, DAG));
2827 case Intrinsic::loongarch_lsx_vslli_b:
2828 case Intrinsic::loongarch_lasx_xvslli_b:
2829 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2830 lowerVectorSplatImm<3>(N, 2, DAG));
2831 case Intrinsic::loongarch_lsx_vslli_h:
2832 case Intrinsic::loongarch_lasx_xvslli_h:
2833 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2834 lowerVectorSplatImm<4>(N, 2, DAG));
2835 case Intrinsic::loongarch_lsx_vslli_w:
2836 case Intrinsic::loongarch_lasx_xvslli_w:
2837 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2838 lowerVectorSplatImm<5>(N, 2, DAG));
2839 case Intrinsic::loongarch_lsx_vslli_d:
2840 case Intrinsic::loongarch_lasx_xvslli_d:
2841 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2842 lowerVectorSplatImm<6>(N, 2, DAG));
2843 case Intrinsic::loongarch_lsx_vsrl_b:
2844 case Intrinsic::loongarch_lsx_vsrl_h:
2845 case Intrinsic::loongarch_lsx_vsrl_w:
2846 case Intrinsic::loongarch_lsx_vsrl_d:
2847 case Intrinsic::loongarch_lasx_xvsrl_b:
2848 case Intrinsic::loongarch_lasx_xvsrl_h:
2849 case Intrinsic::loongarch_lasx_xvsrl_w:
2850 case Intrinsic::loongarch_lasx_xvsrl_d:
2851 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2852 truncateVecElts(N, DAG));
2853 case Intrinsic::loongarch_lsx_vsrli_b:
2854 case Intrinsic::loongarch_lasx_xvsrli_b:
2855 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2856 lowerVectorSplatImm<3>(N, 2, DAG));
2857 case Intrinsic::loongarch_lsx_vsrli_h:
2858 case Intrinsic::loongarch_lasx_xvsrli_h:
2859 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2860 lowerVectorSplatImm<4>(N, 2, DAG));
2861 case Intrinsic::loongarch_lsx_vsrli_w:
2862 case Intrinsic::loongarch_lasx_xvsrli_w:
2863 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2864 lowerVectorSplatImm<5>(N, 2, DAG));
2865 case Intrinsic::loongarch_lsx_vsrli_d:
2866 case Intrinsic::loongarch_lasx_xvsrli_d:
2867 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2868 lowerVectorSplatImm<6>(N, 2, DAG));
2869 case Intrinsic::loongarch_lsx_vsra_b:
2870 case Intrinsic::loongarch_lsx_vsra_h:
2871 case Intrinsic::loongarch_lsx_vsra_w:
2872 case Intrinsic::loongarch_lsx_vsra_d:
2873 case Intrinsic::loongarch_lasx_xvsra_b:
2874 case Intrinsic::loongarch_lasx_xvsra_h:
2875 case Intrinsic::loongarch_lasx_xvsra_w:
2876 case Intrinsic::loongarch_lasx_xvsra_d:
2877 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2878 truncateVecElts(N, DAG));
2879 case Intrinsic::loongarch_lsx_vsrai_b:
2880 case Intrinsic::loongarch_lasx_xvsrai_b:
2881 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2882 lowerVectorSplatImm<3>(N, 2, DAG));
2883 case Intrinsic::loongarch_lsx_vsrai_h:
2884 case Intrinsic::loongarch_lasx_xvsrai_h:
2885 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2886 lowerVectorSplatImm<4>(N, 2, DAG));
2887 case Intrinsic::loongarch_lsx_vsrai_w:
2888 case Intrinsic::loongarch_lasx_xvsrai_w:
2889 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2890 lowerVectorSplatImm<5>(N, 2, DAG));
2891 case Intrinsic::loongarch_lsx_vsrai_d:
2892 case Intrinsic::loongarch_lasx_xvsrai_d:
2893 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2894 lowerVectorSplatImm<6>(N, 2, DAG));
2895 case Intrinsic::loongarch_lsx_vclz_b:
2896 case Intrinsic::loongarch_lsx_vclz_h:
2897 case Intrinsic::loongarch_lsx_vclz_w:
2898 case Intrinsic::loongarch_lsx_vclz_d:
2899 case Intrinsic::loongarch_lasx_xvclz_b:
2900 case Intrinsic::loongarch_lasx_xvclz_h:
2901 case Intrinsic::loongarch_lasx_xvclz_w:
2902 case Intrinsic::loongarch_lasx_xvclz_d:
2903 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2904 case Intrinsic::loongarch_lsx_vpcnt_b:
2905 case Intrinsic::loongarch_lsx_vpcnt_h:
2906 case Intrinsic::loongarch_lsx_vpcnt_w:
2907 case Intrinsic::loongarch_lsx_vpcnt_d:
2908 case Intrinsic::loongarch_lasx_xvpcnt_b:
2909 case Intrinsic::loongarch_lasx_xvpcnt_h:
2910 case Intrinsic::loongarch_lasx_xvpcnt_w:
2911 case Intrinsic::loongarch_lasx_xvpcnt_d:
2912 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2913 case Intrinsic::loongarch_lsx_vbitclr_b:
2914 case Intrinsic::loongarch_lsx_vbitclr_h:
2915 case Intrinsic::loongarch_lsx_vbitclr_w:
2916 case Intrinsic::loongarch_lsx_vbitclr_d:
2917 case Intrinsic::loongarch_lasx_xvbitclr_b:
2918 case Intrinsic::loongarch_lasx_xvbitclr_h:
2919 case Intrinsic::loongarch_lasx_xvbitclr_w:
2920 case Intrinsic::loongarch_lasx_xvbitclr_d:
2921 return lowerVectorBitClear(N, DAG);
2922 case Intrinsic::loongarch_lsx_vbitclri_b:
2923 case Intrinsic::loongarch_lasx_xvbitclri_b:
2924 return lowerVectorBitClearImm<3>(N, DAG);
2925 case Intrinsic::loongarch_lsx_vbitclri_h:
2926 case Intrinsic::loongarch_lasx_xvbitclri_h:
2927 return lowerVectorBitClearImm<4>(N, DAG);
2928 case Intrinsic::loongarch_lsx_vbitclri_w:
2929 case Intrinsic::loongarch_lasx_xvbitclri_w:
2930 return lowerVectorBitClearImm<5>(N, DAG);
2931 case Intrinsic::loongarch_lsx_vbitclri_d:
2932 case Intrinsic::loongarch_lasx_xvbitclri_d:
2933 return lowerVectorBitClearImm<6>(N, DAG);
2934 case Intrinsic::loongarch_lsx_vbitset_b:
2935 case Intrinsic::loongarch_lsx_vbitset_h:
2936 case Intrinsic::loongarch_lsx_vbitset_w:
2937 case Intrinsic::loongarch_lsx_vbitset_d:
2938 case Intrinsic::loongarch_lasx_xvbitset_b:
2939 case Intrinsic::loongarch_lasx_xvbitset_h:
2940 case Intrinsic::loongarch_lasx_xvbitset_w:
2941 case Intrinsic::loongarch_lasx_xvbitset_d: {
2942 EVT VecTy = N->getValueType(0);
2943 SDValue One = DAG.getConstant(1, DL, VecTy);
2944 return DAG.getNode(
2945 ISD::OR, DL, VecTy, N->getOperand(1),
2946 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2947 }
2948 case Intrinsic::loongarch_lsx_vbitseti_b:
2949 case Intrinsic::loongarch_lasx_xvbitseti_b:
2950 return lowerVectorBitSetImm<3>(N, DAG);
2951 case Intrinsic::loongarch_lsx_vbitseti_h:
2952 case Intrinsic::loongarch_lasx_xvbitseti_h:
2953 return lowerVectorBitSetImm<4>(N, DAG);
2954 case Intrinsic::loongarch_lsx_vbitseti_w:
2955 case Intrinsic::loongarch_lasx_xvbitseti_w:
2956 return lowerVectorBitSetImm<5>(N, DAG);
2957 case Intrinsic::loongarch_lsx_vbitseti_d:
2958 case Intrinsic::loongarch_lasx_xvbitseti_d:
2959 return lowerVectorBitSetImm<6>(N, DAG);
2960 case Intrinsic::loongarch_lsx_vbitrev_b:
2961 case Intrinsic::loongarch_lsx_vbitrev_h:
2962 case Intrinsic::loongarch_lsx_vbitrev_w:
2963 case Intrinsic::loongarch_lsx_vbitrev_d:
2964 case Intrinsic::loongarch_lasx_xvbitrev_b:
2965 case Intrinsic::loongarch_lasx_xvbitrev_h:
2966 case Intrinsic::loongarch_lasx_xvbitrev_w:
2967 case Intrinsic::loongarch_lasx_xvbitrev_d: {
2968 EVT VecTy = N->getValueType(0);
2969 SDValue One = DAG.getConstant(1, DL, VecTy);
2970 return DAG.getNode(
2971 ISD::XOR, DL, VecTy, N->getOperand(1),
2972 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2973 }
2974 case Intrinsic::loongarch_lsx_vbitrevi_b:
2975 case Intrinsic::loongarch_lasx_xvbitrevi_b:
2976 return lowerVectorBitRevImm<3>(N, DAG);
2977 case Intrinsic::loongarch_lsx_vbitrevi_h:
2978 case Intrinsic::loongarch_lasx_xvbitrevi_h:
2979 return lowerVectorBitRevImm<4>(N, DAG);
2980 case Intrinsic::loongarch_lsx_vbitrevi_w:
2981 case Intrinsic::loongarch_lasx_xvbitrevi_w:
2982 return lowerVectorBitRevImm<5>(N, DAG);
2983 case Intrinsic::loongarch_lsx_vbitrevi_d:
2984 case Intrinsic::loongarch_lasx_xvbitrevi_d:
2985 return lowerVectorBitRevImm<6>(N, DAG);
2986 case Intrinsic::loongarch_lsx_vfadd_s:
2987 case Intrinsic::loongarch_lsx_vfadd_d:
2988 case Intrinsic::loongarch_lasx_xvfadd_s:
2989 case Intrinsic::loongarch_lasx_xvfadd_d:
2990 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2991 N->getOperand(2));
2992 case Intrinsic::loongarch_lsx_vfsub_s:
2993 case Intrinsic::loongarch_lsx_vfsub_d:
2994 case Intrinsic::loongarch_lasx_xvfsub_s:
2995 case Intrinsic::loongarch_lasx_xvfsub_d:
2996 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2997 N->getOperand(2));
2998 case Intrinsic::loongarch_lsx_vfmul_s:
2999 case Intrinsic::loongarch_lsx_vfmul_d:
3000 case Intrinsic::loongarch_lasx_xvfmul_s:
3001 case Intrinsic::loongarch_lasx_xvfmul_d:
3002 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3003 N->getOperand(2));
3004 case Intrinsic::loongarch_lsx_vfdiv_s:
3005 case Intrinsic::loongarch_lsx_vfdiv_d:
3006 case Intrinsic::loongarch_lasx_xvfdiv_s:
3007 case Intrinsic::loongarch_lasx_xvfdiv_d:
3008 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3009 N->getOperand(2));
3010 case Intrinsic::loongarch_lsx_vfmadd_s:
3011 case Intrinsic::loongarch_lsx_vfmadd_d:
3012 case Intrinsic::loongarch_lasx_xvfmadd_s:
3013 case Intrinsic::loongarch_lasx_xvfmadd_d:
3014 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3015 N->getOperand(2), N->getOperand(3));
3016 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3017 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3018 N->getOperand(1), N->getOperand(2),
3019 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3020 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3021 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3022 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3023 N->getOperand(1), N->getOperand(2),
3024 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3025 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3026 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3027 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3028 N->getOperand(1), N->getOperand(2),
3029 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3030 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3031 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3032 N->getOperand(1), N->getOperand(2),
3033 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3034 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3035 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3036 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3037 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3038 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3039 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3040 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3041 case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3042 EVT ResTy = N->getValueType(0);
3043 SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3044 return DAG.getBuildVector(ResTy, DL, Ops);
3045 }
3046 case Intrinsic::loongarch_lsx_vreplve_b:
3047 case Intrinsic::loongarch_lsx_vreplve_h:
3048 case Intrinsic::loongarch_lsx_vreplve_w:
3049 case Intrinsic::loongarch_lsx_vreplve_d:
3050 case Intrinsic::loongarch_lasx_xvreplve_b:
3051 case Intrinsic::loongarch_lasx_xvreplve_h:
3052 case Intrinsic::loongarch_lasx_xvreplve_w:
3053 case Intrinsic::loongarch_lasx_xvreplve_d:
3054 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3055 N->getOperand(1),
3056 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3057 N->getOperand(2)));
3058 }
3059 return SDValue();
3060}
3061
3062SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3063 DAGCombinerInfo &DCI) const {
3064 SelectionDAG &DAG = DCI.DAG;
3065 switch (N->getOpcode()) {
3066 default:
3067 break;
3068 case ISD::AND:
3069 return performANDCombine(N, DAG, DCI, Subtarget);
3070 case ISD::OR:
3071 return performORCombine(N, DAG, DCI, Subtarget);
3072 case ISD::SRL:
3073 return performSRLCombine(N, DAG, DCI, Subtarget);
3074 case LoongArchISD::BITREV_W:
3075 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3076 case ISD::INTRINSIC_WO_CHAIN:
3077 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3078 }
3079 return SDValue();
3080}
3081
3082static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3083 MachineBasicBlock *MBB) {
3084 if (!ZeroDivCheck)
3085 return MBB;
3086
3087 // Build instructions:
3088 // MBB:
3089 // div (or mod) $dst, $dividend, $divisor
3090 // bnez $divisor, SinkMBB
3091 // BreakMBB:
3092 // break 7 // BRK_DIVZERO
3093 // SinkMBB:
3094 // fallthrough
3095 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3096 MachineFunction::iterator It = ++MBB->getIterator();
3097 MachineFunction *MF = MBB->getParent();
3098 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3099 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3100 MF->insert(It, BreakMBB);
3101 MF->insert(It, SinkMBB);
3102
3103 // Transfer the remainder of MBB and its successor edges to SinkMBB.
3104 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3105 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3106
3107 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3108 DebugLoc DL = MI.getDebugLoc();
3109 MachineOperand &Divisor = MI.getOperand(2);
3110 Register DivisorReg = Divisor.getReg();
3111
3112 // MBB:
3113 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3114 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3115 .addMBB(SinkMBB);
3116 MBB->addSuccessor(BreakMBB);
3117 MBB->addSuccessor(SinkMBB);
3118
3119 // BreakMBB:
3120 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3121 // definition of BRK_DIVZERO.
3122 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3123 BreakMBB->addSuccessor(SinkMBB);
3124
3125 // Clear Divisor's kill flag.
3126 Divisor.setIsKill(false);
3127
3128 return SinkMBB;
3129}
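// With -loongarch-check-zero-division, a 32-bit division therefore expands to
// roughly the following (register names illustrative):
//   div.w $a0, $a0, $a1
//   bnez $a1, .LBB0_2
//   break 7 # BRK_DIVZERO
// .LBB0_2: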
3130
3131static MachineBasicBlock *
3132emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3133 const LoongArchSubtarget &Subtarget) {
3134 unsigned CondOpc;
3135 switch (MI.getOpcode()) {
3136 default:
3137 llvm_unreachable("Unexpected opcode");
3138 case LoongArch::PseudoVBZ:
3139 CondOpc = LoongArch::VSETEQZ_V;
3140 break;
3141 case LoongArch::PseudoVBZ_B:
3142 CondOpc = LoongArch::VSETANYEQZ_B;
3143 break;
3144 case LoongArch::PseudoVBZ_H:
3145 CondOpc = LoongArch::VSETANYEQZ_H;
3146 break;
3147 case LoongArch::PseudoVBZ_W:
3148 CondOpc = LoongArch::VSETANYEQZ_W;
3149 break;
3150 case LoongArch::PseudoVBZ_D:
3151 CondOpc = LoongArch::VSETANYEQZ_D;
3152 break;
3153 case LoongArch::PseudoVBNZ:
3154 CondOpc = LoongArch::VSETNEZ_V;
3155 break;
3156 case LoongArch::PseudoVBNZ_B:
3157 CondOpc = LoongArch::VSETALLNEZ_B;
3158 break;
3159 case LoongArch::PseudoVBNZ_H:
3160 CondOpc = LoongArch::VSETALLNEZ_H;
3161 break;
3162 case LoongArch::PseudoVBNZ_W:
3163 CondOpc = LoongArch::VSETALLNEZ_W;
3164 break;
3165 case LoongArch::PseudoVBNZ_D:
3166 CondOpc = LoongArch::VSETALLNEZ_D;
3167 break;
3168 case LoongArch::PseudoXVBZ:
3169 CondOpc = LoongArch::XVSETEQZ_V;
3170 break;
3171 case LoongArch::PseudoXVBZ_B:
3172 CondOpc = LoongArch::XVSETANYEQZ_B;
3173 break;
3174 case LoongArch::PseudoXVBZ_H:
3175 CondOpc = LoongArch::XVSETANYEQZ_H;
3176 break;
3177 case LoongArch::PseudoXVBZ_W:
3178 CondOpc = LoongArch::XVSETANYEQZ_W;
3179 break;
3180 case LoongArch::PseudoXVBZ_D:
3181 CondOpc = LoongArch::XVSETANYEQZ_D;
3182 break;
3183 case LoongArch::PseudoXVBNZ:
3184 CondOpc = LoongArch::XVSETNEZ_V;
3185 break;
3186 case LoongArch::PseudoXVBNZ_B:
3187 CondOpc = LoongArch::XVSETALLNEZ_B;
3188 break;
3189 case LoongArch::PseudoXVBNZ_H:
3190 CondOpc = LoongArch::XVSETALLNEZ_H;
3191 break;
3192 case LoongArch::PseudoXVBNZ_W:
3193 CondOpc = LoongArch::XVSETALLNEZ_W;
3194 break;
3195 case LoongArch::PseudoXVBNZ_D:
3196 CondOpc = LoongArch::XVSETALLNEZ_D;
3197 break;
3198 }
3199
3200 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3201 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3202 DebugLoc DL = MI.getDebugLoc();
3203 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3204 MachineFunction::iterator It = ++BB->getIterator();
3205
3206 MachineFunction *F = BB->getParent();
3207 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3208 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3209 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3210
3211 F->insert(It, FalseBB);
3212 F->insert(It, TrueBB);
3213 F->insert(It, SinkBB);
3214
3215 // Transfer the remainder of MBB and its successor edges to Sink.
3216 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3217 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3218
3219 // Insert the real instruction into BB.
3220 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3221 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3222
3223 // Insert branch.
3224 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3225 BB->addSuccessor(FalseBB);
3226 BB->addSuccessor(TrueBB);
3227
3228 // FalseBB.
3229 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3230 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3231 .addReg(LoongArch::R0)
3232 .addImm(0);
3233 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3234 FalseBB->addSuccessor(SinkBB);
3235
3236 // TrueBB.
3237 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3238 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3239 .addReg(LoongArch::R0)
3240 .addImm(1);
3241 TrueBB->addSuccessor(SinkBB);
3242
3243 // SinkBB: merge the results.
3244 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3245 MI.getOperand(0).getReg())
3246 .addReg(RD1)
3247 .addMBB(FalseBB)
3248 .addReg(RD2)
3249 .addMBB(TrueBB);
3250
3251 // The pseudo instruction is gone now.
3252 MI.eraseFromParent();
3253 return SinkBB;
3254}
3255
3256static MachineBasicBlock *
3257 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3258 const LoongArchSubtarget &Subtarget) {
3259 unsigned InsOp;
3260 unsigned HalfSize;
3261 switch (MI.getOpcode()) {
3262 default:
3263 llvm_unreachable("Unexpected opcode");
3264 case LoongArch::PseudoXVINSGR2VR_B:
3265 HalfSize = 16;
3266 InsOp = LoongArch::VINSGR2VR_B;
3267 break;
3268 case LoongArch::PseudoXVINSGR2VR_H:
3269 HalfSize = 8;
3270 InsOp = LoongArch::VINSGR2VR_H;
3271 break;
3272 }
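// LASX has no direct 256-bit element insert, so the pseudo is expanded with
// 128-bit LSX inserts: if Idx addresses the high half, XVPERMI_Q moves that
// half into the low 128 bits first, VINSGR2VR performs the insert there, and
// a final XVPERMI_Q writes the updated half back into XDst.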
3273 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3274 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3275 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3276 DebugLoc DL = MI.getDebugLoc();
3277 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3278 // XDst = vector_insert XSrc, Elt, Idx
3279 Register XDst = MI.getOperand(0).getReg();
3280 Register XSrc = MI.getOperand(1).getReg();
3281 Register Elt = MI.getOperand(2).getReg();
3282 unsigned Idx = MI.getOperand(3).getImm();
3283
3284 Register ScratchReg1 = XSrc;
3285 if (Idx >= HalfSize) {
3286 ScratchReg1 = MRI.createVirtualRegister(RC);
3287 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3288 .addReg(XSrc)
3289 .addReg(XSrc)
3290 .addImm(1);
3291 }
3292
3293 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3294 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3295 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3296 .addReg(ScratchReg1, 0, LoongArch::sub_128);
3297 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3298 .addReg(ScratchSubReg1)
3299 .addReg(Elt)
3300 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3301
3302 Register ScratchReg2 = XDst;
3303 if (Idx >= HalfSize)
3304 ScratchReg2 = MRI.createVirtualRegister(RC);
3305
3306 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3307 .addImm(0)
3308 .addReg(ScratchSubReg2)
3309 .addImm(LoongArch::sub_128);
3310
3311 if (Idx >= HalfSize)
3312 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3313 .addReg(XSrc)
3314 .addReg(ScratchReg2)
3315 .addImm(2);
3316
3317 MI.eraseFromParent();
3318 return BB;
3319}
3320
3321MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3322 MachineInstr &MI, MachineBasicBlock *BB) const {
3323 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3324 DebugLoc DL = MI.getDebugLoc();
3325
3326 switch (MI.getOpcode()) {
3327 default:
3328 llvm_unreachable("Unexpected instr type to insert");
3329 case LoongArch::DIV_W:
3330 case LoongArch::DIV_WU:
3331 case LoongArch::MOD_W:
3332 case LoongArch::MOD_WU:
3333 case LoongArch::DIV_D:
3334 case LoongArch::DIV_DU:
3335 case LoongArch::MOD_D:
3336 case LoongArch::MOD_DU:
3337 return insertDivByZeroTrap(MI, BB);
3338 break;
3339 case LoongArch::WRFCSR: {
3340 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3341 LoongArch::FCSR0 + MI.getOperand(0).getImm())
3342 .addReg(MI.getOperand(1).getReg());
3343 MI.eraseFromParent();
3344 return BB;
3345 }
3346 case LoongArch::RDFCSR: {
3347 MachineInstr *ReadFCSR =
3348 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3349 MI.getOperand(0).getReg())
3350 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3351 ReadFCSR->getOperand(1).setIsUndef();
3352 MI.eraseFromParent();
3353 return BB;
3354 }
3355 case LoongArch::PseudoVBZ:
3356 case LoongArch::PseudoVBZ_B:
3357 case LoongArch::PseudoVBZ_H:
3358 case LoongArch::PseudoVBZ_W:
3359 case LoongArch::PseudoVBZ_D:
3360 case LoongArch::PseudoVBNZ:
3361 case LoongArch::PseudoVBNZ_B:
3362 case LoongArch::PseudoVBNZ_H:
3363 case LoongArch::PseudoVBNZ_W:
3364 case LoongArch::PseudoVBNZ_D:
3365 case LoongArch::PseudoXVBZ:
3366 case LoongArch::PseudoXVBZ_B:
3367 case LoongArch::PseudoXVBZ_H:
3368 case LoongArch::PseudoXVBZ_W:
3369 case LoongArch::PseudoXVBZ_D:
3370 case LoongArch::PseudoXVBNZ:
3371 case LoongArch::PseudoXVBNZ_B:
3372 case LoongArch::PseudoXVBNZ_H:
3373 case LoongArch::PseudoXVBNZ_W:
3374 case LoongArch::PseudoXVBNZ_D:
3375 return emitVecCondBranchPseudo(MI, BB, Subtarget);
3376 case LoongArch::PseudoXVINSGR2VR_B:
3377 case LoongArch::PseudoXVINSGR2VR_H:
3378 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3379 }
3380}
3381
3382 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3383 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3384 unsigned *Fast) const {
3385 if (!Subtarget.hasUAL())
3386 return false;
3387
3388 // TODO: set reasonable speed number.
3389 if (Fast)
3390 *Fast = 1;
3391 return true;
3392}
3393
3394const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3395 switch ((LoongArchISD::NodeType)Opcode) {
3396 case LoongArchISD::FIRST_NUMBER:
3397 break;
3398
3399#define NODE_NAME_CASE(node) \
3400 case LoongArchISD::node: \
3401 return "LoongArchISD::" #node;
3402
3403 // TODO: Add more target-dependent nodes later.
3404 NODE_NAME_CASE(CALL)
3405 NODE_NAME_CASE(CALL_MEDIUM)
3406 NODE_NAME_CASE(CALL_LARGE)
3407 NODE_NAME_CASE(RET)
3408 NODE_NAME_CASE(TAIL)
3409 NODE_NAME_CASE(TAIL_MEDIUM)
3410 NODE_NAME_CASE(TAIL_LARGE)
3411 NODE_NAME_CASE(SLL_W)
3412 NODE_NAME_CASE(SRA_W)
3413 NODE_NAME_CASE(SRL_W)
3414 NODE_NAME_CASE(BSTRINS)
3415 NODE_NAME_CASE(BSTRPICK)
3416 NODE_NAME_CASE(MOVGR2FR_W_LA64)
3417 NODE_NAME_CASE(MOVFR2GR_S_LA64)
3418 NODE_NAME_CASE(FTINT)
3419 NODE_NAME_CASE(REVB_2H)
3420 NODE_NAME_CASE(REVB_2W)
3421 NODE_NAME_CASE(BITREV_4B)
3422 NODE_NAME_CASE(BITREV_W)
3423 NODE_NAME_CASE(ROTR_W)
3424 NODE_NAME_CASE(ROTL_W)
3425 NODE_NAME_CASE(CLZ_W)
3426 NODE_NAME_CASE(CTZ_W)
3427 NODE_NAME_CASE(DBAR)
3428 NODE_NAME_CASE(IBAR)
3429 NODE_NAME_CASE(BREAK)
3430 NODE_NAME_CASE(SYSCALL)
3431 NODE_NAME_CASE(CRC_W_B_W)
3432 NODE_NAME_CASE(CRC_W_H_W)
3433 NODE_NAME_CASE(CRC_W_W_W)
3434 NODE_NAME_CASE(CRC_W_D_W)
3435 NODE_NAME_CASE(CRCC_W_B_W)
3436 NODE_NAME_CASE(CRCC_W_H_W)
3437 NODE_NAME_CASE(CRCC_W_W_W)
3438 NODE_NAME_CASE(CRCC_W_D_W)
3439 NODE_NAME_CASE(CSRRD)
3440 NODE_NAME_CASE(CSRWR)
3441 NODE_NAME_CASE(CSRXCHG)
3442 NODE_NAME_CASE(IOCSRRD_B)
3443 NODE_NAME_CASE(IOCSRRD_H)
3444 NODE_NAME_CASE(IOCSRRD_W)
3445 NODE_NAME_CASE(IOCSRRD_D)
3446 NODE_NAME_CASE(IOCSRWR_B)
3447 NODE_NAME_CASE(IOCSRWR_H)
3448 NODE_NAME_CASE(IOCSRWR_W)
3449 NODE_NAME_CASE(IOCSRWR_D)
3450 NODE_NAME_CASE(CPUCFG)
3451 NODE_NAME_CASE(MOVGR2FCSR)
3452 NODE_NAME_CASE(MOVFCSR2GR)
3453 NODE_NAME_CASE(CACOP_D)
3454 NODE_NAME_CASE(CACOP_W)
3455 NODE_NAME_CASE(VPICK_SEXT_ELT)
3456 NODE_NAME_CASE(VPICK_ZEXT_ELT)
3457 NODE_NAME_CASE(VREPLVE)
3458 NODE_NAME_CASE(VALL_ZERO)
3459 NODE_NAME_CASE(VANY_ZERO)
3460 NODE_NAME_CASE(VALL_NONZERO)
3461 NODE_NAME_CASE(VANY_NONZERO)
3462 }
3463#undef NODE_NAME_CASE
3464 return nullptr;
3465}
3466
3467//===----------------------------------------------------------------------===//
3468// Calling Convention Implementation
3469//===----------------------------------------------------------------------===//
3470
3471 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
3472// with a0-a1 reused to return values. Generally, the GPRs are used to pass
3473// fixed-point arguments, and floating-point arguments when no FPR is available
3474// or with soft float ABI.
3475const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
3476 LoongArch::R7, LoongArch::R8, LoongArch::R9,
3477 LoongArch::R10, LoongArch::R11};
3478 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3479// arguments, and fa0-fa1 are also used to return values.
3480const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3481 LoongArch::F3, LoongArch::F4, LoongArch::F5,
3482 LoongArch::F6, LoongArch::F7};
3483// FPR32 and FPR64 alias each other.
3484 const MCPhysReg ArgFPR64s[] = {
3485 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3486 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3487
3488const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3489 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3490 LoongArch::VR6, LoongArch::VR7};
3491
3492const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3493 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3494 LoongArch::XR6, LoongArch::XR7};
3495
3496// Pass a 2*GRLen argument that has been split into two GRLen values through
3497// registers or the stack as necessary.
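// For example, on LA32 (GRLen == 32) an i64 argument arrives here as two i32
// halves; each half independently takes the next free GPR or, failing that,
// a properly aligned stack slot.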
3498static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3499 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3500 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3501 ISD::ArgFlagsTy ArgFlags2) {
3502 unsigned GRLenInBytes = GRLen / 8;
3503 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3504 // At least one half can be passed via register.
3505 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3506 VA1.getLocVT(), CCValAssign::Full));
3507 } else {
3508 // Both halves must be passed on the stack, with proper alignment.
3509 Align StackAlign =
3510 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3511 State.addLoc(
3512 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3513 State.AllocateStack(GRLenInBytes, StackAlign),
3514 VA1.getLocVT(), CCValAssign::Full));
3515 State.addLoc(CCValAssign::getMem(
3516 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3517 LocVT2, CCValAssign::Full));
3518 return false;
3519 }
3520 if (Register Reg = State.AllocateReg(ArgGPRs)) {
3521 // The second half can also be passed via register.
3522 State.addLoc(
3523 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3524 } else {
3525 // The second half is passed via the stack, without additional alignment.
3526 State.addLoc(CCValAssign::getMem(
3527 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3528 LocVT2, CCValAssign::Full));
3529 }
3530 return false;
3531}
3532
3533// Implements the LoongArch calling convention. Returns true upon failure.
3534 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3535 unsigned ValNo, MVT ValVT,
3536 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3537 CCState &State, bool IsFixed, bool IsRet,
3538 Type *OrigTy) {
3539 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3540 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3541 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3542 MVT LocVT = ValVT;
3543
3544 // Any return value split into more than two values can't be returned
3545 // directly.
3546 if (IsRet && ValNo > 1)
3547 return true;
3548
3549 // If passing a variadic argument, or if no FPR is available.
3550 bool UseGPRForFloat = true;
3551
3552 switch (ABI) {
3553 default:
3554 llvm_unreachable("Unexpected ABI");
3555 case LoongArchABI::ABI_ILP32S:
3556 case LoongArchABI::ABI_ILP32F:
3557 case LoongArchABI::ABI_LP64F:
3558 report_fatal_error("Unimplemented ABI");
3559 break;
3560 case LoongArchABI::ABI_ILP32D:
3561 case LoongArchABI::ABI_LP64D:
3562 UseGPRForFloat = !IsFixed;
3563 break;
3564 case LoongArchABI::ABI_LP64S:
3565 break;
3566 }
3567
3568 // FPR32 and FPR64 alias each other.
3569 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3570 UseGPRForFloat = true;
3571
3572 if (UseGPRForFloat && ValVT == MVT::f32) {
3573 LocVT = GRLenVT;
3574 LocInfo = CCValAssign::BCvt;
3575 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3576 LocVT = MVT::i64;
3577 LocInfo = CCValAssign::BCvt;
3578 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3579 // TODO: Handle passing f64 on LA32 with D feature.
3580 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3581 }
3582
3583 // If this is a variadic argument, the LoongArch calling convention requires
3584 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3585 // byte alignment. An aligned register should be used regardless of whether
3586 // the original argument was split during legalisation or not. The argument
3587 // will not be passed by registers if the original type is larger than
3588 // 2*GRLen, so the register alignment rule does not apply.
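// For example, an i128 vararg on LA64 is 2*GRLen-aligned: if the next free
// register would be at an odd index (a1/a3/a5/a7), that register is skipped
// so the value starts in an even-numbered pair such as a2/a3.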
3589 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3590 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3591 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3592 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3593 // Skip 'odd' register if necessary.
3594 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3595 State.AllocateReg(ArgGPRs);
3596 }
3597
3598 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3599 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3600 State.getPendingArgFlags();
3601
3602 assert(PendingLocs.size() == PendingArgFlags.size() &&
3603 "PendingLocs and PendingArgFlags out of sync");
3604
3605 // Split arguments might be passed indirectly, so keep track of the pending
3606 // values.
3607 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3608 LocVT = GRLenVT;
3609 LocInfo = CCValAssign::Indirect;
3610 PendingLocs.push_back(
3611 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3612 PendingArgFlags.push_back(ArgFlags);
3613 if (!ArgFlags.isSplitEnd()) {
3614 return false;
3615 }
3616 }
3617
3618 // If the split argument only had two elements, it should be passed directly
3619 // in registers or on the stack.
3620 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3621 PendingLocs.size() <= 2) {
3622 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3623 // Apply the normal calling convention rules to the first half of the
3624 // split argument.
3625 CCValAssign VA = PendingLocs[0];
3626 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3627 PendingLocs.clear();
3628 PendingArgFlags.clear();
3629 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3630 ArgFlags);
3631 }
3632
3633 // Allocate to a register if possible, or else a stack slot.
3634 Register Reg;
3635 unsigned StoreSizeBytes = GRLen / 8;
3636 Align StackAlign = Align(GRLen / 8);
3637
3638 if (ValVT == MVT::f32 && !UseGPRForFloat)
3639 Reg = State.AllocateReg(ArgFPR32s);
3640 else if (ValVT == MVT::f64 && !UseGPRForFloat)
3641 Reg = State.AllocateReg(ArgFPR64s);
3642 else if (ValVT.is128BitVector())
3643 Reg = State.AllocateReg(ArgVRs);
3644 else if (ValVT.is256BitVector())
3645 Reg = State.AllocateReg(ArgXRs);
3646 else
3647 Reg = State.AllocateReg(ArgGPRs);
3648
3649 unsigned StackOffset =
3650 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3651
3652 // If we reach this point and PendingLocs is non-empty, we must be at the
3653 // end of a split argument that must be passed indirectly.
3654 if (!PendingLocs.empty()) {
3655 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3656 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3657 for (auto &It : PendingLocs) {
3658 if (Reg)
3659 It.convertToReg(Reg);
3660 else
3661 It.convertToMem(StackOffset);
3662 State.addLoc(It);
3663 }
3664 PendingLocs.clear();
3665 PendingArgFlags.clear();
3666 return false;
3667 }
3668 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3669 "Expected a GRLenVT at this stage");
3670
3671 if (Reg) {
3672 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3673 return false;
3674 }
3675
3676 // When a floating-point value is passed on the stack, no bit-cast is needed.
3677 if (ValVT.isFloatingPoint()) {
3678 LocVT = ValVT;
3679 LocInfo = CCValAssign::Full;
3680 }
3681
3682 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3683 return false;
3684}
3685
3686void LoongArchTargetLowering::analyzeInputArgs(
3687 MachineFunction &MF, CCState &CCInfo,
3688 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3689 LoongArchCCAssignFn Fn) const {
3690 FunctionType *FType = MF.getFunction().getFunctionType();
3691 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3692 MVT ArgVT = Ins[i].VT;
3693 Type *ArgTy = nullptr;
3694 if (IsRet)
3695 ArgTy = FType->getReturnType();
3696 else if (Ins[i].isOrigArg())
3697 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3698 LoongArchABI::ABI ABI =
3699 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3700 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3701 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3702 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3703 << '\n');
3704 llvm_unreachable("");
3705 }
3706 }
3707}
3708
3709void LoongArchTargetLowering::analyzeOutputArgs(
3710 MachineFunction &MF, CCState &CCInfo,
3711 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3712 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3713 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3714 MVT ArgVT = Outs[i].VT;
3715 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3716 LoongArchABI::ABI ABI =
3717 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3718 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3719 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3720 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3721 << "\n");
3722 llvm_unreachable("");
3723 }
3724 }
3725}
3726
3727// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3728// values.
3729 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3730 const CCValAssign &VA, const SDLoc &DL) {
3731 switch (VA.getLocInfo()) {
3732 default:
3733 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3734 case CCValAssign::Full:
3735 case CCValAssign::Indirect:
3736 break;
3737 case CCValAssign::BCvt:
3738 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3739 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3740 else
3741 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3742 break;
3743 }
3744 return Val;
3745}
3746
3747 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3748 const CCValAssign &VA, const SDLoc &DL,
3749 const LoongArchTargetLowering &TLI) {
3750 MachineFunction &MF = DAG.getMachineFunction();
3751 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3752 EVT LocVT = VA.getLocVT();
3753 SDValue Val;
3754 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3755 Register VReg = RegInfo.createVirtualRegister(RC);
3756 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3757 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3758
3759 return convertLocVTToValVT(DAG, Val, VA, DL);
3760}
3761
3762// The caller is responsible for loading the full value if the argument is
3763// passed with CCValAssign::Indirect.
3764 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3765 const CCValAssign &VA, const SDLoc &DL) {
3766 MachineFunction &MF = DAG.getMachineFunction();
3767 MachineFrameInfo &MFI = MF.getFrameInfo();
3768 EVT ValVT = VA.getValVT();
3769 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3770 /*IsImmutable=*/true);
3771 SDValue FIN = DAG.getFrameIndex(
3772 FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3773
3774 ISD::LoadExtType ExtType;
3775 switch (VA.getLocInfo()) {
3776 default:
3777 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3778 case CCValAssign::Full:
3779 case CCValAssign::Indirect:
3780 case CCValAssign::BCvt:
3781 ExtType = ISD::NON_EXTLOAD;
3782 break;
3783 }
3784 return DAG.getExtLoad(
3785 ExtType, DL, VA.getLocVT(), Chain, FIN,
3786 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3787}
3788
3789 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3790 const CCValAssign &VA, const SDLoc &DL) {
3791 EVT LocVT = VA.getLocVT();
3792
3793 switch (VA.getLocInfo()) {
3794 default:
3795 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3796 case CCValAssign::Full:
3797 break;
3798 case CCValAssign::BCvt:
3799 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3800 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3801 else
3802 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3803 break;
3804 }
3805 return Val;
3806}
3807
3808static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3809 CCValAssign::LocInfo LocInfo,
3810 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3811 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3812 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3813 // s0 s1 s2 s3 s4 s5 s6 s7 s8
3814 static const MCPhysReg GPRList[] = {
3815 LoongArch::R23, LoongArch::R24, LoongArch::R25,
3816 LoongArch::R26, LoongArch::R27, LoongArch::R28,
3817 LoongArch::R29, LoongArch::R30, LoongArch::R31};
3818 if (unsigned Reg = State.AllocateReg(GPRList)) {
3819 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3820 return false;
3821 }
3822 }
3823
3824 if (LocVT == MVT::f32) {
3825 // Pass in STG registers: F1, F2, F3, F4
3826 // fs0,fs1,fs2,fs3
3827 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3828 LoongArch::F26, LoongArch::F27};
3829 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3830 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3831 return false;
3832 }
3833 }
3834
3835 if (LocVT == MVT::f64) {
3836 // Pass in STG registers: D1, D2, D3, D4
3837 // fs4,fs5,fs6,fs7
3838 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3839 LoongArch::F30_64, LoongArch::F31_64};
3840 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3841 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3842 return false;
3843 }
3844 }
3845
3846 report_fatal_error("No registers left in GHC calling convention");
3847 return true;
3848}
3849
3850// Transform physical registers into virtual registers.
3851 SDValue LoongArchTargetLowering::LowerFormalArguments(
3852 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3853 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3854 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3855
3856 MachineFunction &MF = DAG.getMachineFunction();
3857
3858 switch (CallConv) {
3859 default:
3860 llvm_unreachable("Unsupported calling convention");
3861 case CallingConv::C:
3862 case CallingConv::Fast:
3863 break;
3864 case CallingConv::GHC:
3865 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3866 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3867 report_fatal_error(
3868 "GHC calling convention requires the F and D extensions");
3869 }
3870
3871 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3872 MVT GRLenVT = Subtarget.getGRLenVT();
3873 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3874 // Used with varargs to accumulate store chains.
3875 std::vector<SDValue> OutChains;
3876
3877 // Assign locations to all of the incoming arguments.
3878 SmallVector<CCValAssign> ArgLocs;
3879 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3880
3881 if (CallConv == CallingConv::GHC)
3882 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3883 else
3884 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3885
3886 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3887 CCValAssign &VA = ArgLocs[i];
3888 SDValue ArgValue;
3889 if (VA.isRegLoc())
3890 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3891 else
3892 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3893 if (VA.getLocInfo() == CCValAssign::Indirect) {
3894 // If the original argument was split and passed by reference, we need to
3895 // load all parts of it here (using the same address).
3896 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3897 MachinePointerInfo()));
3898 unsigned ArgIndex = Ins[i].OrigArgIndex;
3899 unsigned ArgPartOffset = Ins[i].PartOffset;
3900 assert(ArgPartOffset == 0);
3901 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3902 CCValAssign &PartVA = ArgLocs[i + 1];
3903 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3904 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3905 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3906 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3907 MachinePointerInfo()));
3908 ++i;
3909 }
3910 continue;
3911 }
3912 InVals.push_back(ArgValue);
3913 }
3914
3915 if (IsVarArg) {
3916 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3917 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3918 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3919 MachineFrameInfo &MFI = MF.getFrameInfo();
3920 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3921 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3922
3923 // Offset of the first variable argument from stack pointer, and size of
3924 // the vararg save area. For now, the varargs save area is either zero or
3925 // large enough to hold a0-a7.
3926 int VaArgOffset, VarArgsSaveSize;
3927
3928 // If all registers are allocated, then all varargs must be passed on the
3929 // stack and we don't need to save any argregs.
3930 if (ArgRegs.size() == Idx) {
3931 VaArgOffset = CCInfo.getStackSize();
3932 VarArgsSaveSize = 0;
3933 } else {
3934 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3935 VaArgOffset = -VarArgsSaveSize;
3936 }
3937
3938 // Record the frame index of the first variable argument
3939 // which is a value needed by VASTART.
3940 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3941 LoongArchFI->setVarArgsFrameIndex(FI);
3942
3943 // If saving an odd number of registers then create an extra stack slot to
3944 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3945 // offsets to even-numbered registers remain 2*GRLen-aligned.
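// For example, with a0-a2 holding named arguments (Idx == 3), a3-a7 (five
// registers) are saved and one extra GRLen-sized slot pads the save area
// back to 2*GRLen alignment.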
3946 if (Idx % 2) {
3947 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3948 true);
3949 VarArgsSaveSize += GRLenInBytes;
3950 }
3951
3952 // Copy the integer registers that may have been used for passing varargs
3953 // to the vararg save area.
3954 for (unsigned I = Idx; I < ArgRegs.size();
3955 ++I, VaArgOffset += GRLenInBytes) {
3956 const Register Reg = RegInfo.createVirtualRegister(RC);
3957 RegInfo.addLiveIn(ArgRegs[I], Reg);
3958 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3959 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3960 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3961 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3962 MachinePointerInfo::getFixedStack(MF, FI));
3963 cast<StoreSDNode>(Store.getNode())
3964 ->getMemOperand()
3965 ->setValue((Value *)nullptr);
3966 OutChains.push_back(Store);
3967 }
3968 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3969 }
3970
3971 // All stores are grouped in one node to allow the matching between
3972 // the size of Ins and InVals. This only happens for vararg functions.
3973 if (!OutChains.empty()) {
3974 OutChains.push_back(Chain);
3975 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3976 }
3977
3978 return Chain;
3979}
3980
3981 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3982 return CI->isTailCall();
3983}
3984
3985 // Check if the return value is used only as a return value, as otherwise
3986// we can't perform a tail-call.
3987 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3988 SDValue &Chain) const {
3989 if (N->getNumValues() != 1)
3990 return false;
3991 if (!N->hasNUsesOfValue(1, 0))
3992 return false;
3993
3994 SDNode *Copy = *N->use_begin();
3995 if (Copy->getOpcode() != ISD::CopyToReg)
3996 return false;
3997
3998 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3999 // isn't safe to perform a tail call.
4000 if (Copy->getGluedNode())
4001 return false;
4002
4003 // The copy must be used by a LoongArchISD::RET, and nothing else.
4004 bool HasRet = false;
4005 for (SDNode *Node : Copy->uses()) {
4006 if (Node->getOpcode() != LoongArchISD::RET)
4007 return false;
4008 HasRet = true;
4009 }
4010
4011 if (!HasRet)
4012 return false;
4013
4014 Chain = Copy->getOperand(0);
4015 return true;
4016}
4017
4018// Check whether the call is eligible for tail call optimization.
4019bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4020 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4021 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4022
4023 auto CalleeCC = CLI.CallConv;
4024 auto &Outs = CLI.Outs;
4025 auto &Caller = MF.getFunction();
4026 auto CallerCC = Caller.getCallingConv();
4027
4028 // Do not tail call opt if the stack is used to pass parameters.
4029 if (CCInfo.getStackSize() != 0)
4030 return false;
4031
4032 // Do not tail call opt if any parameters need to be passed indirectly.
4033 for (auto &VA : ArgLocs)
4034 if (VA.getLocInfo() == CCValAssign::Indirect)
4035 return false;
4036
4037 // Do not tail call opt if either caller or callee uses struct return
4038 // semantics.
4039 auto IsCallerStructRet = Caller.hasStructRetAttr();
4040 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4041 if (IsCallerStructRet || IsCalleeStructRet)
4042 return false;
4043
4044 // Do not tail call opt if either the callee or caller has a byval argument.
4045 for (auto &Arg : Outs)
4046 if (Arg.Flags.isByVal())
4047 return false;
4048
4049 // The callee has to preserve all registers the caller needs to preserve.
4050 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4051 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4052 if (CalleeCC != CallerCC) {
4053 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4054 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4055 return false;
4056 }
4057 return true;
4058}
4059
4060 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4061 return DAG.getDataLayout().getPrefTypeAlign(
4062 VT.getTypeForEVT(*DAG.getContext()));
4063}
4064
4065// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4066// and output parameter nodes.
4067SDValue
4068 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4069 SmallVectorImpl<SDValue> &InVals) const {
4070 SelectionDAG &DAG = CLI.DAG;
4071 SDLoc &DL = CLI.DL;
4072 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4073 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4074 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4075 SDValue Chain = CLI.Chain;
4076 SDValue Callee = CLI.Callee;
4077 CallingConv::ID CallConv = CLI.CallConv;
4078 bool IsVarArg = CLI.IsVarArg;
4079 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4080 MVT GRLenVT = Subtarget.getGRLenVT();
4081 bool &IsTailCall = CLI.IsTailCall;
4082
4083 MachineFunction &MF = DAG.getMachineFunction();
4084
4085 // Analyze the operands of the call, assigning locations to each operand.
4086 SmallVector<CCValAssign> ArgLocs;
4087 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4088
4089 if (CallConv == CallingConv::GHC)
4090 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4091 else
4092 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4093
4094 // Check if it's really possible to do a tail call.
4095 if (IsTailCall)
4096 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4097
4098 if (IsTailCall)
4099 ++NumTailCalls;
4100 else if (CLI.CB && CLI.CB->isMustTailCall())
4101 report_fatal_error("failed to perform tail call elimination on a call "
4102 "site marked musttail");
4103
4104 // Get a count of how many bytes are to be pushed on the stack.
4105 unsigned NumBytes = ArgCCInfo.getStackSize();
4106
4107 // Create local copies for byval args.
4108 SmallVector<SDValue> ByValArgs;
4109 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4110 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4111 if (!Flags.isByVal())
4112 continue;
4113
4114 SDValue Arg = OutVals[i];
4115 unsigned Size = Flags.getByValSize();
4116 Align Alignment = Flags.getNonZeroByValAlign();
4117
4118 int FI =
4119 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4120 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4121 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4122
4123 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4124 /*IsVolatile=*/false,
4125 /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4126 MachinePointerInfo(), MachinePointerInfo());
4127 ByValArgs.push_back(FIPtr);
4128 }
4129
4130 if (!IsTailCall)
4131 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4132
4133 // Copy argument values to their designated locations.
4134 SmallVector<std::pair<Register, SDValue>> RegsToPass;
4135 SmallVector<SDValue> MemOpChains;
4136 SDValue StackPtr;
4137 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4138 CCValAssign &VA = ArgLocs[i];
4139 SDValue ArgValue = OutVals[i];
4140 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4141
4142 // Promote the value if needed.
4143 // For now, only handle fully promoted and indirect arguments.
4144 if (VA.getLocInfo() == CCValAssign::Indirect) {
4145 // Store the argument in a stack slot and pass its address.
4146 Align StackAlign =
4147 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4148 getPrefTypeAlign(ArgValue.getValueType(), DAG));
4149 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4150 // If the original argument was split and passed by reference, we need to
4151 // store the required parts of it here (and pass just one address).
4152 unsigned ArgIndex = Outs[i].OrigArgIndex;
4153 unsigned ArgPartOffset = Outs[i].PartOffset;
4154 assert(ArgPartOffset == 0);
4155 // Calculate the total size to store. We don't have access to what we're
4156 // actually storing other than performing the loop and collecting the
4157 // info.
4158 SmallVector<std::pair<SDValue, SDValue>> Parts;
4159 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4160 SDValue PartValue = OutVals[i + 1];
4161 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4162 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4163 EVT PartVT = PartValue.getValueType();
4164
4165 StoredSize += PartVT.getStoreSize();
4166 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4167 Parts.push_back(std::make_pair(PartValue, Offset));
4168 ++i;
4169 }
4170 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4171 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4172 MemOpChains.push_back(
4173 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4174 MachinePointerInfo::getFixedStack(MF, FI)));
4175 for (const auto &Part : Parts) {
4176 SDValue PartValue = Part.first;
4177 SDValue PartOffset = Part.second;
4178 SDValue Address =
4179 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4180 MemOpChains.push_back(
4181 DAG.getStore(Chain, DL, PartValue, Address,
4182 MachinePointerInfo()));
4183 }
4184 ArgValue = SpillSlot;
4185 } else {
4186 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4187 }
4188
4189 // Use local copy if it is a byval arg.
4190 if (Flags.isByVal())
4191 ArgValue = ByValArgs[j++];
4192
4193 if (VA.isRegLoc()) {
4194 // Queue up the argument copies and emit them at the end.
4195 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4196 } else {
4197 assert(VA.isMemLoc() && "Argument not register or memory");
4198 assert(!IsTailCall && "Tail call not allowed if stack is used "
4199 "for passing parameters");
4200
4201 // Work out the address of the stack slot.
4202 if (!StackPtr.getNode())
4203 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4204 SDValue Address =
4205 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4206 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4207
4208 // Emit the store.
4209 MemOpChains.push_back(
4210 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4211 }
4212 }
4213
4214 // Join the stores, which are independent of one another.
4215 if (!MemOpChains.empty())
4216 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4217
4218 SDValue Glue;
4219
4220 // Build a sequence of copy-to-reg nodes, chained and glued together.
4221 for (auto &Reg : RegsToPass) {
4222 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4223 Glue = Chain.getValue(1);
4224 }
4225
4226 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4227 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4228 // split it and then direct call can be matched by PseudoCALL.
4229 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4230 const GlobalValue *GV = S->getGlobal();
4231 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
4232 ? LoongArchII::MO_CALL
4233 : LoongArchII::MO_CALL_PLT;
4234 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4235 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4236 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
4237 ? LoongArchII::MO_CALL
4238 : LoongArchII::MO_CALL_PLT;
4239 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4240 }
4241
4242 // The first call operand is the chain and the second is the target address.
4243 SmallVector<SDValue> Ops;
4244 Ops.push_back(Chain);
4245 Ops.push_back(Callee);
4246
4247 // Add argument registers to the end of the list so that they are
4248 // known live into the call.
4249 for (auto &Reg : RegsToPass)
4250 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4251
4252 if (!IsTailCall) {
4253 // Add a register mask operand representing the call-preserved registers.
4254 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4255 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4256 assert(Mask && "Missing call preserved mask for calling convention");
4257 Ops.push_back(DAG.getRegisterMask(Mask));
4258 }
4259
4260 // Glue the call to the argument copies, if any.
4261 if (Glue.getNode())
4262 Ops.push_back(Glue);
4263
4264 // Emit the call.
4265 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4266 unsigned Op;
4267 switch (DAG.getTarget().getCodeModel()) {
4268 default:
4269 report_fatal_error("Unsupported code model");
4270 case CodeModel::Small:
4271 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4272 break;
4273 case CodeModel::Medium:
4274 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4275 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4276 break;
4277 case CodeModel::Large:
4278 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4279 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4280 break;
4281 }
4282
4283 if (IsTailCall) {
4284 MF.getFrameInfo().setHasTailCall();
4285 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4286 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4287 return Ret;
4288 }
4289
4290 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4291 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4292 Glue = Chain.getValue(1);
4293
4294 // Mark the end of the call, which is glued to the call itself.
4295 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4296 Glue = Chain.getValue(1);
4297
4298 // Assign locations to each value returned by this call.
4299 SmallVector<CCValAssign> RVLocs;
4300 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4301 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4302
4303 // Copy all of the result registers out of their specified physreg.
4304 for (auto &VA : RVLocs) {
4305 // Copy the value out.
4306 SDValue RetValue =
4307 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4308 // Glue the RetValue to the end of the call sequence.
4309 Chain = RetValue.getValue(1);
4310 Glue = RetValue.getValue(2);
4311
4312 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4313
4314 InVals.push_back(RetValue);
4315 }
4316
4317 return Chain;
4318}
4319
4320 bool LoongArchTargetLowering::CanLowerReturn(
4321 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4322 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4323 SmallVector<CCValAssign> RVLocs;
4324 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4325
4326 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4327 LoongArchABI::ABI ABI =
4328 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4329 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4330 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4331 nullptr))
4332 return false;
4333 }
4334 return true;
4335}
4336
4337 SDValue LoongArchTargetLowering::LowerReturn(
4338 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4339 const SmallVectorImpl<ISD::OutputArg> &Outs,
4340 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4341 SelectionDAG &DAG) const {
4342 // Stores the assignment of the return value to a location.
4343 SmallVector<CCValAssign> RVLocs;
4344
4345 // Info about the registers and stack slot.
4346 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4347 *DAG.getContext());
4348
4349 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4350 nullptr, CC_LoongArch);
4351 if (CallConv == CallingConv::GHC && !RVLocs.empty())
4352 report_fatal_error("GHC functions return void only");
4353 SDValue Glue;
4354 SmallVector<SDValue, 4> RetOps(1, Chain);
4355
4356 // Copy the result values into the output registers.
4357 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4358 CCValAssign &VA = RVLocs[i];
4359 assert(VA.isRegLoc() && "Can only return in registers!");
4360
4361 // Handle a 'normal' return.
4362 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4363 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4364
4365 // Guarantee that all emitted copies are stuck together.
4366 Glue = Chain.getValue(1);
4367 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4368 }
4369
4370 RetOps[0] = Chain; // Update chain.
4371
4372 // Add the glue node if we have it.
4373 if (Glue.getNode())
4374 RetOps.push_back(Glue);
4375
4376 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4377}
4378
4379bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4380 bool ForCodeSize) const {
4381 // TODO: Maybe need more checks here after vector extension is supported.
4382 if (VT == MVT::f32 && !Subtarget.hasBasicF())
4383 return false;
4384 if (VT == MVT::f64 && !Subtarget.hasBasicD())
4385 return false;
4386 return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4387}
4388
4389 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4390 return true;
4391}
4392
4393 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4394 return true;
4395}
4396
4397bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4398 const Instruction *I) const {
4399 if (!Subtarget.is64Bit())
4400 return isa<LoadInst>(I) || isa<StoreInst>(I);
4401
4402 if (isa<LoadInst>(I))
4403 return true;
4404
4405 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4406 // require fences because we can use amswap_db.[w/d].
4407 if (isa<StoreInst>(I)) {
4408 unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4409 return (Size == 8 || Size == 16);
4410 }
4411
4412 return false;
4413}
4414
4415 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4416 LLVMContext &Context,
4417 EVT VT) const {
4418 if (!VT.isVector())
4419 return getPointerTy(DL);
4420 return VT.changeVectorElementTypeToInteger();
4421}
4422
4423 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4424 // TODO: Support vectors.
4425 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4426}
4427
4428 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4429 const CallInst &I,
4430 MachineFunction &MF,
4431 unsigned Intrinsic) const {
4432 switch (Intrinsic) {
4433 default:
4434 return false;
4435 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4436 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4437 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4438 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4439 Info.opc = ISD::INTRINSIC_W_CHAIN;
4440 Info.memVT = MVT::i32;
4441 Info.ptrVal = I.getArgOperand(0);
4442 Info.offset = 0;
4443 Info.align = Align(4);
4444 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4445 MachineMemOperand::MOVolatile;
4446 return true;
4447 // TODO: Add more Intrinsics later.
4448 }
4449}
4450
4451 TargetLowering::AtomicExpansionKind
4452 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4453 // TODO: Add more AtomicRMWInst that need to be expanded.
4454
4455 // Since a floating-point operation requires a non-trivial set of data
4456 // operations, use CmpXChg to expand it.
4457 if (AI->isFloatingPointOperation() ||
4458 AI->getOperation() == AtomicRMWInst::UIncWrap ||
4459 AI->getOperation() == AtomicRMWInst::UDecWrap)
4460 return AtomicExpansionKind::CmpXChg;
4461
4462 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4463 if (Size == 8 || Size == 16)
4464 return AtomicExpansionKind::MaskedIntrinsic;
4465 return AtomicExpansionKind::None;
4466}
4467
4468static Intrinsic::ID
4469 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4470 AtomicRMWInst::BinOp BinOp) {
4471 if (GRLen == 64) {
4472 switch (BinOp) {
4473 default:
4474 llvm_unreachable("Unexpected AtomicRMW BinOp");
4475 case AtomicRMWInst::Xchg:
4476 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4477 case AtomicRMWInst::Add:
4478 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4479 case AtomicRMWInst::Sub:
4480 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4481 case AtomicRMWInst::Nand:
4482 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4483 case AtomicRMWInst::UMax:
4484 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4485 case AtomicRMWInst::UMin:
4486 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4487 case AtomicRMWInst::Max:
4488 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4489 case AtomicRMWInst::Min:
4490 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4491 // TODO: support other AtomicRMWInst.
4492 }
4493 }
4494
4495 if (GRLen == 32) {
4496 switch (BinOp) {
4497 default:
4498 llvm_unreachable("Unexpected AtomicRMW BinOp");
4499 case AtomicRMWInst::Xchg:
4500 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4501 case AtomicRMWInst::Add:
4502 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4503 case AtomicRMWInst::Sub:
4504 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4505 case AtomicRMWInst::Nand:
4506 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4507 // TODO: support other AtomicRMWInst.
4508 }
4509 }
4510
4511 llvm_unreachable("Unexpected GRLen\n");
4512}
4513
4514 TargetLowering::AtomicExpansionKind
4515 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4516 AtomicCmpXchgInst *CI) const {
4517 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4518 if (Size == 8 || Size == 16)
4519 return AtomicExpansionKind::MaskedIntrinsic;
4520 return AtomicExpansionKind::None;
4521}
4522
4523 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4524 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4525 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4526 AtomicOrdering FailOrd = CI->getFailureOrdering();
4527 Value *FailureOrdering =
4528 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4529
4530 // TODO: Support cmpxchg on LA32.
4531 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4532 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4533 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4534 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4535 Type *Tys[] = {AlignedAddr->getType()};
4536 Function *MaskedCmpXchg =
4537 Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4538 Value *Result = Builder.CreateCall(
4539 MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4540 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4541 return Result;
4542}
4543
4544 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4545 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4546 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4547 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4548 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4549 // mask, as this produces better code than the LL/SC loop emitted by
4550 // int_loongarch_masked_atomicrmw_xchg.
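// For example, an i8 'atomicrmw xchg ptr, 0' becomes 'atomicrmw and' with
// the inverted mask (clearing just that byte), and 'xchg ptr, -1' becomes
// 'atomicrmw or' with the mask (setting it); both can then be selected to a
// single amand_db.w/amor_db.w on the containing word.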
4551 if (AI->getOperation() == AtomicRMWInst::Xchg &&
4552 isa<ConstantInt>(AI->getValOperand())) {
4553 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4554 if (CVal->isZero())
4555 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4556 Builder.CreateNot(Mask, "Inv_Mask"),
4557 AI->getAlign(), Ord);
4558 if (CVal->isMinusOne())
4559 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4560 AI->getAlign(), Ord);
4561 }
4562
4563 unsigned GRLen = Subtarget.getGRLen();
4564 Value *Ordering =
4565 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4566 Type *Tys[] = {AlignedAddr->getType()};
4567 Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4568 AI->getModule(),
4569 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4570
4571 if (GRLen == 64) {
4572 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4573 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4574 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4575 }
4576
4577 Value *Result;
4578
4579 // Must pass the shift amount needed to sign extend the loaded value prior
4580 // to performing a signed comparison for min/max. ShiftAmt is the number of
4581 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4582 // is the number of bits to left+right shift the value in order to
4583 // sign-extend.
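// Worked example on LA64: for an i8 field at bit offset 8, ShiftAmt == 8 and
// ValWidth == 8, so SextShamt == 64 - 8 - 8 == 48; shifting left then
// arithmetic-right by 48 sign-extends the loaded byte before the comparison.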
4584 if (AI->getOperation() == AtomicRMWInst::Min ||
4585 AI->getOperation() == AtomicRMWInst::Max) {
4586 const DataLayout &DL = AI->getModule()->getDataLayout();
4587 unsigned ValWidth =
4588 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4589 Value *SextShamt =
4590 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4591 Result = Builder.CreateCall(LlwOpScwLoop,
4592 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4593 } else {
4594 Result =
4595 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4596 }
4597
4598 if (GRLen == 64)
4599 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4600 return Result;
4601}
4602
4603 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4604 const MachineFunction &MF, EVT VT) const {
4605 VT = VT.getScalarType();
4606
4607 if (!VT.isSimple())
4608 return false;
4609
4610 switch (VT.getSimpleVT().SimpleTy) {
4611 case MVT::f32:
4612 case MVT::f64:
4613 return true;
4614 default:
4615 break;
4616 }
4617
4618 return false;
4619}
4620
4621 Register LoongArchTargetLowering::getExceptionPointerRegister(
4622 const Constant *PersonalityFn) const {
4623 return LoongArch::R4;
4624}
4625
4626 Register LoongArchTargetLowering::getExceptionSelectorRegister(
4627 const Constant *PersonalityFn) const {
4628 return LoongArch::R5;
4629}
4630
4631//===----------------------------------------------------------------------===//
4632// LoongArch Inline Assembly Support
4633//===----------------------------------------------------------------------===//
4634
4635 TargetLowering::ConstraintType
4636LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4637 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4638 //
4639 // 'f': A floating-point register (if available).
4640 // 'k': A memory operand whose address is formed by a base register and
4641 // (optionally scaled) index register.
4642 // 'l': A signed 16-bit constant.
4643 // 'm': A memory operand whose address is formed by a base register and
4644 // offset that is suitable for use in instructions with the same
4645 // addressing mode as st.w and ld.w.
4646 // 'I': A signed 12-bit constant (for arithmetic instructions).
4647 // 'J': Integer zero.
4648 // 'K': An unsigned 12-bit constant (for logic instructions).
4649 // "ZB": An address that is held in a general-purpose register. The offset is
4650 // zero.
4651 // "ZC": A memory operand whose address is formed by a base register and
4652 // offset that is suitable for use in instructions with the same
4653 // addressing mode as ll.w and sc.w.
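// Illustrative (hypothetical) use from C:
//   int v; asm("ld.w %0, %1" : "=r"(v) : "m"(*p));
// 'm' lets the compiler form a base+offset operand, while "ZB"(*p) would
// require a plain base register with a zero offset.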
4654 if (Constraint.size() == 1) {
4655 switch (Constraint[0]) {
4656 default:
4657 break;
4658 case 'f':
4659 return C_RegisterClass;
4660 case 'l':
4661 case 'I':
4662 case 'J':
4663 case 'K':
4664 return C_Immediate;
4665 case 'k':
4666 return C_Memory;
4667 }
4668 }
4669
4670 if (Constraint == "ZC" || Constraint == "ZB")
4671 return C_Memory;
4672
4673 // 'm' is handled here.
4674 return TargetLowering::getConstraintType(Constraint);
4675}
4676
4677InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4678 StringRef ConstraintCode) const {
4679 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4680 .Case("k", InlineAsm::ConstraintCode::k)
4681 .Case("ZB", InlineAsm::ConstraintCode::ZB)
4682 .Case("ZC", InlineAsm::ConstraintCode::ZC)
4683 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4684}
4685
4686std::pair<unsigned, const TargetRegisterClass *>
4687LoongArchTargetLowering::getRegForInlineAsmConstraint(
4688 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4689 // First, see if this is a constraint that directly corresponds to a LoongArch
4690 // register class.
4691 if (Constraint.size() == 1) {
4692 switch (Constraint[0]) {
4693 case 'r':
4694 // TODO: Support fixed vectors up to GRLen?
4695 if (VT.isVector())
4696 break;
4697 return std::make_pair(0U, &LoongArch::GPRRegClass);
4698 case 'f':
4699 if (Subtarget.hasBasicF() && VT == MVT::f32)
4700 return std::make_pair(0U, &LoongArch::FPR32RegClass);
4701 if (Subtarget.hasBasicD() && VT == MVT::f64)
4702 return std::make_pair(0U, &LoongArch::FPR64RegClass);
4703 if (Subtarget.hasExtLSX() &&
4704 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4705 return std::make_pair(0U, &LoongArch::LSX128RegClass);
4706 if (Subtarget.hasExtLASX() &&
4707 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4708 return std::make_pair(0U, &LoongArch::LASX256RegClass);
4709 break;
4710 default:
4711 break;
4712 }
4713 }
4714
4715 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4716 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4717 // constraints while the official register name is prefixed with a '$'. So we
4718 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
4719 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
4720 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4721 //
4722 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4723 // decode the usage of register name aliases into their official names. And
4724 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4725 // official register names.
4726 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4727 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4728 bool IsFP = Constraint[2] == 'f';
4729 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4730 std::pair<unsigned, const TargetRegisterClass *> R;
4731 R = TargetLowering::getRegForInlineAsmConstraint(
4732 TRI, join_items("", Temp.first, Temp.second), VT);
4733 // Match those names to the widest floating point register type available.
4734 if (IsFP) {
4735 unsigned RegNo = R.first;
4736 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4737 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4738 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4739 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4740 }
4741 }
4742 }
4743 return R;
4744 }
4745
4746 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4747}
4748
4749void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4750 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4751 SelectionDAG &DAG) const {
4752 // Currently only support length 1 constraints.
4753 if (Constraint.size() == 1) {
4754 switch (Constraint[0]) {
4755 case 'l':
4756 // Validate & create a 16-bit signed immediate operand.
4757 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4758 uint64_t CVal = C->getSExtValue();
4759 if (isInt<16>(CVal))
4760 Ops.push_back(
4761 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4762 }
4763 return;
4764 case 'I':
4765 // Validate & create a 12-bit signed immediate operand.
4766 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4767 uint64_t CVal = C->getSExtValue();
4768 if (isInt<12>(CVal))
4769 Ops.push_back(
4770 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4771 }
4772 return;
4773 case 'J':
4774 // Validate & create an integer zero operand.
4775 if (auto *C = dyn_cast<ConstantSDNode>(Op))
4776 if (C->getZExtValue() == 0)
4777 Ops.push_back(
4778 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4779 return;
4780 case 'K':
4781 // Validate & create a 12-bit unsigned immediate operand.
4782 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4783 uint64_t CVal = C->getZExtValue();
4784 if (isUInt<12>(CVal))
4785 Ops.push_back(
4786 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4787 }
4788 return;
4789 default:
4790 break;
4791 }
4792 }
4793 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4794}
4795
4796#define GET_REGISTER_MATCHER
4797#include "LoongArchGenAsmMatcher.inc"
4798
4799 Register
4800 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4801 const MachineFunction &MF) const {
4802 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4803 std::string NewRegName = Name.second.str();
4804 Register Reg = MatchRegisterAltName(NewRegName);
4805 if (Reg == LoongArch::NoRegister)
4806 Reg = MatchRegisterName(NewRegName);
4807 if (Reg == LoongArch::NoRegister)
4808 report_fatal_error(
4809 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4810 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4811 if (!ReservedRegs.test(Reg))
4812 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4813 StringRef(RegName) + "\"."));
4814 return Reg;
4815}
4816
4817 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4818 EVT VT, SDValue C) const {
4819 // TODO: Support vectors.
4820 if (!VT.isScalarInteger())
4821 return false;
4822
4823 // Omit the optimization if the data size exceeds GRLen.
4824 if (VT.getSizeInBits() > Subtarget.getGRLen())
4825 return false;
4826
4827 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4828 const APInt &Imm = ConstNode->getAPIntValue();
4829 // Break MUL into (SLLI + ADD/SUB) or ALSL.
4830 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4831 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4832 return true;
4833 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4834 if (ConstNode->hasOneUse() &&
4835 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4836 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4837 return true;
4838 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4839 // in which the immediate has two set bits. Or Break (MUL x, imm)
4840 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4841 // equals to (1 << s0) - (1 << s1).
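// For example, MUL x, 4160 (== 4096 + 64) -> ADD (SLLI x, 12), (SLLI x, 6),
// and MUL x, 8128 (== 8192 - 64) -> SUB (SLLI x, 13), (SLLI x, 6).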
4842 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4843 unsigned Shifts = Imm.countr_zero();
4844 // Reject immediates which can be composed via a single LUI.
4845 if (Shifts >= 12)
4846 return false;
4847 // Reject multiplications that can be optimized to
4848 // (SLLI (ALSL x, x, 1/2/3/4), s).
4849 APInt ImmPop = Imm.ashr(Shifts);
4850 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4851 return false;
4852 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4853 // since it needs one more instruction than the other 3 cases.
4854 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4855 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4856 (ImmSmall - Imm).isPowerOf2())
4857 return true;
4858 }
4859 }
4860
4861 return false;
4862}
4863
4864 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4865 const AddrMode &AM,
4866 Type *Ty, unsigned AS,
4867 Instruction *I) const {
4868 // LoongArch has four basic addressing modes:
4869 // 1. reg
4870 // 2. reg + 12-bit signed offset
4871 // 3. reg + 14-bit signed offset left-shifted by 2
4872 // 4. reg1 + reg2
4873 // TODO: Add more checks after supporting the vector extension.
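// For example (assuming in-range offsets): ld.w $a0, $a1, 2044 uses mode 2;
// ldptr.w $a0, $a1, 16380 uses mode 3; ldx.w $a0, $a1, $a2 uses mode 4.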
4874
4875 // No global is ever allowed as a base.
4876 if (AM.BaseGV)
4877 return false;
4878
4879 // Require a 12-bit signed offset or a 14-bit signed offset left-shifted by 2.
4880 if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
4881 return false;
4882
4883 switch (AM.Scale) {
4884 case 0:
4885 // "i" is not allowed.
4886 if (!AM.HasBaseReg)
4887 return false;
4888 // Otherwise we have "r+i".
4889 break;
4890 case 1:
4891 // "r+r+i" is not allowed.
4892 if (AM.HasBaseReg && AM.BaseOffs != 0)
4893 return false;
4894 // Otherwise we have "r+r" or "r+i".
4895 break;
4896 case 2:
4897 // "2*r+r" or "2*r+i" is not allowed.
4898 if (AM.HasBaseReg || AM.BaseOffs)
4899 return false;
4900 // Otherwise we have "r+r".
4901 break;
4902 default:
4903 return false;
4904 }
4905
4906 return true;
4907}
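
// A few concrete modes classified by the checks above (offsets chosen for
// illustration only):
//   base + 2047       -> fits si12                         : legal ("r+i")
//   base + 8192       -> 2048 << 2, fits shifted si14      : legal ("r+i")
//   base + 5001       -> exceeds si12 and is not 4-aligned : illegal
//   base1 + base2     -> Scale == 1, zero offset           : legal ("r+r")
//   base1 + base2 + 8 -> Scale == 1 with base and offset   : illegal
//   2*reg + 16        -> Scale == 2 with an offset         : illegal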
4908
4909bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4910 return isInt<12>(Imm);
4911}
4912
4913bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4914 return isInt<12>(Imm);
4915}
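
// isInt<12> matches the si12 immediate field of slti(u) and addi.{w,d},
// i.e. the inclusive range [-2048, 2047]; for example:
//   isInt<12>(2047)  -> true   (addi.d $a0, $a0, 2047 encodes directly)
//   isInt<12>(-2048) -> true
//   isInt<12>(2048)  -> false  (the constant must be materialized first)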
4916
4917bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4918 // Zexts are free if they can be combined with a load.
4919 // Don't advertise i32->i64 zextload as being free for LA64: it interacts
4920 // poorly with type legalization, which prefers sign-extended compares.
4921 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4922 EVT MemVT = LD->getMemoryVT();
4923 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4924 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4925 LD->getExtensionType() == ISD::ZEXTLOAD))
4926 return true;
4927 }
4928
4929 return TargetLowering::isZExtFree(Val, VT2);
4930}
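
// The i8/i16 cases are free because LoongArch has zero-extending loads; a
// rough mapping, assuming a plain scalar load:
//   zext(load i8)  -> ld.bu   (byte load, zero-extended)
//   zext(load i16) -> ld.hu   (halfword load, zero-extended)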
4931
4932bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
4933 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4934}
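
// On LA64, 32-bit ALU results (add.w, addi.w, ...) are kept sign-extended in
// their 64-bit registers, so the sext folds into the producing instruction,
// while a zext needs an explicit high-bit clear; roughly:
//   sext i32 -> i64 : no extra code
//   zext i32 -> i64 : bstrpick.d $rd, $rj, 31, 0   (one extra instruction)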
4935
4936bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4937 // TODO: Support vectors.
4938 if (Y.getValueType().isVector())
4939 return false;
4940
4941 return !isa<ConstantSDNode>(Y);
4942}
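
// Returning true lets DAG combine rewrite mask-subset compares through the
// native andn (rj & ~rk) instruction, sketched as:
//   (X & Y) == Y  ->  (~X & Y) == 0  ->  andn $t, Y, X; compare $t with zero
// Constant Y is excluded since the inverted mask folds at compile time.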
4943
4944ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4945 // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4946 return ISD::SIGN_EXTEND;
4947}
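
// Sign extension matters because the LL/SC cmpxchg loop compares against a
// value loaded with ll.w, which sign-extends on LA64: e.g. an i32 expected
// value of 0x80000000 is seen as 0xFFFFFFFF80000000 in the register, so a
// zero-extended comparison value would never match.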