LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
248
249 if (Subtarget.is64Bit())
251
252 if (!Subtarget.hasBasicD()) {
254 if (Subtarget.is64Bit()) {
257 }
258 }
259 }
260
261 // Set operations for 'D' feature.
262
263 if (Subtarget.hasBasicD()) {
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
266 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
269 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
270 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
271
291 Subtarget.isSoftFPABI() ? LibCall : Custom);
294 Subtarget.isSoftFPABI() ? LibCall : Custom);
295
296 if (Subtarget.is64Bit())
298 }
299
300 // Set operations for 'LSX' feature.
301
302 if (Subtarget.hasExtLSX()) {
304 // Expand all truncating stores and extending loads.
305 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
306 setTruncStoreAction(VT, InnerVT, Expand);
309 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
310 }
311 // By default everything must be expanded. Then we will selectively turn
312 // on ones that can be effectively codegen'd.
313 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
315 }
316
317 for (MVT VT : LSXVTs) {
321
325
330 }
331 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
334 Legal);
336 VT, Legal);
343 Expand);
358 }
359 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
361 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
363 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
366 }
368 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
376 VT, Expand);
384 }
386 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
388 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
389 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
390
391 for (MVT VT :
392 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
393 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
403 }
406 // We want to legalize this to an f64 load rather than an i64 load.
407 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
408 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16})
410 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v16i32, MVT::v8i64,
411 MVT::v16i64})
413 }
414
415 // Set operations for 'LASX' feature.
416
417 if (Subtarget.hasExtLASX()) {
418 for (MVT VT : LASXVTs) {
422
428
432 }
433 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
436 Legal);
438 VT, Legal);
445 Expand);
461 }
462 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
464 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
466 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
469 }
470 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
478 VT, Expand);
486 }
489 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
492 }
493 for (MVT VT :
494 {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
497 }
498 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
500 }
501
502 // Set DAG combine for LA32 and LA64.
503 if (Subtarget.hasBasicF()) {
505 }
506
511
512 // Set DAG combine for 'LSX' feature.
513
514 if (Subtarget.hasExtLSX()) {
524 }
525
526 // Set DAG combine for 'LASX' feature.
527 if (Subtarget.hasExtLASX()) {
532 }
533
534 // Compute derived properties from the register classes.
535 computeRegisterProperties(Subtarget.getRegisterInfo());
536
538
541
542 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
543
545
546 // Function alignments.
548 // Set preferred alignments.
549 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
550 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
551 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
552
553 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
554 if (Subtarget.hasLAMCAS())
556
557 if (Subtarget.hasSCQ()) {
560 }
561
562 // Disable strict node mutation.
563 IsStrictFPEnabled = true;
564}
565
567 const GlobalAddressSDNode *GA) const {
568 // In order to maximise the opportunity for common subexpression elimination,
569 // keep a separate ADD node for the global address offset instead of folding
570 // it in the global address node. Later peephole optimisations may choose to
571 // fold it back in when profitable.
572 return false;
573}
574
576 SelectionDAG &DAG) const {
577 switch (Op.getOpcode()) {
579 return lowerATOMIC_FENCE(Op, DAG);
581 return lowerEH_DWARF_CFA(Op, DAG);
583 return lowerGlobalAddress(Op, DAG);
585 return lowerGlobalTLSAddress(Op, DAG);
587 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
589 return lowerINTRINSIC_W_CHAIN(Op, DAG);
591 return lowerINTRINSIC_VOID(Op, DAG);
593 return lowerBlockAddress(Op, DAG);
594 case ISD::JumpTable:
595 return lowerJumpTable(Op, DAG);
596 case ISD::SHL_PARTS:
597 return lowerShiftLeftParts(Op, DAG);
598 case ISD::SRA_PARTS:
599 return lowerShiftRightParts(Op, DAG, true);
600 case ISD::SRL_PARTS:
601 return lowerShiftRightParts(Op, DAG, false);
603 return lowerConstantPool(Op, DAG);
604 case ISD::FP_TO_SINT:
605 return lowerFP_TO_SINT(Op, DAG);
606 case ISD::FP_TO_UINT:
607 return lowerFP_TO_UINT(Op, DAG);
608 case ISD::BITCAST:
609 return lowerBITCAST(Op, DAG);
610 case ISD::UINT_TO_FP:
611 return lowerUINT_TO_FP(Op, DAG);
612 case ISD::SINT_TO_FP:
613 return lowerSINT_TO_FP(Op, DAG);
614 case ISD::VASTART:
615 return lowerVASTART(Op, DAG);
616 case ISD::FRAMEADDR:
617 return lowerFRAMEADDR(Op, DAG);
618 case ISD::RETURNADDR:
619 return lowerRETURNADDR(Op, DAG);
621 return lowerSET_ROUNDING(Op, DAG);
623 return lowerWRITE_REGISTER(Op, DAG);
625 return lowerINSERT_VECTOR_ELT(Op, DAG);
627 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
629 return lowerBUILD_VECTOR(Op, DAG);
631 return lowerCONCAT_VECTORS(Op, DAG);
633 return lowerVECTOR_SHUFFLE(Op, DAG);
634 case ISD::BITREVERSE:
635 return lowerBITREVERSE(Op, DAG);
637 return lowerSCALAR_TO_VECTOR(Op, DAG);
638 case ISD::PREFETCH:
639 return lowerPREFETCH(Op, DAG);
640 case ISD::SELECT:
641 return lowerSELECT(Op, DAG);
642 case ISD::BRCOND:
643 return lowerBRCOND(Op, DAG);
644 case ISD::FP_TO_FP16:
645 return lowerFP_TO_FP16(Op, DAG);
646 case ISD::FP16_TO_FP:
647 return lowerFP16_TO_FP(Op, DAG);
648 case ISD::FP_TO_BF16:
649 return lowerFP_TO_BF16(Op, DAG);
650 case ISD::BF16_TO_FP:
651 return lowerBF16_TO_FP(Op, DAG);
653 return lowerVECREDUCE_ADD(Op, DAG);
654 case ISD::ROTL:
655 case ISD::ROTR:
656 return lowerRotate(Op, DAG);
664 return lowerVECREDUCE(Op, DAG);
665 case ISD::ConstantFP:
666 return lowerConstantFP(Op, DAG);
667 case ISD::SETCC:
668 return lowerSETCC(Op, DAG);
669 case ISD::FP_ROUND:
670 return lowerFP_ROUND(Op, DAG);
671 case ISD::FP_EXTEND:
672 return lowerFP_EXTEND(Op, DAG);
674 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
676 return lowerDYNAMIC_STACKALLOC(Op, DAG);
677 }
678 return SDValue();
679}
680
681// Helper to attempt to return a cheaper, bit-inverted version of \p V.
683 // TODO: don't always ignore oneuse constraints.
684 V = peekThroughBitcasts(V);
685 EVT VT = V.getValueType();
686
687 // Match not(xor X, -1) -> X.
688 if (V.getOpcode() == ISD::XOR &&
689 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
690 isAllOnesConstant(V.getOperand(1))))
691 return V.getOperand(0);
692
693 // Match not(extract_subvector(not(X))) -> extract_subvector(X).
694 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
695 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
696 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
697 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
698 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
699 V.getOperand(1));
700 }
701 }
702
703 // Match not(SplatVector(not(X))) -> SplatVector(X).
704 if (V.getOpcode() == ISD::BUILD_VECTOR) {
705 if (SDValue SplatValue =
706 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
707 if (!V->isOnlyUserOf(SplatValue.getNode()))
708 return SDValue();
709
710 if (SDValue Not = isNOT(SplatValue, DAG)) {
711 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
712 return DAG.getSplat(VT, SDLoc(Not), Not);
713 }
714 }
715 }
716
717 // Match not(or(not(X),not(Y))) -> and(X, Y).
718 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
719 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
720 // TODO: Handle cases with single NOT operand -> VANDN
721 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
722 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
723 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
724 DAG.getBitcast(VT, Op1));
725 }
726
727 // TODO: Add more matching patterns. Such as,
728 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
729 // not(slt(C, X)) -> slt(X - 1, C)
730 return SDValue();
731}
732
733// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with same type to
734// LoongArchISD::VFCVT. For example:
735// x1 = fp_round x, 0
736// y1 = fp_round y, 0
737// z = concat_vectors x1, y1
738// Or
739// x1 = LoongArch::VFCVT undef, x
740// y1 = LoongArch::VFCVT undef, y
741// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
742// can be combined to:
743// z = LoongArch::VFCVT y, x
745 const LoongArchSubtarget &Subtarget) {
746 assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
747 (N->getOpcode() == LoongArchISD::VPACKEV) ||
748 (N->getOpcode() == LoongArchISD::VPERMI)) &&
749 "Invalid Node");
750
751 SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
752 SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
753 unsigned Opcode0 = Op0.getOpcode();
754 unsigned Opcode1 = Op1.getOpcode();
755 if (Opcode0 != Opcode1)
756 return SDValue();
757
758 if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
759 return SDValue();
760
761 // Check if two nodes have only one use.
762 if (!Op0.hasOneUse() || !Op1.hasOneUse())
763 return SDValue();
764
765 EVT VT = N.getValueType();
766 EVT SVT0 = Op0.getValueType();
767 EVT SVT1 = Op1.getValueType();
768 // Check if two nodes have the same result type.
769 if (SVT0 != SVT1)
770 return SDValue();
771
772 // Check if two nodes have the same operand type.
773 EVT SSVT0 = Op0.getOperand(0).getValueType();
774 EVT SSVT1 = Op1.getOperand(0).getValueType();
775 if (SSVT0 != SSVT1)
776 return SDValue();
777
778 if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
779 if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
780 SSVT0 == MVT::v4f64) {
781 // A vector_shuffle is required in the final step, as xvfcvt instruction
782 // operates on each 128-bit segment as a lane.
783 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
784 Op1.getOperand(0), Op0.getOperand(0));
785 SDValue Undef = DAG.getUNDEF(Res.getValueType());
786 // After VFCVT, the high part of Res comes from the high parts of Op0 and
787 // Op1, and the low part comes from the low parts of Op0 and Op1. However,
788 // the desired order requires Op0 to fully occupy the lower half and Op1
789 // the upper half of Res. The Mask reorders the elements of Res to achieve
790 // this:
791 // - The first four elements (0, 1, 4, 5) come from Op0.
792 // - The next four elements (2, 3, 6, 7) come from Op1.
793 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
794 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
795 return DAG.getBitcast(VT, Res);
796 }
797 }
798
799 if ((N->getOpcode() == LoongArchISD::VPACKEV ||
800 N->getOpcode() == LoongArchISD::VPERMI) &&
801 Opcode0 == LoongArchISD::VFCVT) {
802 // For VPACKEV or VPERMI, check if the first operand of VFCVT is undef.
803 if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
804 return SDValue();
805
806 if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
807 return SDValue();
808
809 if (N->getOpcode() == LoongArchISD::VPACKEV &&
810 (VT == MVT::v2i64 || VT == MVT::v2f64)) {
811 SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
812 Op0.getOperand(1), Op1.getOperand(1));
813 return DAG.getBitcast(VT, Res);
814 }
815
816 if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
817 int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
818 if (Imm != 68)
819 return SDValue();
820 return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
821 Op1.getOperand(1));
822 }
823 }
824
825 return SDValue();
826}
827
828SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
829 SelectionDAG &DAG) const {
830 SDLoc DL(Op);
831 SDValue In = Op.getOperand(0);
832 MVT VT = Op.getSimpleValueType();
833 MVT SVT = In.getSimpleValueType();
834
835 if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
836 SDValue Lo, Hi;
837 std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
838 return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
839 }
840
841 return SDValue();
842}
843
// Custom lowering for ISD::FP_EXTEND on vectors.
//
// Handles v2f32 -> v2f64 (when LSX is available) and v4f32 -> v4f64 (when LASX
// is available); any other combination yields an empty SDValue. The extend is
// emitted as LoongArchISD::VFCVTH when the source is recognised as the high
// half of a wider vector, and as LoongArchISD::VFCVTL otherwise.
844SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
845 SelectionDAG &DAG) const {
846
847 SDLoc DL(Op);
848 EVT VT = Op.getValueType();
849 SDValue Src = Op->getOperand(0);
850 EVT SVT = Src.getValueType();
851
// Each supported conversion also requires the matching vector extension.
852 bool V2F32ToV2F64 =
853 VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
854 bool V4F32ToV4F64 =
855 VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
856 if (!V2F32ToV2F64 && !V4F32ToV4F64)
857 return SDValue();
858
859 // Check if Op is the high part of vector.
// Returns the vector being extracted from when Op is an EXTRACT_SUBVECTOR
// whose index equals half of that vector's element count, and an empty
// SDValue otherwise.
// NOTE(review): one line of this lambda (listing line 861) is not visible
// here; confirm its effect against the full source.
860 auto CheckVecHighPart = [](SDValue Op) {
862 if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
863 SDValue SOp = Op.getOperand(0);
864 EVT SVT = SOp.getValueType();
865 if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
866 return SDValue();
867
868 const uint64_t Imm = Op.getConstantOperandVal(1);
869 if (Imm == SVT.getVectorNumElements() / 2)
870 return SOp;
871 return SDValue();
872 }
873 return SDValue();
874 };
875
876 unsigned Opcode;
877 SDValue VFCVTOp;
// WideOpVT is the source vector type with twice as many elements.
878 EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
879 SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
880
881 // If the operand of ISD::FP_EXTEND comes from the high part of vector,
882 // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
883 if (SDValue V = CheckVecHighPart(Src)) {
884 assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
885 "Unexpected wide vector");
886 Opcode = LoongArchISD::VFCVTH;
887 VFCVTOp = DAG.getBitcast(WideOpVT, V);
888 } else {
// Place Src at index 0 of an otherwise undefined wide vector.
889 Opcode = LoongArchISD::VFCVTL;
890 VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
891 DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
892 }
893
894 // v2f64 = fp_extend v2f32
895 if (V2F32ToV2F64)
896 return DAG.getNode(Opcode, DL, VT, VFCVTOp);
897
898 // v4f64 = fp_extend v4f32
899 if (V4F32ToV4F64) {
900 // XVFCVT instruction operates on each 128-bit segment as a lane, so a
901 // vector_shuffle is required firstly.
902 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
903 SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
904 DAG.getUNDEF(WideOpVT), Mask);
905 Res = DAG.getNode(Opcode, DL, VT, Res);
906 return Res;
907 }
908
909 return SDValue();
910}
911
// Custom lowering for ISD::ConstantFP.
//
// Builds the constant's bit pattern as an integer and moves it into a
// floating-point register. An empty SDValue is returned instead when the value
// is +/-0.0, when isFPImmVLDILegal() accepts it, or when the estimated
// instruction count exceeds MaterializeFPImmInsNum (unless the value is 1.0).
912SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
913 SelectionDAG &DAG) const {
914 EVT VT = Op.getValueType();
915 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
916 const APFloat &FPVal = CFP->getValueAPF();
917 SDLoc DL(CFP);
918
// Only f32 with the 'F' feature or f64 with the 'D' feature is expected here.
919 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
920 (VT == MVT::f64 && Subtarget.hasBasicD()));
921
922 // If value is 0.0 or -0.0, just ignore it.
923 if (FPVal.isZero())
924 return SDValue();
925
926 // If lsx enabled, use cheaper 'vldi' instruction if possible.
927 if (isFPImmVLDILegal(FPVal, VT))
928 return SDValue();
929
930 // Construct as integer, and move to float register.
931 APInt INTVal = FPVal.bitcastToAPInt();
932
933 // If more than MaterializeFPImmInsNum instructions will be used to
934 // generate the INTVal and move it to float register, fallback to
935 // use floating point load from the constant pool.
// NOTE(review): the declaration of Seq (listing line 936) is not visible
// here; presumably it is the integer materialization sequence for INTVal.
// The move into the FPR is counted as 2 instructions for f64 on a 32-bit
// subtarget and as 1 otherwise.
937 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
938 if (InsNum > MaterializeFPImmInsNum && !FPVal.isOne())
939 return SDValue();
940
941 switch (VT.getSimpleVT().SimpleTy) {
942 default:
943 llvm_unreachable("Unexpected floating point type!");
944 break;
945 case MVT::f32: {
// Build the i32 bit pattern; on a 64-bit subtarget it is any-extended to
// i64 and the _LA64 variant of the move node is used.
946 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
947 if (Subtarget.is64Bit())
948 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
949 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
950 : LoongArchISD::MOVGR2FR_W,
951 DL, VT, NewVal);
952 }
953 case MVT::f64: {
954 if (Subtarget.is64Bit()) {
955 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
956 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
957 }
// 32-bit subtarget: pass the low and high 32-bit halves separately.
958 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
959 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
960 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
961 }
962 }
963
964 return SDValue();
965}
966
967// Ensure SETCC result and operand have the same bit width; isel does not
968// support mismatched widths.
969SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
970 SelectionDAG &DAG) const {
971 SDLoc DL(Op);
972 EVT ResultVT = Op.getValueType();
973 EVT OperandVT = Op.getOperand(0).getValueType();
974
975 EVT SetCCResultVT =
976 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
977
978 if (ResultVT == SetCCResultVT)
979 return Op;
980
981 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
982 "SETCC operands must have the same type!");
983
984 SDValue SetCCNode =
985 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
986 Op.getOperand(1), Op.getOperand(2));
987
988 if (ResultVT.bitsGT(SetCCResultVT))
989 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
990 else if (ResultVT.bitsLT(SetCCResultVT))
991 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
992
993 return SetCCNode;
994}
995
996// Lower sext_invec using vslti instructions.
997// For example:
998// %b = sext <4 x i16> %a to <4 x i32>
999// can be lowered to:
1000// VSLTI_H vr2, vr1, 0
1001// VILVL.H vr1, vr2, vr1
1002SDValue LoongArchTargetLowering::lowerSIGN_EXTEND_VECTOR_INREG(
1003 SDValue Op, SelectionDAG &DAG) const {
1004 SDLoc DL(Op);
1005 SDValue Src = Op.getOperand(0);
1006 MVT SrcVT = Src.getSimpleValueType();
1007 MVT DstVT = Op.getSimpleValueType();
1008
1009 if (!SrcVT.is128BitVector())
1010 return SDValue();
1011
1012 // lower to VSLTI + VILVL if extend could be done in single step.
1013 if (DstVT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits() == 2) {
1014 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1015 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Src, Zero,
1016 DAG.getCondCode(ISD::SETLT));
1017 SDValue LoInterleaved =
1018 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Src);
1019
1020 return DAG.getBitcast(DstVT, LoInterleaved);
1021 }
1022
1023 return SDValue();
1024}
1025
1026// Lower vecreduce_add using vhaddw instructions.
1027// For Example:
1028// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
1029// can be lowered to:
1030// VHADDW_D_W vr0, vr0, vr0
1031// VHADDW_Q_D vr0, vr0, vr0
1032// VPICKVE2GR_D a0, vr0, 0
1033// ADDI_W a0, a0, 0
1034SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
1035 SelectionDAG &DAG) const {
1036
1037 SDLoc DL(Op);
1038 MVT OpVT = Op.getSimpleValueType();
1039 SDValue Val = Op.getOperand(0);
1040
1041 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1042 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1043 unsigned ResBits = OpVT.getScalarSizeInBits();
1044
1045 unsigned LegalVecSize = 128;
1046 bool isLASX256Vector =
1047 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
1048
1049 // Ensure operand type legal or enable it legal.
1050 while (!isTypeLegal(Val.getSimpleValueType())) {
1051 Val = DAG.WidenVector(Val, DL);
1052 }
1053
1054 // NumEles is designed for iterations count, v4i32 for LSX
1055 // and v8i32 for LASX should have the same count.
1056 if (isLASX256Vector) {
1057 NumEles /= 2;
1058 LegalVecSize = 256;
1059 }
1060
1061 EleBits *= 2;
1062 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
1063 EleBits = std::min(EleBits, 64u);
1064 MVT IntTy = MVT::getIntegerVT(EleBits);
1065 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
1066 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
1067 }
1068
1069 if (isLASX256Vector) {
1070 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
1071 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
1072 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
1073 }
1074
1075 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
1076 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1077 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
1078}
1079
1080// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
1081// For Example:
1082// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
1083// can be lowered to:
1084// VBSRL_V vr1, vr0, 8
1085// VMAX_W vr0, vr1, vr0
1086// VBSRL_V vr1, vr0, 4
1087// VMAX_W vr0, vr1, vr0
1088// VPICKVE2GR_W a0, vr0, 0
1089// For 256 bit vector, it is illegal and will be spilt into
1090// two 128 bit vector by default then processed by this.
1091SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
1092 SelectionDAG &DAG) const {
1093 SDLoc DL(Op);
1094
1095 MVT OpVT = Op.getSimpleValueType();
1096 SDValue Val = Op.getOperand(0);
1097
1098 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
1099 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
1100
1101 // Ensure operand type legal or enable it legal.
1102 while (!isTypeLegal(Val.getSimpleValueType())) {
1103 Val = DAG.WidenVector(Val, DL);
1104 }
1105
1106 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
1107 MVT VecTy = Val.getSimpleValueType();
1108 MVT GRLenVT = Subtarget.getGRLenVT();
1109
1110 for (int i = NumEles; i > 1; i /= 2) {
1111 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
1112 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
1113 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
1114 }
1115
1116 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
1117 DAG.getConstant(0, DL, GRLenVT));
1118}
1119
1120SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
1121 SelectionDAG &DAG) const {
1122 unsigned IsData = Op.getConstantOperandVal(4);
1123
1124 // We don't support non-data prefetch.
1125 // Just preserve the chain.
1126 if (!IsData)
1127 return Op.getOperand(0);
1128
1129 return Op;
1130}
1131
1132SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
1133 SelectionDAG &DAG) const {
1134 MVT VT = Op.getSimpleValueType();
1135 assert(VT.isVector() && "Unexpected type");
1136
1137 SDLoc DL(Op);
1138 SDValue R = Op.getOperand(0);
1139 SDValue Amt = Op.getOperand(1);
1140 unsigned Opcode = Op.getOpcode();
1141 unsigned EltSizeInBits = VT.getScalarSizeInBits();
1142
1143 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
1144 if (V.getOpcode() != ISD::BUILD_VECTOR)
1145 return false;
1146 if (SDValue SplatValue =
1147 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
1148 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
1149 CstSplatValue = C->getAPIntValue();
1150 return true;
1151 }
1152 }
1153 return false;
1154 };
1155
1156 // Check for constant splat rotation amount.
1157 APInt CstSplatValue;
1158 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
1159 bool isROTL = Opcode == ISD::ROTL;
1160
1161 // Check for splat rotate by zero.
1162 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
1163 return R;
1164
1165 // LoongArch targets always prefer ISD::ROTR.
1166 if (isROTL) {
1167 SDValue Zero = DAG.getConstant(0, DL, VT);
1168 return DAG.getNode(ISD::ROTR, DL, VT, R,
1169 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
1170 }
1171
1172 // Rotate by a immediate.
1173 if (IsCstSplat) {
1174 // ISD::ROTR: Attemp to rotate by a positive immediate.
1175 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
1176 if (SDValue Urem =
1177 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
1178 return DAG.getNode(Opcode, DL, VT, R, Urem);
1179 }
1180
1181 return Op;
1182}
1183
1184// Return true if Val is equal to (setcc LHS, RHS, CC).
1185// Return false if Val is the inverse of (setcc LHS, RHS, CC).
1186// Otherwise, return std::nullopt.
// Val must itself be an ISD::SETCC node (asserted below). Its operands are
// compared against LHS/RHS both in the same order and in swapped order.
1187static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
1188 ISD::CondCode CC, SDValue Val) {
1189 assert(Val->getOpcode() == ISD::SETCC);
1190 SDValue LHS2 = Val.getOperand(0);
1191 SDValue RHS2 = Val.getOperand(1);
1192 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
1193
1194 if (LHS == LHS2 && RHS == RHS2) {
// Same operand order: compare the condition codes directly.
1195 if (CC == CC2)
1196 return true;
1197 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1198 return false;
1199 } else if (LHS == RHS2 && RHS == LHS2) {
// NOTE(review): one line of this swapped-operand branch (listing line 1200)
// is not visible here; presumably it adjusts CC2 for the swapped operands
// before the comparisons below. Confirm against the full source.
1201 if (CC == CC2)
1202 return true;
1203 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
1204 return false;
1205 }
1206
1207 return std::nullopt;
1208}
1209
1211 const LoongArchSubtarget &Subtarget) {
1212 SDValue CondV = N->getOperand(0);
1213 SDValue TrueV = N->getOperand(1);
1214 SDValue FalseV = N->getOperand(2);
1215 MVT VT = N->getSimpleValueType(0);
1216 SDLoc DL(N);
1217
1218 // (select c, -1, y) -> -c | y
1219 if (isAllOnesConstant(TrueV)) {
1220 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1221 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
1222 }
1223 // (select c, y, -1) -> (c-1) | y
1224 if (isAllOnesConstant(FalseV)) {
1225 SDValue Neg =
1226 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1227 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
1228 }
1229
1230 // (select c, 0, y) -> (c-1) & y
1231 if (isNullConstant(TrueV)) {
1232 SDValue Neg =
1233 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
1234 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
1235 }
1236 // (select c, y, 0) -> -c & y
1237 if (isNullConstant(FalseV)) {
1238 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1239 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
1240 }
1241
1242 // select c, ~x, x --> xor -c, x
1243 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
1244 const APInt &TrueVal = TrueV->getAsAPIntVal();
1245 const APInt &FalseVal = FalseV->getAsAPIntVal();
1246 if (~TrueVal == FalseVal) {
1247 SDValue Neg = DAG.getNegative(CondV, DL, VT);
1248 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
1249 }
1250 }
1251
1252 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
1253 // when both truev and falsev are also setcc.
1254 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
1255 FalseV.getOpcode() == ISD::SETCC) {
1256 SDValue LHS = CondV.getOperand(0);
1257 SDValue RHS = CondV.getOperand(1);
1258 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1259
1260 // (select x, x, y) -> x | y
1261 // (select !x, x, y) -> x & y
1262 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
1263 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
1264 DAG.getFreeze(FalseV));
1265 }
1266 // (select x, y, x) -> x & y
1267 // (select !x, y, x) -> x | y
1268 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1269 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1270 DAG.getFreeze(TrueV), FalseV);
1271 }
1272 }
1273
1274 return SDValue();
1275}
1276
1277// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1278// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1279// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1280// being `0` or `-1`. In such cases we can replace `select` with `and`.
1281// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1282// than `c0`?
1283 static SDValue
1285 const LoongArchSubtarget &Subtarget) {
// NOTE(review): listing line 1284 is absent from this copy; BO and DAG are
// used below, so they are presumably declared on that line - confirm against
// the upstream file.
//
// Returns an empty SDValue whenever the pattern does not match or the fold is
// not considered profitable.
//
// Find the single-use SELECT among the binary operator's two operands,
// trying operand 0 first and falling back to operand 1.
1286 unsigned SelOpNo = 0;
1287 SDValue Sel = BO->getOperand(0);
1288 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1289 SelOpNo = 1;
1290 Sel = BO->getOperand(1);
1291 }
1292
1293 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1294 return SDValue();
1295
// Decide which select arm is the constant one: operand 1 (true value) is
// tried first, otherwise operand 2 (false value) is assumed and checked below.
1296 unsigned ConstSelOpNo = 1;
1297 unsigned OtherSelOpNo = 2;
1298 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1299 ConstSelOpNo = 2;
1300 OtherSelOpNo = 1;
1301 }
1302 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1303 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
// Bail out if neither arm is a constant, or the constant is opaque.
1304 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1305 return SDValue();
1306
// The operand of BO that is not the select must be a non-opaque constant too.
1307 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1308 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1309 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1310 return SDValue();
1311
1312 SDLoc DL(Sel);
1313 EVT VT = BO->getValueType(0);
1314
// Fold binOp(c0, c1), keeping the operands in the same order they have in BO
// (the select was operand 1 when SelOpNo == 1, so the pair is swapped).
1315 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1316 if (SelOpNo == 1)
1317 std::swap(NewConstOps[0], NewConstOps[1]);
1318
1319 SDValue NewConstOp =
1320 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1321 if (!NewConstOp)
1322 return SDValue();
1323
// Profitability check: only a folded constant of 0 or all-ones is accepted.
1324 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1325 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1326 return SDValue();
1327
// Apply the binary operator to the non-constant arm, again preserving the
// operand order of BO.
1328 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1329 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1330 if (SelOpNo == 1)
1331 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1332 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1333
// Rebuild the select on the original condition, with each new value placed in
// the arm its source value occupied.
1334 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1335 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1336 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1337 }
1338
1339// Changes the condition code and swaps operands if necessary, so the SetCC
1340// operation matches one of the comparisons supported directly by branches
1341// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1342// compare with 1/-1.
1344 ISD::CondCode &CC, SelectionDAG &DAG) {
// NOTE(review): listing line 1343 (start of the signature) is absent from
// this copy; DL, LHS and RHS are used below and LHS/RHS/CC are assigned, so
// the latter are presumably reference parameters - confirm upstream.
1345 // If this is a single bit test that can't be handled by ANDI, shift the
1346 // bit to be tested to the MSB and perform a signed compare with 0.
1347 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1348 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1349 isa<ConstantSDNode>(LHS.getOperand(1))) {
1350 uint64_t Mask = LHS.getConstantOperandVal(1);
1351 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1352 unsigned ShAmt = 0;
// Power-of-two mask: the tested bit is moved to the top bit and the
// comparison becomes a sign test (EQ -> SETGE, otherwise SETLT).
// Other accepted masks: the value is shifted left by (width - bit_width(Mask))
// and the original equality condition code is kept.
1353 if (isPowerOf2_64(Mask)) {
1354 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1355 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1356 } else {
1357 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1358 }
1359
// Drop the AND and compare its (possibly shifted) first operand instead.
1360 LHS = LHS.getOperand(0);
1361 if (ShAmt != 0)
1362 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1363 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1364 return;
1365 }
1366 }
1367
// Rewrite compares against 1 / -1 as compares against 0.
1368 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1369 int64_t C = RHSC->getSExtValue();
1370 switch (CC) {
1371 default:
1372 break;
1373 case ISD::SETGT:
1374 // Convert X > -1 to X >= 0.
1375 if (C == -1) {
1376 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1377 CC = ISD::SETGE;
1378 return;
1379 }
1380 break;
1381 case ISD::SETLT:
1382 // Convert X < 1 to 0 >= X.
1383 if (C == 1) {
1384 RHS = LHS;
1385 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1386 CC = ISD::SETGE;
1387 return;
1388 }
1389 break;
1390 }
1391 }
1392
// For the four condition codes below the operands are exchanged.
// NOTE(review): listing line 1400 is absent between the case labels and the
// swap; presumably it updates CC to the swapped-operand form - confirm upstream.
1393 switch (CC) {
1394 default:
1395 break;
1396 case ISD::SETGT:
1397 case ISD::SETLE:
1398 case ISD::SETUGT:
1399 case ISD::SETULE:
1401 std::swap(LHS, RHS);
1402 break;
1403 }
1404 }
1405
// Lowers a select node whose operands are (condition, true value, false
// value). The result is either a simpler arithmetic/bitwise form, a re-lowered
// select produced by folding the single binary-operator user into the arms, or
// a LoongArchISD::SELECT_CC node.
1406 SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1407 SelectionDAG &DAG) const {
1408 SDValue CondV = Op.getOperand(0);
1409 SDValue TrueV = Op.getOperand(1);
1410 SDValue FalseV = Op.getOperand(2);
1411 SDLoc DL(Op);
1412 MVT VT = Op.getSimpleValueType();
1413 MVT GRLenVT = Subtarget.getGRLenVT();
1414
// First try to express the select directly with binary operations.
1415 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1416 return V;
1417
// If the only user is a binary operator that is safe to speculate, try to
// push that operator into the select arms.
1418 if (Op.hasOneUse()) {
1419 unsigned UseOpc = Op->user_begin()->getOpcode();
1420 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1421 SDNode *BinOp = *Op->user_begin();
1422 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1423 DAG, Subtarget)) {
// All uses of the old binary operator are redirected to the new value.
1424 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1425 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1426 // may return a constant node and cause crash in lowerSELECT.
1427 if (NewSel.getOpcode() == ISD::SELECT)
1428 return lowerSELECT(NewSel, DAG);
1429 return NewSel;
1430 }
1431 }
1432 }
1433
1434 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1435 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1436 // (select condv, truev, falsev)
1437 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1438 if (CondV.getOpcode() != ISD::SETCC ||
1439 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1440 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1441 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1442
1443 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1444
1445 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1446 }
1447
1448 // If the CondV is the output of a SETCC node which operates on GRLenVT
1449 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1450 // to take advantage of the integer compare+branch instructions. i.e.: (select
1451 // (setcc lhs, rhs, cc), truev, falsev)
1452 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1453 SDValue LHS = CondV.getOperand(0);
1454 SDValue RHS = CondV.getOperand(1);
1455 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1456
1457 // Special case for a select of 2 constants that have a difference of 1.
1458 // Normally this is done by DAGCombine, but if the select is introduced by
1459 // type legalization or op legalization, we miss it. Restricting to SETLT
1460 // case for now because that is what signed saturating add/sub need.
1461 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1462 // but we would probably want to swap the true/false values if the condition
1463 // is SETGE/SETLE to avoid an XORI.
1464 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1465 CCVal == ISD::SETLT) {
1466 const APInt &TrueVal = TrueV->getAsAPIntVal();
1467 const APInt &FalseVal = FalseV->getAsAPIntVal();
// NOTE(review): both rewrites use CondV itself as an addend, so they
// presumably rely on the setcc result being exactly 0 or 1 - confirm.
1468 if (TrueVal - 1 == FalseVal)
1469 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1470 if (TrueVal + 1 == FalseVal)
1471 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1472 }
1473
// Canonicalize the comparison; LHS, RHS and CCVal may all be rewritten.
1474 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1475 // 1 < x ? x : 1 -> 0 < x ? x : 1
1476 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1477 RHS == TrueV && LHS == FalseV) {
1478 LHS = DAG.getConstant(0, DL, VT);
1479 // 0 <u x is the same as x != 0.
1480 if (CCVal == ISD::SETULT) {
1481 std::swap(LHS, RHS);
1482 CCVal = ISD::SETNE;
1483 }
1484 }
1485
1486 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1487 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1488 RHS == FalseV) {
1489 RHS = DAG.getConstant(0, DL, VT);
1490 }
1491
1492 SDValue TargetCC = DAG.getCondCode(CCVal);
1493
// When only the true value is a constant, move it to the false slot and
// invert the condition to compensate.
1494 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1495 // (select (setcc lhs, rhs, CC), constant, falsev)
1496 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1497 std::swap(TrueV, FalseV);
1498 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1499 }
1500
1501 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1502 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1503 }
1504
// Lowers a conditional branch whose condition is operand 1. Operands 0 and 2
// are forwarded unchanged as the first and last operands of the new node
// (presumably the chain and the branch destination - confirm).
1505 SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1506 SelectionDAG &DAG) const {
1507 SDValue CondV = Op.getOperand(1);
1508 SDLoc DL(Op);
1509 MVT GRLenVT = Subtarget.getGRLenVT();
1510
1511 if (CondV.getOpcode() == ISD::SETCC) {
// Integer compare on GRLenVT: merge the setcc into a BR_CC after
// canonicalizing the comparison.
1512 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1513 SDValue LHS = CondV.getOperand(0);
1514 SDValue RHS = CondV.getOperand(1);
1515 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1516
1517 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1518
1519 SDValue TargetCC = DAG.getCondCode(CCVal);
1520 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1521 Op.getOperand(0), LHS, RHS, TargetCC,
1522 Op.getOperand(2));
1523 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
// Floating-point compare: keep the setcc as the condition of a
// LoongArchISD::BRCOND node.
1524 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1525 Op.getOperand(0), CondV, Op.getOperand(2));
1526 }
1527 }
1528
// Any other condition is branched on as (CondV != 0).
1529 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1530 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1531 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1532 }
1533
// Lowers the node by inserting its scalar operand into element 0 of an undef
// vector of the result type.
1534 SDValue
1535 LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1536 SelectionDAG &DAG) const {
1537 SDLoc DL(Op);
1538 MVT OpVT = Op.getSimpleValueType();
1539
1540 SDValue Vector = DAG.getUNDEF(OpVT);
1541 SDValue Val = Op.getOperand(0);
// The insertion index is the constant 0, typed as GRLenVT.
1542 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1543
1544 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1545 }
1546
// Lowers a vector bit-reverse by reinterpreting the source as a vector of
// i64, reversing each i64 as a scalar, and (for 16/32-bit element types)
// shuffling the elements inside each 64-bit chunk back into place.
// Returns an empty SDValue for byte vectors on a non-64-bit subtarget and for
// result types not listed in the switch.
1547 SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1548 SelectionDAG &DAG) const {
1549 EVT ResTy = Op->getValueType(0);
1550 SDValue Src = Op->getOperand(0);
1551 SDLoc DL(Op);
1552
1553 // LoongArchISD::BITREV_8B is not supported on LA32.
1554 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1555 return SDValue();
1556
1557 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1558 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1559 unsigned int NewEltNum = NewVT.getVectorNumElements();
1560
1561 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1562
// NOTE(review): listing line 1563 is absent from this copy; it presumably
// declares the container `Ops` used below - confirm upstream.
1564 for (unsigned int i = 0; i < NewEltNum; i++) {
// Note: this local `Op` shadows the function parameter of the same name.
1565 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1566 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
// Byte vectors use BITREV_8B; every other type uses a full i64 BITREVERSE.
1567 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1568 ? (unsigned)LoongArchISD::BITREV_8B
1569 : (unsigned)ISD::BITREVERSE;
1570 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1571 }
1572 SDValue Res =
1573 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1574
1575 switch (ResTy.getSimpleVT().SimpleTy) {
1576 default:
1577 return SDValue();
1578 case MVT::v16i8:
1579 case MVT::v32i8:
1580 return Res;
1581 case MVT::v8i16:
1582 case MVT::v16i16:
1583 case MVT::v4i32:
1584 case MVT::v8i32: {
// NOTE(review): listing line 1585 is absent from this copy; it presumably
// declares the shuffle mask container `Mask` - confirm upstream.
// The mask reverses the order of the elements inside each 64-bit chunk.
1586 for (unsigned int i = 0; i < NewEltNum; i++)
1587 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1588 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1589 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1590 }
1591 }
1592 }
1593
1594// Widen element type to get a new mask value (if possible).
1595// For example:
1596// shufflevector <4 x i32> %a, <4 x i32> %b,
1597// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1598// is equivalent to:
1599// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1600// can be lowered to:
1601// VPACKOD_D vr0, vr0, vr1
1603 SDValue V1, SDValue V2, SelectionDAG &DAG) {
// NOTE(review): listing line 1602 (start of the signature) is absent from
// this copy; DL, Mask and VT are used below and are presumably declared
// there - confirm upstream.
1604 unsigned EltBits = VT.getScalarSizeInBits();
1605
// Elements wider than 32 bits, or 1-bit elements, are not widened.
1606 if (EltBits > 32 || EltBits == 1)
1607 return SDValue();
1608
1609 SmallVector<int, 8> NewMask;
1610 if (widenShuffleMaskElts(Mask, NewMask)) {
// Build the vector type with elements twice as wide and half as many of
// them, keeping the integer / floating-point kind of VT.
1611 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1612 : MVT::getIntegerVT(EltBits * 2);
1613 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
// Only emit the widened shuffle when the wider type is legal; the result is
// bitcast back to VT.
1614 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1615 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1616 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1617 return DAG.getBitcast(
1618 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1619 }
1620 }
1621
1622 return SDValue();
1623 }
1624
1625 /// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1626 /// instructions.
1627 // The function matches elements from one of the input vectors shuffled to the
1628 // left or right with zeroable elements 'shifted in'. It handles both the
1629 // strictly bit-wise element shifts and the byte shift across an entire 128-bit
1630 // lane.
1631 // Mostly copied from X86.
//
// On a successful match, ShiftVT and Opcode are set to the vector type and
// node opcode to use, and the shift amount is returned (in bytes for
// VBSLL/VBSRL, in bits for VSLLI/VSRLI). Returns -1 when nothing matches.
// MaskOffset is added to the expected mask indices, which lets the caller
// match against the second shuffle input.
1632 static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1633 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1634 int MaskOffset, const APInt &Zeroable) {
1635 int Size = Mask.size();
1636 unsigned SizeInBits = Size * ScalarSizeInBits;
1637
// True when, in every group of Scale elements, the Shift positions that a
// shift would fill in (low ones for a left shift, high ones for a right
// shift) are all zeroable.
1638 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1639 for (int i = 0; i < Size; i += Scale)
1640 for (int j = 0; j < Shift; ++j)
1641 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1642 return false;
1643
1644 return true;
1645 };
1646
// True when Mask[Pos, Pos + Size) is Low, Low + Step, ... with -1 entries
// accepted anywhere. The parameter `Size` shadows the outer `Size`.
1647 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1648 int Step = 1) {
1649 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1650 if (!(Mask[i] == -1 || Mask[i] == Low))
1651 return false;
1652 return true;
1653 };
1654
// Checks the surviving (non-shifted-in) elements of every group and, on
// success, fills in Opcode / ShiftVT and returns the shift amount.
1655 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1656 for (int i = 0; i != Size; i += Scale) {
1657 unsigned Pos = Left ? i + Shift : i;
1658 unsigned Low = Left ? i : i + Shift;
1659 unsigned Len = Scale - Shift;
1660 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1661 return -1;
1662 }
1663
// Groups wider than 64 bits are handled with the byte-shift opcodes.
1664 int ShiftEltBits = ScalarSizeInBits * Scale;
1665 bool ByteShift = ShiftEltBits > 64;
1666 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1667 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1668 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1669
1670 // Normalize the scale for byte shifts to still produce an i64 element
1671 // type.
1672 Scale = ByteShift ? Scale / 2 : Scale;
1673
1674 // We need to round trip through the appropriate type for the shift.
1675 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1676 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1677 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1678 return (int)ShiftAmt;
1679 };
1680
// Try group widths from two elements up to 128 bits, every shift count within
// the group, and both directions (left first). Only a positive shift amount
// counts as a match.
1681 unsigned MaxWidth = 128;
1682 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1683 for (int Shift = 1; Shift != Scale; ++Shift)
1684 for (bool Left : {true, false})
1685 if (CheckZeros(Shift, Scale, Left)) {
1686 int ShiftAmt = MatchShift(Shift, Scale, Left);
1687 if (0 < ShiftAmt)
1688 return ShiftAmt;
1689 }
1690
1691 // no match
1692 return -1;
1693 }
1694
1695/// Lower VECTOR_SHUFFLE as shift (if possible).
1696///
1697/// For example:
1698/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1699/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1700/// is lowered to:
1701/// (VBSLL_V $v0, $v0, 4)
1702///
1703/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1704/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1705/// is lowered to:
1706/// (VSLLI_D $v0, $v0, 32)
1708 MVT VT, SDValue V1, SDValue V2,
1709 SelectionDAG &DAG,
1710 const LoongArchSubtarget &Subtarget,
1711 const APInt &Zeroable) {
// NOTE(review): listing line 1707 (start of the signature) is absent from
// this copy; DL and Mask are used below and are presumably declared there -
// confirm upstream.
1712 int Size = Mask.size();
1713 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1714
// ShiftVT and Opcode are outputs of matchShuffleAsShift; they are only read
// after a successful match.
1715 MVT ShiftVT;
1716 SDValue V = V1;
1717 unsigned Opcode;
1718
1719 // Try to match shuffle against V1 shift.
1720 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1721 Mask, 0, Zeroable);
1722
1723 // If V1 failed, try to match shuffle against V2 shift.
// Indices that refer to V2 start at Size, hence the mask offset.
1724 if (ShiftAmt < 0) {
1725 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1726 Mask, Size, Zeroable);
1727 V = V2;
1728 }
1729
// Neither input matched a shift.
1730 if (ShiftAmt < 0)
1731 return SDValue();
1732
1733 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1734 "Illegal integer vector type");
// Perform the shift in ShiftVT and bitcast the result back to VT.
1735 V = DAG.getBitcast(ShiftVT, V);
1736 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1737 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1738 return DAG.getBitcast(VT, V);
1739 }
1740
1741/// Determine whether a range fits a regular pattern of values.
1742/// This function accounts for the possibility of jumping over the End iterator.
1743 template <typename ValType>
1744 static bool
1746 unsigned CheckStride,
1748 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
// NOTE(review): listing lines 1745 and 1747 are absent from this copy; they
// presumably hold the function name and the Begin / End iterator parameters
// used below - confirm upstream.
//
// Visits every CheckStride-th element starting at Begin. Each visited element
// must be -1 or equal to the expected index, which starts at ExpectedIndex
// and grows by ExpectedIndexStride per visited element.
// `I` is a reference to Begin, so advancing it advances Begin itself.
1749 auto &I = Begin;
1750
1751 while (I != End) {
1752 if (*I != -1 && *I != ExpectedIndex)
1753 return false;
1754 ExpectedIndex += ExpectedIndexStride;
1755
1756 // Incrementing past End is undefined behaviour so we must increment one
1757 // step at a time and check for End at each step.
1758 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1759 ; // Empty loop body.
1760 }
1761 return true;
1762 }
1763
1764/// Compute whether each element of a shuffle is zeroable.
1765///
1766/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1768 SDValue V2, APInt &KnownUndef,
1769 APInt &KnownZero) {
// NOTE(review): listing line 1767 (start of the signature) is absent from
// this copy; Mask and V1 are used below and are presumably declared there -
// confirm upstream.
//
// Both outputs are reset to all-zero bit sets with one bit per mask element.
1770 int Size = Mask.size();
1771 KnownUndef = KnownZero = APInt::getZero(Size);
1772
// NOTE(review): listing line 1773 is absent; it presumably applies the same
// peekThroughBitcasts step to V1 - confirm upstream.
1774 V2 = peekThroughBitcasts(V2);
1775
1776 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1777 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1778
1779 int VectorSizeInBits = V1.getValueSizeInBits();
1780 int ScalarSizeInBits = VectorSizeInBits / Size;
1781 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
// ScalarSizeInBits is only read by the assert above.
1782 (void)ScalarSizeInBits;
1783
1784 for (int i = 0; i < Size; ++i) {
1785 int M = Mask[i];
// A negative mask entry marks the element as undef.
1786 if (M < 0) {
1787 KnownUndef.setBit(i);
1788 continue;
1789 }
// An element is known zero only when the whole input vector it selects from
// is an all-zeros build vector.
1790 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1791 KnownZero.setBit(i);
1792 continue;
1793 }
1794 }
1795 }
1796
1797/// Test whether a shuffle mask is equivalent within each sub-lane.
1798///
1799/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1800/// non-trivial to compute in the face of undef lanes. The representation is
1801/// suitable for use with existing 128-bit shuffles as entries from the second
1802/// vector have been remapped to [LaneSize, 2*LaneSize).
// Returns true when no mask entry crosses a lane and every lane applies the
// same in-lane pattern; RepeatedMask then holds that pattern, with -1 for
// slots that are undef in every lane. RepeatedMask may be partially filled
// when false is returned.
1803 static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1804 ArrayRef<int> Mask,
1805 SmallVectorImpl<int> &RepeatedMask) {
// LaneSize is the number of elements per lane.
1806 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1807 RepeatedMask.assign(LaneSize, -1);
1808 int Size = Mask.size();
1809 for (int i = 0; i < Size; ++i) {
1810 assert(Mask[i] == -1 || Mask[i] >= 0);
1811 if (Mask[i] < 0)
1812 continue;
// Mask[i] % Size maps indices into the second vector onto [0, Size) so the
// source lane can be compared with the destination lane.
1813 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1814 // This entry crosses lanes, so there is no way to model this shuffle.
1815 return false;
1816
1817 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1818 // Adjust second vector indices to start at LaneSize instead of Size.
1819 int LocalM =
1820 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1821 if (RepeatedMask[i % LaneSize] < 0)
1822 // This is the first non-undef entry in this slot of a 128-bit lane.
1823 RepeatedMask[i % LaneSize] = LocalM;
1824 else if (RepeatedMask[i % LaneSize] != LocalM)
1825 // Found a mismatch with the repeated mask.
1826 return false;
1827 }
1828 return true;
1829 }
1830
1831/// Attempts to match vector shuffle as byte rotation.
1833 ArrayRef<int> Mask) {
// NOTE(review): listing line 1832 (start of the signature) is absent from
// this copy; VT, V1 and V2 are used below and V1 / V2 are assigned at the
// end, so they are presumably reference parameters - confirm upstream.
//
// Returns the rotation amount in bytes, or -1 when the mask is not a
// rotation. On success V1 receives the "Lo" input and V2 the "Hi" input.
1834
1835 SDValue Lo, Hi;
1836 SmallVector<int, 16> RepeatedMask;
1837
// The mask must repeat identically in every 128-bit lane.
1838 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1839 return -1;
1840
1841 int NumElts = RepeatedMask.size();
1842 int Rotation = 0;
// Scale converts an element count into bytes (16 bytes per 128-bit lane).
1843 int Scale = 16 / NumElts;
1844
1845 for (int i = 0; i < NumElts; ++i) {
1846 int M = RepeatedMask[i];
1847 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1848 "Unexpected mask index.");
1849 if (M < 0)
1850 continue;
1851
1852 // Determine where a rotated vector would have started.
1853 int StartIdx = i - (M % NumElts);
// An element that stays in place means this is not a rotation.
1854 if (StartIdx == 0)
1855 return -1;
1856
1857 // If we found the tail of a vector the rotation must be the missing
1858 // front. If we found the head of a vector, it must be how much of the
1859 // head.
1860 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1861
// Every defined element must agree on a single rotation amount.
1862 if (Rotation == 0)
1863 Rotation = CandidateRotation;
1864 else if (Rotation != CandidateRotation)
1865 return -1;
1866
1867 // Compute which value this mask is pointing at.
1868 SDValue MaskV = M < NumElts ? V1 : V2;
1869
1870 // Compute which of the two target values this index should be assigned
1871 // to. This reflects whether the high elements are remaining or the low
1872 // elements are remaining.
1873 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1874
1875 // Either set up this value if we've not encountered it before, or check
1876 // that it remains consistent.
1877 if (!TargetV)
1878 TargetV = MaskV;
1879 else if (TargetV != MaskV)
1880 return -1;
1881 }
1882
1883 // Check that we successfully analyzed the mask, and normalize the results.
1884 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1885 assert((Lo || Hi) && "Failed to find a rotated input vector!");
// If only one side was seen, use the same input for both.
1886 if (!Lo)
1887 Lo = Hi;
1888 else if (!Hi)
1889 Hi = Lo;
1890
1891 V1 = Lo;
1892 V2 = Hi;
1893
1894 return Rotation * Scale;
1895 }
1896
1897/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1898///
1899/// For example:
1900/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1901/// <2 x i32> <i32 3, i32 0>
1902/// is lowered to:
1903/// (VBSRL_V $v1, $v1, 8)
1904/// (VBSLL_V $v0, $v0, 8)
1905/// (VOR_V $v0, $V0, $v1)
1906 static SDValue
1908 SDValue V1, SDValue V2, SelectionDAG &DAG,
1909 const LoongArchSubtarget &Subtarget) {
// NOTE(review): listing line 1907 (function name and leading parameters) is
// absent from this copy; DL, Mask and VT are used below and are presumably
// declared there - confirm upstream.
1910
// Lo and Hi are updated by matchShuffleAsByteRotate to the inputs that feed
// the low and high parts of the rotation.
1911 SDValue Lo = V1, Hi = V2;
1912 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1913 if (ByteRotation <= 0)
1914 return SDValue();
1915
// Work on the inputs as byte vectors of the same total size.
1916 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1917 Lo = DAG.getBitcast(ByteVT, Lo);
1918 Hi = DAG.getBitcast(ByteVT, Hi);
1919
// The two byte shifts are complementary within 16 bytes.
1920 int LoByteShift = 16 - ByteRotation;
1921 int HiByteShift = ByteRotation;
1922 MVT GRLenVT = Subtarget.getGRLenVT();
1923
1924 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1925 DAG.getConstant(LoByteShift, DL, GRLenVT));
1926 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1927 DAG.getConstant(HiByteShift, DL, GRLenVT));
// OR the two shifted halves together and return the result in the original
// type.
1928 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1929 }
1930
1931 /// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
1932///
1933/// For example:
1934/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1935/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1936/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1937/// is lowered to:
1938/// (VREPLI $v1, 0)
1939/// (VILVL $v0, $v1, $v0)
1941 ArrayRef<int> Mask, MVT VT,
1942 SDValue V1, SDValue V2,
1943 SelectionDAG &DAG,
1944 const APInt &Zeroable) {
// NOTE(review): listing line 1940 (start of the signature) is absent from
// this copy; DL is used below and is presumably declared there - confirm
// upstream.
1945 int Bits = VT.getSizeInBits();
1946 int EltBits = VT.getScalarSizeInBits();
1947 int NumElements = VT.getVectorNumElements();
1948
// Every element is zeroable: the result is simply the zero vector.
1949 if (Zeroable.isAllOnes())
1950 return DAG.getConstant(0, DL, VT);
1951
1952 // Define a helper function to check a particular ext-scale and lower to it if
1953 // valid.
// Scale is the number of original elements that make up one extended element.
1954 auto Lower = [&](int Scale) -> SDValue {
1955 SDValue InputV;
1956 bool AnyExt = true;
1957 int Offset = 0;
1958 for (int i = 0; i < NumElements; i++) {
1959 int M = Mask[i];
1960 if (M < 0)
1961 continue;
1962 if (i % Scale != 0) {
1963 // Each of the extended elements needs to be zeroable.
1964 if (!Zeroable[i])
1965 return SDValue();
1966
// A defined, zeroable filler element forces a zero extend.
1967 AnyExt = false;
1968 continue;
1969 }
1970
1971 // Each of the base elements needs to be consecutive indices into the
1972 // same input vector.
1973 SDValue V = M < NumElements ? V1 : V2;
1974 M = M % NumElements;
1975 if (!InputV) {
1976 InputV = V;
1977 Offset = M - (i / Scale);
1978
1979 // These offsets can't be handled.
1980 if (Offset % (NumElements / Scale))
1981 return SDValue();
1982 } else if (InputV != V)
1983 return SDValue();
1984
1985 if (M != (Offset + (i / Scale)))
1986 return SDValue(); // Non-consecutive strided elements.
1987 }
1988
1989 // If we fail to find an input, we have a zero-shuffle which should always
1990 // have already been handled.
1991 if (!InputV)
1992 return SDValue();
1993
// Extend step by step: each iteration interleaves the input with either
// itself (any-extend) or zero (zero-extend), halving Scale and doubling the
// element width. EltBits and NumElements are captured by reference, so the
// updates below modify the enclosing function's variables.
1994 do {
1995 unsigned VilVLoHi = LoongArchISD::VILVL;
// Elements taken from the upper half of the input use VILVH instead.
1996 if (Offset >= (NumElements / 2)) {
1997 VilVLoHi = LoongArchISD::VILVH;
1998 Offset -= (NumElements / 2);
1999 }
2000
2001 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
2002 SDValue Ext =
2003 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
2004 InputV = DAG.getBitcast(InputVT, InputV);
2005 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
2006 Scale /= 2;
2007 EltBits *= 2;
2008 NumElements /= 2;
2009 } while (Scale > 1);
2010 return DAG.getBitcast(VT, InputV);
2011 };
2012
2013 // Each iteration, try extending the elements half as much, but into twice as
2014 // many elements.
2015 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
2016 NumExtElements *= 2) {
2017 if (SDValue V = Lower(NumElements / NumExtElements))
2018 return V;
2019 }
2020 return SDValue();
2021 }
2022
2023/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
2024///
2025/// VREPLVEI performs vector broadcast based on an element specified by an
2026/// integer immediate, with its mask being similar to:
2027/// <x, x, x, ...>
2028/// where x is any valid index.
2029///
2030/// When undef's appear in the mask they are treated as if they were whatever
2031/// value is necessary in order to fit the above form.
2032 static SDValue
2034 SDValue V1, SelectionDAG &DAG,
2035 const LoongArchSubtarget &Subtarget) {
// NOTE(review): listing line 2033 (function name and leading parameters) is
// absent from this copy; DL, Mask and VT are used below and are presumably
// declared there - confirm upstream.
//
// Take the first defined mask entry as the candidate splat index.
2036 int SplatIndex = -1;
2037 for (const auto &M : Mask) {
2038 if (M != -1) {
2039 SplatIndex = M;
2040 break;
2041 }
2042 }
2043
// A mask with no defined entries yields an undef vector.
2044 if (SplatIndex == -1)
2045 return DAG.getUNDEF(VT);
2046
2047 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Every entry must be either undef or SplatIndex (expected-index stride 0).
2048 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
2049 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2050 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2051 }
2052
2053 return SDValue();
2054 }
2055
2056/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2057///
2058/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2059/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2060///
2061/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2062/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2063/// When undef's appear they are treated as if they were whatever value is
2064/// necessary in order to fit the above forms.
2065///
2066/// For example:
2067/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2068/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2069/// i32 7, i32 6, i32 5, i32 4>
2070/// is lowered to:
2071/// (VSHUF4I_H $v0, $v1, 27)
2072/// where the 27 comes from:
2073/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2074 static SDValue
2076 SDValue V1, SDValue V2, SelectionDAG &DAG,
2077 const LoongArchSubtarget &Subtarget) {
// NOTE(review): listing line 2075 (function name and leading parameters) is
// absent from this copy; DL, Mask and VT are used below and are presumably
// declared there - confirm upstream.
2078
// The two-element 64-bit vector types use a sub-vector of 2 elements; every
// other type uses 4.
2079 unsigned SubVecSize = 4;
2080 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2081 SubVecSize = 2;
2082
// SubMask[i] collects the common sub-vector index used at position i of every
// block; -1 means "not yet determined".
2083 int SubMask[4] = {-1, -1, -1, -1};
2084 for (unsigned i = 0; i < SubVecSize; ++i) {
2085 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2086 int M = Mask[j];
2087
2088 // Convert from vector index to 4-element subvector index
2089 // If an index refers to an element outside of the subvector then give up
2090 if (M != -1) {
2091 M -= 4 * (j / SubVecSize);
2092 if (M < 0 || M >= 4)
2093 return SDValue();
2094 }
2095
2096 // If the mask has an undef, replace it with the current index.
2097 // Note that it might still be undef if the current index is also undef
2098 if (SubMask[i] == -1)
2099 SubMask[i] = M;
2100 // Check that non-undef values are the same as in the mask. If they
2101 // aren't then give up
2102 else if (M != -1 && M != SubMask[i])
2103 return SDValue();
2104 }
2105 }
2106
2107 // Calculate the immediate. Replace any remaining undefs with zero
// Position 0 ends up in the lowest two bits of the immediate.
2108 int Imm = 0;
2109 for (int i = SubVecSize - 1; i >= 0; --i) {
2110 int M = SubMask[i];
2111
2112 if (M == -1)
2113 M = 0;
2114
2115 Imm <<= 2;
2116 Imm |= M & 0x3;
2117 }
2118
2119 MVT GRLenVT = Subtarget.getGRLenVT();
2120
2121 // Return vshuf4i.d
// This form takes both input vectors as operands.
2122 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2123 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2124 DAG.getConstant(Imm, DL, GRLenVT));
2125
// All other types shuffle V1 only.
2126 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2127 DAG.getConstant(Imm, DL, GRLenVT));
2128 }
2129
2130/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2131///
2132 /// It is possible to optimize a VECTOR_SHUFFLE that performs a vector
2133 /// reverse, i.e. whose mask looks like:
2134/// <7, 6, 5, 4, 3, 2, 1, 0>
2135///
2136/// When undef's appear in the mask they are treated as if they were whatever
2137/// value is necessary in order to fit the above forms.
2138 static SDValue
2140 SDValue V1, SelectionDAG &DAG,
2141 const LoongArchSubtarget &Subtarget) {
// NOTE(review): listing line 2139 (function name and leading parameters) is
// absent from this copy; DL, Mask and VT are used below and are presumably
// declared there - confirm upstream.
2142 // Only vectors with i8/i16 elements which cannot match other patterns
2143 // directly need to do this.
2144 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2145 VT != MVT::v16i16)
2146 return SDValue();
2147
2148 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2149 return SDValue();
2150
// Step 1: reverse the vector at the granularity of groups of four original
// elements, by shuffling a vector whose elements are four times as wide.
2151 int WidenNumElts = VT.getVectorNumElements() / 4;
2152 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2153 for (int i = 0; i < WidenNumElts; ++i)
2154 WidenMask[i] = WidenNumElts - 1 - i;
2155
2156 MVT WidenVT = MVT::getVectorVT(
2157 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2158 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2159 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2160 DAG.getUNDEF(WidenVT), WidenMask);
2161
// Step 2: apply VSHUF4I with immediate 27 to the result in the original type.
// 27 == 3 + (2 << 2) + (1 << 4) + (0 << 6), i.e. the sub-mask <3, 2, 1, 0>,
// which reverses the elements inside each group of four.
2162 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2163 DAG.getBitcast(VT, WidenRev),
2164 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2165 }
2166
2167/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2168///
2169/// VPACKEV interleaves the even elements from each vector.
2170///
2171/// It is possible to lower into VPACKEV when the mask consists of two of the
2172/// following forms interleaved:
2173/// <0, 2, 4, ...>
2174/// <n, n+2, n+4, ...>
2175/// where n is the number of elements in the vector.
2176/// For example:
2177/// <0, 0, 2, 2, 4, 4, ...>
2178/// <0, n, 2, n+2, 4, n+4, ...>
2179///
2180/// When undef's appear in the mask they are treated as if they were whatever
2181/// value is necessary in order to fit the above forms.
2183 MVT VT, SDValue V1, SDValue V2,
2184 SelectionDAG &DAG) {
// NOTE(review): listing line 2182 (start of the signature) is absent from
// this copy; DL and Mask are used below and are presumably declared there -
// confirm upstream.
2185
2186 const auto &Begin = Mask.begin();
2187 const auto &End = Mask.end();
2188 SDValue OriV1 = V1, OriV2 = V2;
2189
// Even mask positions must read the even elements (0, 2, 4, ...) of one
// input; V1 is set to whichever input that is.
2190 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2191 V1 = OriV1;
2192 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2193 V1 = OriV2;
2194 else
2195 return SDValue();
2196
// Odd mask positions are checked the same way and select V2.
2197 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2198 V2 = OriV1;
2199 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2200 V2 = OriV2;
2201 else
2202 return SDValue();
2203
// The node takes the odd-position source first, then the even-position one.
2204 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2205 }
2206
2207 /// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2208 ///
2209 /// VPACKOD interleaves the odd elements from each vector.
2210 ///
2211 /// It is possible to lower into VPACKOD when the mask consists of two of the
2212 /// following forms interleaved:
2213 ///   <1, 3, 5, ...>
2214 ///   <n+1, n+3, n+5, ...>
2215 /// where n is the number of elements in the vector.
2216 /// For example:
2217 ///   <1, 1, 3, 3, 5, 5, ...>
2218 ///   <1, n+1, 3, n+3, 5, n+5, ...>
2219 ///
2220 /// When undefs appear in the mask they are treated as if they were whatever
2221 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() when the mask fits neither form.
2223 MVT VT, SDValue V1, SDValue V2,
2224 SelectionDAG &DAG) {
2225
2226 const auto &Begin = Mask.begin();
2227 const auto &End = Mask.end();
2228 SDValue OriV1 = V1, OriV2 = V2;
2229
// Even mask positions: V1 becomes the original input whose odd elements they
// name (start index 1 for the first input, Mask.size() + 1 for the second).
2230 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2231 V1 = OriV1;
2232 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2233 V1 = OriV2;
2234 else
2235 return SDValue();
2236
// Odd mask positions: same selection, stored in V2.
2237 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2238 V2 = OriV1;
2239 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2240 V2 = OriV2;
2241 else
2242 return SDValue();
2243
// Operands are passed as (V2, V1): odd-position source first.
2244 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2245}
2246
2247 /// Lower VECTOR_SHUFFLE into VILVH (if possible).
2248 ///
2249 /// VILVH interleaves consecutive elements from the left (highest-indexed) half
2250 /// of each vector.
2251 ///
2252 /// It is possible to lower into VILVH when the mask consists of two of the
2253 /// following forms interleaved:
2254 ///   <x, x+1, x+2, ...>
2255 ///   <n+x, n+x+1, n+x+2, ...>
2256 /// where n is the number of elements in the vector and x is half n.
2257 /// For example:
2258 ///   <x, x, x+1, x+1, x+2, x+2, ...>
2259 ///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2260 ///
2261 /// When undefs appear in the mask they are treated as if they were whatever
2262 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() when the mask fits neither form.
2264 MVT VT, SDValue V1, SDValue V2,
2265 SelectionDAG &DAG) {
2266
2267 const auto &Begin = Mask.begin();
2268 const auto &End = Mask.end();
// HalfSize is the "x" of the forms above.
2269 unsigned HalfSize = Mask.size() / 2;
2270 SDValue OriV1 = V1, OriV2 = V2;
2271
// Even mask positions: consecutive indices starting at HalfSize (first
// input) or Mask.size() + HalfSize (second input).
2272 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2273 V1 = OriV1;
2274 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2275 V1 = OriV2;
2276 else
2277 return SDValue();
2278
// Odd mask positions: same test, result stored in V2.
2279 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2280 V2 = OriV1;
2281 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2282 1))
2283 V2 = OriV2;
2284 else
2285 return SDValue();
2286
// Operands are passed as (V2, V1): odd-position source first.
2287 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2288}
2289
2290 /// Lower VECTOR_SHUFFLE into VILVL (if possible).
2291 ///
2292 /// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2293 /// of each vector.
2294 ///
2295 /// It is possible to lower into VILVL when the mask consists of two of the
2296 /// following forms interleaved:
2297 ///   <0, 1, 2, ...>
2298 ///   <n, n+1, n+2, ...>
2299 /// where n is the number of elements in the vector.
2300 /// For example:
2301 ///   <0, 0, 1, 1, 2, 2, ...>
2302 ///   <0, n, 1, n+1, 2, n+2, ...>
2303 ///
2304 /// When undefs appear in the mask they are treated as if they were whatever
2305 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() when the mask fits neither form.
2307 MVT VT, SDValue V1, SDValue V2,
2308 SelectionDAG &DAG) {
2309
2310 const auto &Begin = Mask.begin();
2311 const auto &End = Mask.end();
2312 SDValue OriV1 = V1, OriV2 = V2;
2313
// Even mask positions: consecutive indices starting at 0 (first input) or
// Mask.size() (second input).
2314 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2315 V1 = OriV1;
2316 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2317 V1 = OriV2;
2318 else
2319 return SDValue();
2320
// Odd mask positions: same test, result stored in V2.
2321 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2322 V2 = OriV1;
2323 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2324 V2 = OriV2;
2325 else
2326 return SDValue();
2327
// Operands are passed as (V2, V1): odd-position source first.
2328 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2329}
2330
2331 /// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2332 ///
2333 /// VPICKEV copies the even elements of each vector into the result vector.
2334 ///
2335 /// It is possible to lower into VPICKEV when the mask consists of two of the
2336 /// following forms concatenated:
2337 ///   <0, 2, 4, ...>
2338 ///   <n, n+2, n+4, ...>
2339 /// where n is the number of elements in the vector.
2340 /// For example:
2341 ///   <0, 2, 4, ..., 0, 2, 4, ...>
2342 ///   <0, 2, 4, ..., n, n+2, n+4, ...>
2343 ///
2344 /// When undefs appear in the mask they are treated as if they were whatever
2345 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() when the mask fits neither form.
2347 MVT VT, SDValue V1, SDValue V2,
2348 SelectionDAG &DAG) {
2349
2350 const auto &Begin = Mask.begin();
2351 const auto &Mid = Mask.begin() + Mask.size() / 2;
2352 const auto &End = Mask.end();
2353 SDValue OriV1 = V1, OriV2 = V2;
2354
// First half of the mask [Begin, Mid): V1 becomes the original input whose
// even elements it lists (start 0 for the first input, Mask.size() for the
// second).
2355 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2356 V1 = OriV1;
2357 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2358 V1 = OriV2;
2359 else
2360 return SDValue();
2361
// Second half of the mask [Mid, End): same test, result stored in V2.
2362 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2363 V2 = OriV1;
2364 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2365 V2 = OriV2;
2366
2367 else
2368 return SDValue();
2369
// Operands are passed as (V2, V1): second-half source first.
2370 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2371}
2372
2373 /// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2374 ///
2375 /// VPICKOD copies the odd elements of each vector into the result vector.
2376 ///
2377 /// It is possible to lower into VPICKOD when the mask consists of two of the
2378 /// following forms concatenated:
2379 ///   <1, 3, 5, ...>
2380 ///   <n+1, n+3, n+5, ...>
2381 /// where n is the number of elements in the vector.
2382 /// For example:
2383 ///   <1, 3, 5, ..., 1, 3, 5, ...>
2384 ///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
2385 ///
2386 /// When undefs appear in the mask they are treated as if they were whatever
2387 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() when the mask fits neither form.
2389 MVT VT, SDValue V1, SDValue V2,
2390 SelectionDAG &DAG) {
2391
2392 const auto &Begin = Mask.begin();
2393 const auto &Mid = Mask.begin() + Mask.size() / 2;
2394 const auto &End = Mask.end();
2395 SDValue OriV1 = V1, OriV2 = V2;
2396
// First half of the mask [Begin, Mid): V1 becomes the original input whose
// odd elements it lists (start 1 for the first input, Mask.size() + 1 for
// the second).
2397 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2398 V1 = OriV1;
2399 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2400 V1 = OriV2;
2401 else
2402 return SDValue();
2403
// Second half of the mask [Mid, End): same test, result stored in V2.
2404 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2405 V2 = OriV1;
2406 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2407 V2 = OriV2;
2408 else
2409 return SDValue();
2410
// Operands are passed as (V2, V1): second-half source first.
2411 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2412}
2413
2414 /// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2415 ///
2416 /// VEXTRINS copies one element of a vector into any position of the result
2417 /// vector and leaves the remaining elements of the result vector unchanged.
2418 ///
2419 /// It is possible to lower into VEXTRINS when the mask takes the form:
2420 ///   <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2421 ///   <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2422 /// where n is the number of elements in the vector and i is in [0, n).
2423 /// For example:
2424 ///   <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2425 ///   <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
2426 ///
2427 /// When undefs appear in the mask they are treated as if they were whatever
2428 /// value is necessary in order to fit the above forms.
///
/// No VEXTRINS node is built here: on success the result is an
/// EXTRACT_VECTOR_ELT feeding an INSERT_VECTOR_ELT. Returns SDValue() when
/// the mask does not differ from an identity mask in exactly one position.
2429static SDValue
2431 SDValue V1, SDValue V2, SelectionDAG &DAG,
2432 const LoongArchSubtarget &Subtarget) {
2433 unsigned NumElts = VT.getVectorNumElements();
2434 MVT EltVT = VT.getVectorElementType();
2435 MVT GRLenVT = Subtarget.getGRLenVT();
2436
2437 if (Mask.size() != NumElts)
2438 return SDValue();
2439
// Base is the mask value expected at position 0 of an identity mask:
// 0 means "result is V1 with one element replaced", NumElts means the same
// for V2.
2440 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2441 int DiffCount = 0;
2442 int DiffPos = -1;
// Count the defined entries that differ from Base + i, remembering the
// last such position; stop as soon as a second one is found.
2443 for (unsigned i = 0; i < NumElts; ++i) {
2444 if (Mask[i] == -1)
2445 continue;
2446 if (Mask[i] != int(Base + i)) {
2447 ++DiffCount;
2448 DiffPos = int(i);
2449 if (DiffCount > 1)
2450 return SDValue();
2451 }
2452 }
2453
2454 // Need exactly one differing element to lower into VEXTRINS.
2455 if (DiffCount != 1)
2456 return SDValue();
2457
2458 // DiffMask must be in [0, 2N). It cannot be -1 here because undef entries
// were skipped by the loop above.
2459 int DiffMask = Mask[DiffPos];
2460 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2461 return SDValue();
2462
2463 // Determine source vector and source index.
2464 SDValue SrcVec;
2465 unsigned SrcIdx;
2466 if (unsigned(DiffMask) < NumElts) {
2467 SrcVec = V1;
2468 SrcIdx = unsigned(DiffMask);
2469 } else {
2470 SrcVec = V2;
2471 SrcIdx = unsigned(DiffMask) - NumElts;
2472 }
2473
2474 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, it will match the
2475 // patterns of VEXTRINS in tablegen.
// Floating-point elements are extracted in their own type; all other
// elements are extracted as GRLenVT.
2476 SDValue Extracted = DAG.getNode(
2477 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2478 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2479 SDValue Result =
2480 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2481 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2482
2483 return Result;
2484 };
2485
2486 // Try V1 as the base vector first (identity indices [0, n)), then V2
// (identity indices [n, 2n)).
2487 if (SDValue Result = tryLowerToExtrAndIns(0))
2488 return Result;
2489 return tryLowerToExtrAndIns(NumElts);
2490}
2491
2492 // Check the Mask and then build the SrcVec and MaskImm info which will
2493 // be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2494 // On success, return true. Otherwise, return false.
//
// Only the first four mask entries are encoded, as two pairs (0,1) and (2,3).
// For each pair one source vector (V1 or V2) is appended to SrcVec and two
// 2-bit indices are OR-ed into MaskImm: pair (0,1) occupies bits [3:0], pair
// (2,3) occupies bits [7:4]. MaskImm is only OR-ed into, so the caller must
// pass it in as zero. On failure SrcVec and MaskImm may already have been
// partially updated.
2497 unsigned &MaskImm) {
2498 unsigned MaskSize = Mask.size();
2499
// M is acceptable for the source whose indices start at Off when it is undef
// or lies in [Off, Off + 4).
2500 auto isValid = [&](int M, int Off) {
2501 return (M == -1) || (M >= Off && M < Off + 4);
2502 };
2503
// Encode one pair: each entry becomes a 2-bit index relative to Off (undef
// encodes as 0), placed at bit I * 2 and bit (I + 1) * 2 respectively.
2504 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2505 auto immPart = [&](int M, unsigned Off) {
2506 return (M == -1 ? 0 : (M - Off)) & 0x3;
2507 };
2508 MaskImm |= immPart(MLo, Off) << (I * 2);
2509 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2510 };
2511
2512 for (unsigned i = 0; i < 4; i += 2) {
2513 int MLo = Mask[i];
2514 int MHi = Mask[i + 1];
2515
2516 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
// Entries 4..7 must repeat entries 0..3 shifted by 4.
// NOTE(review): undef is not special-cased here. A defined low entry
// paired with an undef high entry is rejected, and an undef low entry
// (-1) passes only when the high entry is 3. Confirm this is intended.
2517 int M2Lo = Mask[i + 4];
2518 int M2Hi = Mask[i + 5];
2519 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2520 return false;
2521 }
2522
// Both entries of a pair must come from the same source: V1 (offset 0) is
// tried first, then V2 (offset MaskSize).
2523 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2524 SrcVec.push_back(V1);
2525 buildImm(MLo, MHi, 0, i);
2526 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2527 SrcVec.push_back(V2);
2528 buildImm(MLo, MHi, MaskSize, i);
2529 } else {
2530 return false;
2531 }
2532 }
2533
2534 return true;
2535}
2536
2537 /// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2538 ///
2539 /// VPERMI selects two elements from each of the two vectors based on the
2540 /// mask and places them in the corresponding positions of the result vector
2541 /// in order. Only v4i32 and v4f32 types are allowed.
2542 ///
2543 /// It is possible to lower into VPERMI when the mask consists of two of the
2544 /// following forms concatenated:
2545 ///   <i, j, u, v>
2546 ///   <u, v, i, j>
2547 /// where i,j are in [0,4) and u,v are in [4, 8).
2548 /// For example:
2549 ///   <2, 3, 4, 5>
2550 ///   <5, 7, 0, 2>
2551 ///
2552 /// When undefs appear in the mask they are treated as if they were whatever
2553 /// value is necessary in order to fit the above forms.
///
/// Returns SDValue() for any other type, for a mask whose size differs from
/// the element count, or when buildVPERMIInfo rejects the mask.
2555 MVT VT, SDValue V1, SDValue V2,
2556 SelectionDAG &DAG,
2557 const LoongArchSubtarget &Subtarget) {
2558 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2559 Mask.size() != VT.getVectorNumElements())
2560 return SDValue();
2561
// MaskImm must start at zero because buildVPERMIInfo only ORs bits into it.
2563 unsigned MaskImm = 0;
2564 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2565 return SDValue();
2566
// SrcVec[0] is the source of mask entries 0-1 and SrcVec[1] the source of
// entries 2-3; the node takes them in the order (SrcVec[1], SrcVec[0]).
2567 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2568 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2569}
2570
2571 /// Lower VECTOR_SHUFFLE into VSHUF.
2572 ///
2573 /// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2574 /// adding it as an operand to the resulting VSHUF.
///
/// No pattern check is made on the mask, so a VSHUF node is always returned.
2576 MVT VT, SDValue V1, SDValue V2,
2577 SelectionDAG &DAG,
2578 const LoongArchSubtarget &Subtarget) {
2579
// Every mask entry, including undef (-1), becomes a signed GRLenVT constant.
2581 for (auto M : Mask)
2582 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2583
// The mask vector has the same shape as VT with integer elements.
2584 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2585 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2586
2587 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2588 //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2589 // VSHUF concatenates the vectors in a bitwise fashion:
2590 //   <0b00, 0b01> + <0b10, 0b11> ->
2591 //   0b0100 + 0b1110 -> 0b01001110
2592 //   <0b10, 0b11, 0b00, 0b01>
2593 // We must therefore swap the operands to get the correct result.
2594 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2595}
2596
2597 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2598 ///
2599 /// This routine breaks down the specific type of 128-bit shuffle and
2600 /// dispatches to the lowering routines accordingly.
///
/// The candidate lowerings are tried in a fixed order and the first one that
/// produces a node wins. Returns SDValue() if none of them applies.
2602 SDValue V1, SDValue V2, SelectionDAG &DAG,
2603 const LoongArchSubtarget &Subtarget) {
2604 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2605 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2606 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2607 "Vector type is unsupported for lsx!");
2608 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2609 "Two operands have different types!");
2610 assert(VT.getVectorNumElements() == Mask.size() &&
2611 "Unexpected mask size for shuffle!");
2612 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2613
// Zeroable marks every result element that is known undef or known zero; it
// is consumed by the shift and zero/any-extend lowerings below.
2614 APInt KnownUndef, KnownZero;
2615 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2616 APInt Zeroable = KnownUndef | KnownZero;
2617
2618 SDValue Result;
2619 // TODO: Add more comparison patterns.
// Lowerings tried only when the second input is undef.
2620 if (V2.isUndef()) {
2621 if ((Result =
2622 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2623 return Result;
2624 if ((Result =
2625 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2626 return Result;
2627 if ((Result =
2628 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2629 return Result;
2630
2631 // TODO: The statement below may be enabled in the future to better match
2632 // the pattern for instruction selection.
2633 /* V2 = V1; */
2634 }
2635
2636 // It is recommended not to change the pattern comparison order for better
2637 // performance.
2638 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2639 return Result;
2640 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2641 return Result;
2642 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2643 return Result;
2644 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2645 return Result;
2646 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2647 return Result;
2648 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2649 return Result;
// With two inputs, VSHUF4I is attempted only for the 2-element types.
2650 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2651 (Result =
2652 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2653 return Result;
2654 if ((Result =
2655 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2656 return Result;
2657 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2658 Zeroable)))
2659 return Result;
2660 if ((Result =
2661 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2662 return Result;
2663 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2664 Zeroable)))
2665 return Result;
2666 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2667 Subtarget)))
2668 return Result;
2669 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2670 return NewShuffle;
// Last candidate: the generic VSHUF lowering.
2671 if ((Result =
2672 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2673 return Result;
2674 return SDValue();
2675}
2676
2677 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2678 ///
2679 /// It is a XVREPLVEI when the mask is:
2680 ///   <x, x, x, ..., x+n, x+n, x+n, ...>
2681 /// where the number of x is equal to n and n is half the length of vector.
2682 ///
2683 /// When undefs appear in the mask they are treated as if they were whatever
2684 /// value is necessary in order to fit the above form.
///
/// Returns UNDEF when every mask entry is undef, a VREPLVEI node on V1 when
/// the mask fits, and SDValue() otherwise.
2685static SDValue
2687 SDValue V1, SelectionDAG &DAG,
2688 const LoongArchSubtarget &Subtarget) {
// SplatIndex is the first defined mask entry, or -1 if there is none.
2689 int SplatIndex = -1;
2690 for (const auto &M : Mask) {
2691 if (M != -1) {
2692 SplatIndex = M;
2693 break;
2694 }
2695 }
2696
2697 if (SplatIndex == -1)
2698 return DAG.getUNDEF(VT);
2699
2700 const auto &Begin = Mask.begin();
2701 const auto &End = Mask.end();
2702 int HalfSize = Mask.size() / 2;
2703
// NOTE(review): if the first half of the mask is entirely undef, the first
// defined entry is the "x + n" value and is rejected here even when the
// mask would otherwise fit. Confirm this is intended.
2704 if (SplatIndex >= HalfSize)
2705 return SDValue();
2706
// Always true after the HalfSize check above.
2707 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// First half must repeat SplatIndex and second half SplatIndex + HalfSize
// (expected-index stride 0 means the same value at every position).
2708 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2709 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2710 0)) {
2711 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2712 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2713 }
2714
2715 return SDValue();
2716}
2717
2718 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
///
/// A 4-element mask is handled here and yields a VSHUF4I_D node, or SDValue()
/// if the mask does not fit. Any other mask size is forwarded to the VSHUF4I
/// lowering.
2719static SDValue
2721 SDValue V1, SDValue V2, SelectionDAG &DAG,
2722 const LoongArchSubtarget &Subtarget) {
2723 // XVSHUF4I_D must be handled separately because it is different from other
2724 // types of [X]VSHUF4I instructions.
2725 if (Mask.size() == 4) {
2726 unsigned MaskImm = 0;
// Position 1 is processed before position 0, so after the two shifts its
// 2-bit field ends up in bits [3:2] and position 0's in bits [1:0].
2727 for (int i = 1; i >= 0; --i) {
2728 int MLo = Mask[i];
2729 int MHi = Mask[i + 2];
// MLo must be undef, 0-1 or 4-5; MHi must be undef, 2-3 or 6-7.
2730 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2731 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2732 return SDValue();
// When both are defined, the upper-half entry must be the lower-half
// entry plus 2.
2733 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2734 return SDValue();
2735
2736 MaskImm <<= 2;
// Encode from whichever entry is defined: 0-1 stay as is, 4-5 become
// 2-3; from the upper half, 2-3 become 0-1 and 6-7 become 2-3. If both
// are undef the field stays 0.
2737 if (MLo != -1)
2738 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2739 else if (MHi != -1)
2740 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2741 }
2742
2743 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2744 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2745 }
2746
2747 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2748}
2749
2750 /// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
///
/// v8i32/v8f32 masks go through buildVPERMIInfo and produce a VPERMI node;
/// v4i64/v4f64 masks that reference only the first input produce an XVPERMI
/// node. Returns SDValue() for other types or masks that do not fit.
2751static SDValue
2753 SDValue V1, SDValue V2, SelectionDAG &DAG,
2754 const LoongArchSubtarget &Subtarget) {
2755 MVT GRLenVT = Subtarget.getGRLenVT();
2756 unsigned MaskSize = Mask.size();
2757 if (MaskSize != VT.getVectorNumElements())
2758 return SDValue();
2759
2760 // Consider XVPERMI_W.
2761 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
// MaskImm must start at zero because buildVPERMIInfo only ORs bits into it.
2763 unsigned MaskImm = 0;
2764 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2765 return SDValue();
2766
// SrcVec[0] is the source of mask entries 0-1 and SrcVec[1] the source of
// entries 2-3; the node takes them in the order (SrcVec[1], SrcVec[0]).
2767 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2768 DAG.getConstant(MaskImm, DL, GRLenVT));
2769 }
2770
2771 // Consider XVPERMI_D.
2772 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2773 unsigned MaskImm = 0;
// Each defined entry must index the first input (value < MaskSize) and
// contributes its value at bit i * 2; undef entries leave their field 0.
2774 for (unsigned i = 0; i < MaskSize; ++i) {
2775 if (Mask[i] == -1)
2776 continue;
2777 if (Mask[i] >= (int)MaskSize)
2778 return SDValue();
2779 MaskImm |= Mask[i] << (i * 2);
2780 }
2781
// Only V1 is an operand of this node.
2782 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2783 DAG.getConstant(MaskImm, DL, GRLenVT));
2784 }
2785
2786 return SDValue();
2787}
2788
2789 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
///
/// Single-input lowering for v8i32/v8f32. Returns SDValue() for other types,
/// and also when each half of the mask reads from only one half of the input.
2791 MVT VT, SDValue V1, SelectionDAG &DAG,
2792 const LoongArchSubtarget &Subtarget) {
2793 // LoongArch LASX only has XVPERM_W.
2794 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2795 return SDValue();
2796
2797 unsigned NumElts = VT.getVectorNumElements();
2798 unsigned HalfSize = NumElts / 2;
// Front*/Back* describe the first/second half of the mask; *Lo/*Hi stay
// true while every defined entry seen so far lies in [0, HalfSize) /
// [HalfSize, NumElts).
2799 bool FrontLo = true, FrontHi = true;
2800 bool BackLo = true, BackHi = true;
2801
// Undef (-1) is treated as being in any range.
2802 auto inRange = [](int val, int low, int high) {
2803 return (val == -1) || (val >= low && val < high);
2804 };
2805
2806 for (unsigned i = 0; i < HalfSize; ++i) {
2807 int Fronti = Mask[i];
2808 int Backi = Mask[i + HalfSize];
2809
2810 FrontLo &= inRange(Fronti, 0, HalfSize);
2811 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2812 BackLo &= inRange(Backi, 0, HalfSize);
2813 BackHi &= inRange(Backi, HalfSize, NumElts);
2814 }
2815
2816 // If both the lower and upper 128-bit parts access only one half of the
2817 // vector (either lower or upper), avoid using xvperm.w. The latency of
2818 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2819 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2820 return SDValue();
2821
// Build the index vector: undef mask entries become UNDEF, the rest become
// GRLenVT constants.
2823 MVT GRLenVT = Subtarget.getGRLenVT();
2824 for (unsigned i = 0; i < NumElts; ++i)
2825 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2826 : DAG.getConstant(Mask[i], DL, GRLenVT));
2827 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2828
2829 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2830}
2831
2832 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
///
/// Forwards all arguments unchanged to the VPACKEV lowering and returns its
/// result.
2834 MVT VT, SDValue V1, SDValue V2,
2835 SelectionDAG &DAG) {
2836 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2837}
2838
2839 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
///
/// Forwards all arguments unchanged to the VPACKOD lowering and returns its
/// result.
2841 MVT VT, SDValue V1, SDValue V2,
2842 SelectionDAG &DAG) {
2843 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2844}
2845
2846 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
///
/// The two halves of the mask are checked separately. In each half the
/// even-position entries must be consecutive indices from one input and the
/// odd-position entries consecutive indices from one input. Returns a VILVH
/// node, or SDValue() when the mask does not fit.
2848 MVT VT, SDValue V1, SDValue V2,
2849 SelectionDAG &DAG) {
2850
2851 const auto &Begin = Mask.begin();
2852 const auto &End = Mask.end();
2853 unsigned HalfSize = Mask.size() / 2;
2854 unsigned LeftSize = HalfSize / 2;
2855 SDValue OriV1 = V1, OriV2 = V2;
2856
// Even positions: the first half of the mask must count up from
// HalfSize - LeftSize and the second half from HalfSize + LeftSize, either
// in the first input or (offset by Mask.size()) in the second.
2857 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2858 1) &&
2859 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2860 V1 = OriV1;
2861 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2862 Mask.size() + HalfSize - LeftSize, 1) &&
2863 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2864 Mask.size() + HalfSize + LeftSize, 1))
2865 V1 = OriV2;
2866 else
2867 return SDValue();
2868
// Odd positions: same test, result stored in V2.
2869 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2870 1) &&
2871 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2872 1))
2873 V2 = OriV1;
2874 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2875 Mask.size() + HalfSize - LeftSize, 1) &&
2876 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2877 Mask.size() + HalfSize + LeftSize, 1))
2878 V2 = OriV2;
2879 else
2880 return SDValue();
2881
// Operands are passed as (V2, V1): odd-position source first.
2882 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2883}
2884
2885 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
///
/// The two halves of the mask are checked separately. In each half the
/// even-position entries must be consecutive indices from one input and the
/// odd-position entries consecutive indices from one input. Returns a VILVL
/// node, or SDValue() when the mask does not fit.
2887 MVT VT, SDValue V1, SDValue V2,
2888 SelectionDAG &DAG) {
2889
2890 const auto &Begin = Mask.begin();
2891 const auto &End = Mask.end();
2892 unsigned HalfSize = Mask.size() / 2;
2893 SDValue OriV1 = V1, OriV2 = V2;
2894
// Even positions: the first half of the mask must count up from 0 and the
// second half from HalfSize, either in the first input or (offset by
// Mask.size()) in the second.
2895 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2896 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2897 V1 = OriV1;
2898 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2899 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2900 Mask.size() + HalfSize, 1))
2901 V1 = OriV2;
2902 else
2903 return SDValue();
2904
// Odd positions: same test, result stored in V2.
2905 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2906 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2907 V2 = OriV1;
2908 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2909 1) &&
2910 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2911 Mask.size() + HalfSize, 1))
2912 V2 = OriV2;
2913 else
2914 return SDValue();
2915
// Operands are passed as (V2, V1): odd-position source first.
2916 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2917}
2918
2919 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
///
/// The mask is split into four quarters. Quarters 1 and 3 must list even
/// indices of one input and quarters 2 and 4 even indices of one input.
/// Returns a VPICKEV node, or SDValue() when the mask does not fit.
2921 MVT VT, SDValue V1, SDValue V2,
2922 SelectionDAG &DAG) {
2923
2924 const auto &Begin = Mask.begin();
2925 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2926 const auto &Mid = Mask.begin() + Mask.size() / 2;
2927 const auto &RightMid = Mask.end() - Mask.size() / 4;
2928 const auto &End = Mask.end();
2929 unsigned HalfSize = Mask.size() / 2;
2930 SDValue OriV1 = V1, OriV2 = V2;
2931
// Quarters [Begin, LeftMid) and [Mid, RightMid): indices step by 2 starting
// at 0 and HalfSize respectively, in the first input or (offset by
// Mask.size()) in the second.
2932 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2933 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2934 V1 = OriV1;
2935 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2936 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2937 V1 = OriV2;
2938 else
2939 return SDValue();
2940
// Quarters [LeftMid, Mid) and [RightMid, End): same test, result stored in
// V2.
2941 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2942 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2943 V2 = OriV1;
2944 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2945 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2946 V2 = OriV2;
2947
2948 else
2949 return SDValue();
2950
// Operands are passed as (V2, V1).
2951 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2952}
2953
2954 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
///
/// The mask is split into four quarters. Quarters 1 and 3 must list odd
/// indices of one input and quarters 2 and 4 odd indices of one input.
/// Returns a VPICKOD node, or SDValue() when the mask does not fit.
2956 MVT VT, SDValue V1, SDValue V2,
2957 SelectionDAG &DAG) {
2958
2959 const auto &Begin = Mask.begin();
2960 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2961 const auto &Mid = Mask.begin() + Mask.size() / 2;
2962 const auto &RightMid = Mask.end() - Mask.size() / 4;
2963 const auto &End = Mask.end();
2964 unsigned HalfSize = Mask.size() / 2;
2965 SDValue OriV1 = V1, OriV2 = V2;
2966
// Quarters [Begin, LeftMid) and [Mid, RightMid): indices step by 2 starting
// at 1 and HalfSize + 1 respectively, in the first input or (offset by
// Mask.size()) in the second.
2967 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2968 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2969 V1 = OriV1;
2970 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2971 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2972 2))
2973 V1 = OriV2;
2974 else
2975 return SDValue();
2976
// Quarters [LeftMid, Mid) and [RightMid, End): same test, result stored in
// V2.
2977 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2978 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2979 V2 = OriV1;
2980 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2981 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2982 2))
2983 V2 = OriV2;
2984 else
2985 return SDValue();
2986
// Operands are passed as (V2, V1).
2987 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2988}
2989
2990 /// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
///
/// Accepts masks that equal an identity mask of V1 or V2 except at two
/// positions HalfSize apart (one of which may be undef), where both replaced
/// entries name the same offset within their half of a single source vector.
/// No XVEXTRINS node is built here: the result is two EXTRACT_VECTOR_ELT /
/// INSERT_VECTOR_ELT pairs. Returns SDValue() when the mask does not fit.
2991static SDValue
2993 SDValue V1, SDValue V2, SelectionDAG &DAG,
2994 const LoongArchSubtarget &Subtarget) {
2995 int NumElts = VT.getVectorNumElements();
2996 int HalfSize = NumElts / 2;
2997 MVT EltVT = VT.getVectorElementType();
2998 MVT GRLenVT = Subtarget.getGRLenVT();
2999
3000 if ((int)Mask.size() != NumElts)
3001 return SDValue();
3002
// Base is the mask value expected at position 0 of an identity mask:
// 0 selects V1 as the vector being modified, NumElts selects V2.
3003 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
// Collect, in increasing order, the defined positions that differ from
// Base + i; more than two is a failure.
3004 SmallVector<int> DiffPos;
3005 for (int i = 0; i < NumElts; ++i) {
3006 if (Mask[i] == -1)
3007 continue;
3008 if (Mask[i] != Base + i) {
3009 DiffPos.push_back(i);
3010 if (DiffPos.size() > 2)
3011 return SDValue();
3012 }
3013 }
3014
3015 // Need exactly two differing elements to lower into XVEXTRINS.
3016 // If only one differing element, the element at a distance of
3017 // HalfSize from it must be undef.
3018 if (DiffPos.size() == 1) {
3019 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
3020 DiffPos.push_back(DiffPos[0] + HalfSize);
3021 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
3022 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
3023 else
3024 return SDValue();
3025 }
// The two positions must be exactly HalfSize apart (this also rejects the
// case of no differing element).
3026 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
3027 return SDValue();
3028
3029 // DiffMask must be in its low or high part.
3030 int DiffMaskLo = Mask[DiffPos[0]];
3031 int DiffMaskHi = Mask[DiffPos[1]];
// An undef entry (added above) is derived from its defined partner.
3032 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
3033 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
3034 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
3035 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
3036 return SDValue();
3037 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
3038 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
3039 return SDValue();
3040 if (DiffMaskHi != DiffMaskLo + HalfSize)
3041 return SDValue();
3042
3043 // Determine source vector and source index.
3044 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
3045 int SrcIdxLo =
3046 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
3047 bool IsEltFP = EltVT.isFloatingPoint();
3048
3049 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
3050 // the patterns of XVEXTRINS in tablegen.
3051 SDValue BaseVec = (Base == 0) ? V1 : V2;
3052 SDValue EltLo =
3053 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3054 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3055 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3056 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3057 SDValue EltHi =
3058 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3059 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
// The second insert is applied on top of the first.
3060 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3061 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3062
3063 return Result;
3064 };
3065
3066 // Try V1 as the base vector first (identity indices [0, n)), then V2
// (identity indices [n, 2n)).
3067 if (SDValue Result = tryLowerToExtrAndIns(0))
3068 return Result;
3069 return tryLowerToExtrAndIns(NumElts);
3070}
3071
3072 /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
///
/// Matches masks in which exactly one position of an identity mask of one
/// input is replaced by element 0 of the other input. Returns an XVINSVE0
/// node, or SDValue() for unsupported types or masks that do not fit.
3073static SDValue
3075 SDValue V1, SDValue V2, SelectionDAG &DAG,
3076 const LoongArchSubtarget &Subtarget) {
3077 // LoongArch LASX only supports xvinsve0.{w/d}.
3078 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3079 VT != MVT::v4f64)
3080 return SDValue();
3081
3082 MVT GRLenVT = Subtarget.getGRLenVT();
3083 int MaskSize = Mask.size();
3084 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3085
3086 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3087 // all other elements are either 'Base + i' or undef (-1). On success, return
3088 // the index of the replaced element. Otherwise, just return -1.
3089 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3090 int Idx = -1;
3091 for (int i = 0; i < MaskSize; ++i) {
3092 if (Mask[i] == Base + i || Mask[i] == -1)
3093 continue;
// Any other differing value, or a second replacement, is a failure.
3094 if (Mask[i] != Replaced)
3095 return -1;
3096 if (Idx == -1)
3097 Idx = i;
3098 else
3099 return -1;
3100 }
3101 return Idx;
3102 };
3103
3104 // Case 1: the lowest element of V2 replaces one element in V1.
3105 int Idx = checkReplaceOne(0, MaskSize);
3106 if (Idx != -1)
3107 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3108 DAG.getConstant(Idx, DL, GRLenVT));
3109
3110 // Case 2: the lowest element of V1 replaces one element in V2.
// The operands are swapped so the vector being modified still comes first.
3111 Idx = checkReplaceOne(MaskSize, 0);
3112 if (Idx != -1)
3113 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3114 DAG.getConstant(Idx, DL, GRLenVT));
3115
3116 return SDValue();
3117}
3118
3119 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
///
/// Every entry in the first half of the mask must name an element from the
/// first half of V1 or V2, and every entry in the second half an element from
/// the second half of V1 or V2. The indices are renumbered per half and
/// passed as a BUILD_VECTOR operand of a VSHUF node. Returns SDValue() when
/// an entry crosses halves.
3121 MVT VT, SDValue V1, SDValue V2,
3122 SelectionDAG &DAG) {
3123
3124 int MaskSize = Mask.size();
3125 int HalfSize = Mask.size() / 2;
3126 const auto &Begin = Mask.begin();
3127 const auto &Mid = Mask.begin() + HalfSize;
3128 const auto &End = Mask.end();
3129
3130 // VECTOR_SHUFFLE concatenates the vectors:
3131 //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3132 //   shuffling ->
3133 //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3134 //
3135 // XVSHUF concatenates the vectors:
3136 //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3137 //   shuffling ->
3138 //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3139 SmallVector<SDValue, 8> MaskAlloc;
// First half of the mask: V1 indices [0, HalfSize) are kept as is and V2
// indices [MaskSize, MaskSize + HalfSize) are remapped to
// [HalfSize, MaskSize). Undef entries are emitted as index 0.
3140 for (auto it = Begin; it < Mid; it++) {
3141 if (*it < 0) // UNDEF
3142 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
// The "*it >= 0" test is always true here because negatives were handled
// above.
3143 else if ((*it >= 0 && *it < HalfSize) ||
3144 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3145 int M = *it < HalfSize ? *it : *it - HalfSize;
3146 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3147 } else
3148 return SDValue();
3149 }
3150 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3151
// Second half of the mask: V1 indices [HalfSize, MaskSize) are remapped to
// [0, HalfSize) and V2 indices [MaskSize + HalfSize, 2 * MaskSize) to
// [HalfSize, MaskSize). Undef entries are emitted as index 0.
3152 for (auto it = Mid; it < End; it++) {
3153 if (*it < 0) // UNDEF
3154 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3155 else if ((*it >= HalfSize && *it < MaskSize) ||
3156 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3157 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3158 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3159 } else
3160 return SDValue();
3161 }
3162 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3163
// The mask vector has the same shape as VT with integer elements; the node
// takes (MaskVec, V2, V1).
3164 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3165 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
3166 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3167}
3168
3169/// Shuffle vectors by lane to generate more optimized instructions.
3170/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3171///
3172/// Therefore, except for the following four cases, other cases are regarded
3173/// as cross-lane shuffles, where optimization is relatively limited.
3174///
3175/// - Shuffle high, low lanes of two input vectors
3176/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3177/// - Shuffle low, high lanes of two input vectors
3178/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3179/// - Shuffle low, low lanes of two input vectors
3180/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3181/// - Shuffle high, high lanes of two input vectors
3182/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3183///
3184/// The first case is the closest to LoongArch instructions and the other
3185/// cases need to be converted to it for processing.
3186///
3187/// This function will return true for the last three cases above and will
3188/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3189/// cross-lane shuffle cases.
///
/// On every path that returns false, V1, V2 and Mask are left untouched.
///
/// NOTE(review): the line carrying the return type and the function name is
/// absent from this listing - confirm against the upstream file.
3191 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3192 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3193
3194 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3195
3196 int MaskSize = Mask.size();
3197 int HalfSize = Mask.size() / 2;
3198 MVT GRLenVT = Subtarget.getGRLenVT();
3199
3200 HalfMaskType preMask = None, postMask = None;
3201
// Classify the first half of the mask: HighLaneTy when every defined index
// selects from the first half of V1 or V2, LowLaneTy when every defined index
// selects from the second half of V1 or V2, None otherwise.
3202 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3203 return M < 0 || (M >= 0 && M < HalfSize) ||
3204 (M >= MaskSize && M < MaskSize + HalfSize);
3205 }))
3206 preMask = HighLaneTy;
3207 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3208 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3209 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3210 }))
3211 preMask = LowLaneTy;
3212
// Classify the second half of the mask the same way. LowLaneTy is tested
// first here, so an all-undef second half is classified as LowLaneTy.
3213 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3214 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3215 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3216 }))
3217 postMask = LowLaneTy;
3218 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3219 return M < 0 || (M >= 0 && M < HalfSize) ||
3220 (M >= MaskSize && M < MaskSize + HalfSize);
3221 }))
3222 postMask = HighLaneTy;
3223
3224 // The pre-half of mask is high lane type, and the post-half of mask
3225 // is low lane type, which is closest to the LoongArch instructions.
3226 //
3227 // Note: In the LoongArch architecture, the high lane of mask corresponds
3228 // to the lower 128-bit of vector register, and the low lane of mask
3229 // corresponds to the higher 128-bit of vector register.
3230 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3231 return false;
3232 }
3233 if (preMask == LowLaneTy && postMask == HighLaneTy) {
// Each half of the mask reads from the opposite half of the inputs. The
// inputs are permuted as v4i64 (immediate 0b01001110, presumably a swap of
// the two 128-bit halves - TODO confirm against the XVPERMI definition) and
// the mask indices are rebased by -HalfSize / +HalfSize to match.
3234 V1 = DAG.getBitcast(MVT::v4i64, V1);
3235 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3236 DAG.getConstant(0b01001110, DL, GRLenVT));
3237 V1 = DAG.getBitcast(VT, V1);
3238
3239 if (!V2.isUndef()) {
3240 V2 = DAG.getBitcast(MVT::v4i64, V2);
3241 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3242 DAG.getConstant(0b01001110, DL, GRLenVT));
3243 V2 = DAG.getBitcast(VT, V2);
3244 }
3245
3246 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3247 *it = *it < 0 ? *it : *it - HalfSize;
3248 }
3249 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3250 *it = *it < 0 ? *it : *it + HalfSize;
3251 }
3252 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
// Both halves of the mask read from the second half of the inputs. The inputs
// are permuted with immediate 0b11101110 (presumably replicating the upper
// 128-bit half into both halves - TODO confirm) and only the first half of
// the mask is rebased by -HalfSize.
3253 V1 = DAG.getBitcast(MVT::v4i64, V1);
3254 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3255 DAG.getConstant(0b11101110, DL, GRLenVT));
3256 V1 = DAG.getBitcast(VT, V1);
3257
3258 if (!V2.isUndef()) {
3259 V2 = DAG.getBitcast(MVT::v4i64, V2);
3260 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3261 DAG.getConstant(0b11101110, DL, GRLenVT));
3262 V2 = DAG.getBitcast(VT, V2);
3263 }
3264
3265 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3266 *it = *it < 0 ? *it : *it - HalfSize;
3267 }
3268 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
// Both halves of the mask read from the first half of the inputs. The inputs
// are permuted with immediate 0b01000100 (presumably replicating the lower
// 128-bit half into both halves - TODO confirm) and only the second half of
// the mask is rebased by +HalfSize.
3269 V1 = DAG.getBitcast(MVT::v4i64, V1);
3270 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3271 DAG.getConstant(0b01000100, DL, GRLenVT));
3272 V1 = DAG.getBitcast(VT, V1);
3273
3274 if (!V2.isUndef()) {
3275 V2 = DAG.getBitcast(MVT::v4i64, V2);
3276 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3277 DAG.getConstant(0b01000100, DL, GRLenVT));
3278 V2 = DAG.getBitcast(VT, V2);
3279 }
3280
3281 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3282 *it = *it < 0 ? *it : *it + HalfSize;
3283 }
3284 } else { // cross-lane
3285 return false;
3286 }
3287
3288 return true;
3289}
3290
3291/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3292/// Only for 256-bit vector.
3293///
3294/// For example:
3295/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3296/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
3297/// is lowered to:
3298/// (XVPERMI $xr2, $xr0, 78)
3299/// (XVSHUF $xr1, $xr2, $xr0)
3300/// (XVORI $xr0, $xr1, 0)
///
/// Returns an empty SDValue when no mask element crosses a lane. The body only
/// reads V1; V2 is not referenced.
///
/// NOTE(review): the line carrying the return type, the function name and the
/// DL parameter is absent from this listing - confirm against the upstream
/// file.
3302 ArrayRef<int> Mask,
3303 MVT VT, SDValue V1,
3304 SDValue V2,
3305 SelectionDAG &DAG) {
3306 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3307 int Size = Mask.size();
3308 int LaneSize = Size / 2;
3309
// LaneCrossing[L] is set when some element sourced from lane L lands in the
// other lane of the result.
3310 bool LaneCrossing[2] = {false, false};
3311 for (int i = 0; i < Size; ++i)
3312 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3313 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3314
3315 // Bail out when no element crosses a lane.
3316 if (!LaneCrossing[0] && !LaneCrossing[1])
3317 return SDValue();
3318
// Redirect every lane-crossing index to the same position inside the
// destination lane of the second shuffle operand (hence the + Size), which
// is the lane-flipped copy of V1 built below.
3319 SmallVector<int> InLaneMask;
3320 InLaneMask.assign(Mask.begin(), Mask.end());
3321 for (int i = 0; i < Size; ++i) {
3322 int &M = InLaneMask[i];
3323 if (M < 0)
3324 continue;
3325 if (((M % Size) / LaneSize) != (i / LaneSize))
3326 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3327 }
3328
// Flipped is V1 with its two 128-bit halves exchanged, expressed as a v4i64
// shuffle with mask {2, 3, 0, 1}.
3329 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3330 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3331 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3332 Flipped = DAG.getBitcast(VT, Flipped);
3333 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3334}
3335
3336/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3337///
3338/// This routine breaks down the specific type of 256-bit shuffle and
3339/// dispatches to the lowering routines accordingly.
///
/// The lowering helpers are tried in the order written below and the first
/// non-empty result is returned; an empty SDValue is returned when none of
/// them matches.
///
/// NOTE(review): the line carrying the return type, the function name and the
/// leading parameters (DL, Mask and VT are used below) is absent from this
/// listing - confirm against the upstream file.
3341 SDValue V1, SDValue V2, SelectionDAG &DAG,
3342 const LoongArchSubtarget &Subtarget) {
3343 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3344 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3345 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3346 "Vector type is unsupported for lasx!");
3347 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
3348 "Two operands have different types!");
3349 assert(VT.getVectorNumElements() == Mask.size() &&
3350 "Unexpected mask size for shuffle!");
3351 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3352 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3353
// Zeroable is the union of the KnownUndef and KnownZero sets; it is consumed
// by the shift-based lowering further down.
3354 APInt KnownUndef, KnownZero;
3355 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3356 APInt Zeroable = KnownUndef | KnownZero;
3357
3358 SDValue Result;
3359 // TODO: Add more comparison patterns.
// Patterns that are only attempted when the second input is undef.
3360 if (V2.isUndef()) {
3361 if ((Result =
3362 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3363 return Result;
3364 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3365 Subtarget)))
3366 return Result;
3367 // Try to widen vectors to gain more optimization opportunities.
3368 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3369 return NewShuffle;
3370 if ((Result =
3371 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3372 return Result;
3373 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3374 return Result;
3375 if ((Result =
3376 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3377 return Result;
3378
3379 // TODO: This comment may be enabled in the future to better match the
3380 // pattern for instruction selection.
3381 /* V2 = V1; */
3382 }
3383
3384 // It is recommended not to change the pattern comparison order for better
3385 // performance.
3386 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3387 return Result;
3388 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3389 return Result;
3390 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3391 return Result;
3392 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3393 return Result;
3394 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3395 return Result;
3396 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3397 return Result;
// XVSHUF4I is only attempted for two-input shuffles of 64-bit elements here.
3398 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3399 (Result =
3400 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3401 return Result;
3402 if ((Result =
3403 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3404 return Result;
3405 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3406 Zeroable)))
3407 return Result;
3408 if ((Result =
3409 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3410 return Result;
3411 if ((Result =
3412 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3413 return Result;
3414 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3415 Subtarget)))
3416 return Result;
3417
3418 // canonicalize non cross-lane shuffle vector
// When canonicalization succeeds, V1, V2 and NewMask have been rewritten and
// the whole dispatch is restarted on the canonical form. When it fails,
// NewMask is still an unmodified copy of Mask.
3419 SmallVector<int> NewMask(Mask);
3420 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3421 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3422
3423 // FIXME: Handling the remaining cases earlier can degrade performance
3424 // in some situations. Further analysis is required to enable more
3425 // effective optimizations.
3426 if (V2.isUndef()) {
3427 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3428 V1, V2, DAG)))
3429 return Result;
3430 }
3431
// Last resorts: widen the mask elements, then fall back to the generic XVSHUF
// lowering.
3432 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3433 return NewShuffle;
3434 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3435 return Result;
3436
3437 return SDValue();
3438}
3439
3440SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3441 SelectionDAG &DAG) const {
3442 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3443 ArrayRef<int> OrigMask = SVOp->getMask();
3444 SDValue V1 = Op.getOperand(0);
3445 SDValue V2 = Op.getOperand(1);
3446 MVT VT = Op.getSimpleValueType();
3447 int NumElements = VT.getVectorNumElements();
3448 SDLoc DL(Op);
3449
3450 bool V1IsUndef = V1.isUndef();
3451 bool V2IsUndef = V2.isUndef();
3452 if (V1IsUndef && V2IsUndef)
3453 return DAG.getUNDEF(VT);
3454
3455 // When we create a shuffle node we put the UNDEF node to second operand,
3456 // but in some cases the first operand may be transformed to UNDEF.
3457 // In this case we should just commute the node.
3458 if (V1IsUndef)
3459 return DAG.getCommutedVectorShuffle(*SVOp);
3460
3461 // Check for non-undef masks pointing at an undef vector and make the masks
3462 // undef as well. This makes it easier to match the shuffle based solely on
3463 // the mask.
3464 if (V2IsUndef &&
3465 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3466 SmallVector<int, 8> NewMask(OrigMask);
3467 for (int &M : NewMask)
3468 if (M >= NumElements)
3469 M = -1;
3470 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3471 }
3472
3473 // Check for illegal shuffle mask element index values.
3474 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3475 (void)MaskUpperLimit;
3476 assert(llvm::all_of(OrigMask,
3477 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3478 "Out of bounds shuffle index");
3479
3480 // For each vector width, delegate to a specialized lowering routine.
3481 if (VT.is128BitVector())
3482 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3483
3484 if (VT.is256BitVector())
3485 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3486
3487 return SDValue();
3488}
3489
3490SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3491 SelectionDAG &DAG) const {
3492 // Custom lower to ensure the libcall return is passed in an FPR on hard
3493 // float ABIs.
3494 SDLoc DL(Op);
3495 MakeLibCallOptions CallOptions;
3496 SDValue Op0 = Op.getOperand(0);
3497 SDValue Chain = SDValue();
3498 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3499 SDValue Res;
3500 std::tie(Res, Chain) =
3501 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3502 if (Subtarget.is64Bit())
3503 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3504 return DAG.getBitcast(MVT::i32, Res);
3505}
3506
3507SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3508 SelectionDAG &DAG) const {
3509 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3510 // float ABIs.
3511 SDLoc DL(Op);
3512 MakeLibCallOptions CallOptions;
3513 SDValue Op0 = Op.getOperand(0);
3514 SDValue Chain = SDValue();
3515 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3516 DL, MVT::f32, Op0)
3517 : DAG.getBitcast(MVT::f32, Op0);
3518 SDValue Res;
3519 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3520 CallOptions, DL, Chain);
3521 return Res;
3522}
3523
3524SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3525 SelectionDAG &DAG) const {
3526 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3527 SDLoc DL(Op);
3528 MakeLibCallOptions CallOptions;
3529 RTLIB::Libcall LC =
3530 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3531 SDValue Res =
3532 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3533 if (Subtarget.is64Bit())
3534 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3535 return DAG.getBitcast(MVT::i32, Res);
3536}
3537
3538SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3539 SelectionDAG &DAG) const {
3540 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3541 MVT VT = Op.getSimpleValueType();
3542 SDLoc DL(Op);
3543 Op = DAG.getNode(
3544 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3545 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3546 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3547 DL, MVT::f32, Op)
3548 : DAG.getBitcast(MVT::f32, Op);
3549 if (VT != MVT::f32)
3550 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3551 return Res;
3552}
3553
3554// Lower BUILD_VECTOR as broadcast load (if possible).
3555// For example:
3556// %a = load i8, ptr %ptr
3557// %b = build_vector %a, %a, %a, %a
3558// is lowered to :
3559// (VLDREPL_B $a0, 0)
//
// Returns an empty SDValue unless every operand of BVOp is the same load, BVOp
// is the only user of that load, the load is either non-extending or
// any-extending, and its memory element size equals the vector element size.
//
// NOTE(review): the line carrying the return type, the function name and the
// BVOp parameter is absent from this listing - confirm against the upstream
// file.
3561 const SDLoc &DL,
3562 SelectionDAG &DAG) {
3563 MVT VT = BVOp->getSimpleValueType(0);
3564 int NumOps = BVOp->getNumOperands();
3565
3566 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3567 "Unsupported vector type for broadcast.");
3568
3569 SDValue IdentitySrc;
3570 bool IsIdeneity = true;
3571
// Every operand must be a load and must equal operand 0, which is what
// IdentitySrc holds after the first iteration.
3572 for (int i = 0; i != NumOps; i++) {
3573 SDValue Op = BVOp->getOperand(i);
3574 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3575 IsIdeneity = false;
3576 break;
3577 }
3578 IdentitySrc = BVOp->getOperand(0);
3579 }
3580
3581 // make sure that this load is valid and only has one user.
3582 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3583 return SDValue();
3584
3585 auto *LN = cast<LoadSDNode>(IdentitySrc);
3586 auto ExtType = LN->getExtensionType();
3587
3588 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3589 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3590 // Indexed loads and stores are not supported on LoongArch.
3591 assert(LN->isUnindexed() && "Unexpected indexed load.");
3592
3593 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3594 // The offset operand of unindexed load is always undefined, so there is
3595 // no need to pass it to VLDREPL.
3596 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3597 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
// Users of the original load's chain result are redirected to the chain
// result of the broadcast node.
3598 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3599 return BCast;
3600 }
3601 return SDValue();
3602}
3603
3604// Sequentially insert elements from Ops into Vector, from low to high indices.
3605// Note: Ops can have fewer elements than Vector.
//
// Vector is updated in place. Undef entries of Ops are skipped, so the
// corresponding elements keep whatever Vector held before. Ops[0] is read
// unconditionally, so Ops must not be empty.
//
// NOTE(review): the line carrying the return type, the function name and the
// leading parameters (Ops, DAG and DL are used below) is absent from this
// listing - confirm against the upstream file.
3607 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3608 EVT ResTy) {
3609 assert(Ops.size() <= ResTy.getVectorNumElements());
3610
// A defined element 0 starts a fresh vector via SCALAR_TO_VECTOR, replacing
// the incoming value of Vector.
3611 SDValue Op0 = Ops[0];
3612 if (!Op0.isUndef())
3613 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
// The remaining defined elements are added one INSERT_VECTOR_ELT at a time.
3614 for (unsigned i = 1; i < Ops.size(); ++i) {
3615 SDValue Opi = Ops[i];
3616 if (Opi.isUndef())
3617 continue;
3618 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3619 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3620 }
3621}
3622
3623// Build a ResTy subvector from Node, taking NumElts elements starting at index
3624// 'first'.
//
// NumElts is the element count of ResTy. The result starts out as an UNDEF
// vector of type ResTy and is populated by fillVector from the selected
// operands of Node.
//
// NOTE(review): the line carrying the return type, the function name and the
// Node parameter is absent from this listing - confirm against the upstream
// file.
3626 SelectionDAG &DAG, SDLoc DL,
3627 const LoongArchSubtarget &Subtarget,
3628 EVT ResTy, unsigned first) {
3629 unsigned NumElts = ResTy.getVectorNumElements();
3630
3631 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3632
// Copy the operand range [first, first + NumElts) of Node.
3633 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3634 Node->op_begin() + first + NumElts);
3635 SDValue Vector = DAG.getUNDEF(ResTy);
3636 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3637 return Vector;
3638}
3639
// Custom-lower BUILD_VECTOR for 128-bit vectors (when LSX is available) and
// 256-bit vectors (when LASX is available).
//
// Strategies are tried in this order: a broadcast load, a constant splat, a
// non-constant splat (the node is returned unchanged), a single repeated
// constant patched with INSERT_VECTOR_ELT, a repeated sub-sequence that is
// filled once and then broadcast, and finally element-wise insertion. An
// empty SDValue is returned when none of them applies.
3640SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3641 SelectionDAG &DAG) const {
3642 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3643 MVT VT = Node->getSimpleValueType(0);
3644 EVT ResTy = Op->getValueType(0);
3645 unsigned NumElts = ResTy.getVectorNumElements();
3646 SDLoc DL(Op);
3647 APInt SplatValue, SplatUndef;
3648 unsigned SplatBitSize;
3649 bool HasAnyUndefs;
3650 bool IsConstant = false;
3651 bool UseSameConstant = true;
3652 SDValue ConstantValue;
3653 bool Is128Vec = ResTy.is128BitVector();
3654 bool Is256Vec = ResTy.is256BitVector();
3655
// Give up unless the result is a 128-bit vector with LSX or a 256-bit vector
// with LASX.
3656 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3657 (!Subtarget.hasExtLASX() || !Is256Vec))
3658 return SDValue();
3659
3660 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3661 return Result;
3662
// Constant splat of at most 64 bits.
3663 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3664 /*MinSplatBits=*/8) &&
3665 SplatBitSize <= 64) {
3666 // We can only cope with 8, 16, 32, or 64-bit elements.
3667 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3668 SplatBitSize != 64)
3669 return SDValue();
3670
3671 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3672 // We can only handle 64-bit elements that are within
3673 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3674 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3675 if (!SplatValue.isSignedIntN(10) &&
3676 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3677 return SDValue();
3678 if ((Is128Vec && ResTy == MVT::v4i32) ||
3679 (Is256Vec && ResTy == MVT::v8i32))
3680 return Op;
3681 }
3682
3683 EVT ViaVecTy;
3684
// Pick the integer vector type whose element width equals the splat width.
3685 switch (SplatBitSize) {
3686 default:
3687 return SDValue();
3688 case 8:
3689 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3690 break;
3691 case 16:
3692 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3693 break;
3694 case 32:
3695 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3696 break;
3697 case 64:
3698 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3699 break;
3700 }
3701
3702 // SelectionDAG::getConstant will promote SplatValue appropriately.
3703 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3704
3705 // Bitcast to the type we originally wanted.
3706 if (ViaVecTy != ResTy)
3707 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3708
3709 return Result;
3710 }
3711
// A splat without undef elements is returned unchanged.
3712 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3713 return Op;
3714
// Record whether any operand is an integer or FP constant and whether all
// such constants are the same value.
3715 for (unsigned i = 0; i < NumElts; ++i) {
3716 SDValue Opi = Node->getOperand(i);
3717 if (isIntOrFPConstant(Opi)) {
3718 IsConstant = true;
3719 if (!ConstantValue.getNode())
3720 ConstantValue = Opi;
3721 else if (ConstantValue != Opi)
3722 UseSameConstant = false;
3723 }
3724 }
3725
3726 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3727 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
// Start from a splat of the shared constant and overwrite every non-constant
// position with its operand.
3728 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3729 for (unsigned i = 0; i < NumElts; ++i) {
3730 SDValue Opi = Node->getOperand(i);
3731 if (!isIntOrFPConstant(Opi))
3732 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3733 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3734 }
3735 return Result;
3736 }
3737
3738 if (!IsConstant) {
3739 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3740 // the sub-sequence of the vector and then broadcast the sub-sequence.
3741 //
3742 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3743 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3744 // generates worse code in some cases. This could be further optimized
3745 // with more consideration.
// NOTE(review): the declaration of Sequence, which is used below, is absent
// from this listing - confirm against the upstream file.
3747 BitVector UndefElements;
3748 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3749 UndefElements.count() == 0) {
3750 // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3751 // because the high part can be simply treated as undef.
3752 SDValue Vector = DAG.getUNDEF(ResTy);
// NOTE(review): the arm of this conditional that applies when Is256Vec is true
// is absent from this listing - confirm against the upstream file.
3753 EVT FillTy = Is256Vec
3755 : ResTy;
3756 SDValue FillVec =
3757 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3758
3759 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3760
// The repeated sequence is treated as a single wide integer element of
// SeqLen * element-size bits that is splatted SplatLen times.
3761 unsigned SeqLen = Sequence.size();
3762 unsigned SplatLen = NumElts / SeqLen;
3763 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3764 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3765
3766 // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3767 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3768 if (SplatEltTy == MVT::i128)
3769 SplatTy = MVT::v4i64;
3770
3771 SDValue SplatVec;
3772 SDValue SrcVec = DAG.getBitcast(
3773 SplatTy,
3774 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3775 if (Is256Vec) {
3776 SplatVec =
3777 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3778 : LoongArchISD::XVREPLVE0,
3779 DL, SplatTy, SrcVec);
3780 } else {
3781 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3782 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3783 }
3784
3785 return DAG.getBitcast(ResTy, SplatVec);
3786 }
3787
3788 // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3789 // using memory operations is much slower.
3790 //
3791 // For 256-bit vectors, normally split into two halves and concatenate.
3792 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3793 // one non-undef element, skip splitting to avoid a worse result.
3794 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3795 ResTy == MVT::v4f64) {
// Count the defined elements in the upper half, stopping once two are found.
3796 unsigned NonUndefCount = 0;
3797 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3798 if (!Node->getOperand(i).isUndef()) {
3799 ++NonUndefCount;
3800 if (NonUndefCount > 1)
3801 break;
3802 }
3803 }
3804 if (NonUndefCount == 1)
3805 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3806 }
3807
// Build the low half (or the whole vector for 128-bit types) element by
// element.
3808 EVT VecTy =
3809 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3810 SDValue Vector =
3811 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3812
3813 if (Is128Vec)
3814 return Vector;
3815
// 256-bit case: build the high half the same way and concatenate both halves.
3816 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3817 VecTy, NumElts / 2);
3818
3819 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3820 }
3821
// Reached when constants are present but differ from each other, or when the
// type is v2f64: no custom lowering is produced.
3822 return SDValue();
3823}
3824
3825SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3826 SelectionDAG &DAG) const {
3827 SDLoc DL(Op);
3828 MVT ResVT = Op.getSimpleValueType();
3829 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3830
3831 if (Op.getOperand(0).getOpcode() == ISD::TRUNCATE &&
3832 Op.getOperand(1).getOpcode() == ISD::TRUNCATE)
3833 return Op;
3834
3835 unsigned NumOperands = Op.getNumOperands();
3836 unsigned NumFreezeUndef = 0;
3837 unsigned NumZero = 0;
3838 unsigned NumNonZero = 0;
3839 unsigned NonZeros = 0;
3840 SmallSet<SDValue, 4> Undefs;
3841 for (unsigned i = 0; i != NumOperands; ++i) {
3842 SDValue SubVec = Op.getOperand(i);
3843 if (SubVec.isUndef())
3844 continue;
3845 if (ISD::isFreezeUndef(SubVec.getNode())) {
3846 // If the freeze(undef) has multiple uses then we must fold to zero.
3847 if (SubVec.hasOneUse()) {
3848 ++NumFreezeUndef;
3849 } else {
3850 ++NumZero;
3851 Undefs.insert(SubVec);
3852 }
3853 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3854 ++NumZero;
3855 else {
3856 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3857 NonZeros |= 1 << i;
3858 ++NumNonZero;
3859 }
3860 }
3861
3862 // If we have more than 2 non-zeros, build each half separately.
3863 if (NumNonZero > 2) {
3864 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3865 ArrayRef<SDUse> Ops = Op->ops();
3866 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3867 Ops.slice(0, NumOperands / 2));
3868 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3869 Ops.slice(NumOperands / 2));
3870 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3871 }
3872
3873 // Otherwise, build it up through insert_subvectors.
3874 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3875 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3876 : DAG.getUNDEF(ResVT));
3877
3878 // Replace Undef operands with ZeroVector.
3879 for (SDValue U : Undefs)
3880 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3881
3882 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3883 unsigned NumSubElems = SubVT.getVectorNumElements();
3884 for (unsigned i = 0; i != NumOperands; ++i) {
3885 if ((NonZeros & (1 << i)) == 0)
3886 continue;
3887
3888 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3889 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3890 }
3891
3892 return Vec;
3893}
3894
3895SDValue
3896LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3897 SelectionDAG &DAG) const {
3898 MVT EltVT = Op.getSimpleValueType();
3899 SDValue Vec = Op->getOperand(0);
3900 EVT VecTy = Vec->getValueType(0);
3901 SDValue Idx = Op->getOperand(1);
3902 SDLoc DL(Op);
3903 MVT GRLenVT = Subtarget.getGRLenVT();
3904
3905 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3906
3907 if (isa<ConstantSDNode>(Idx))
3908 return Op;
3909
3910 switch (VecTy.getSimpleVT().SimpleTy) {
3911 default:
3912 llvm_unreachable("Unexpected type");
3913 case MVT::v32i8:
3914 case MVT::v16i16:
3915 case MVT::v4i64:
3916 case MVT::v4f64: {
3917 // Extract the high half subvector and place it to the low half of a new
3918 // vector. It doesn't matter what the high half of the new vector is.
3919 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3920 SDValue VecHi =
3921 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3922 SDValue TmpVec =
3923 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3924 VecHi, DAG.getConstant(0, DL, GRLenVT));
3925
3926 // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3927 // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3928 // desired element.
3929 SDValue IdxCp =
3930 Subtarget.is64Bit()
3931 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3932 : DAG.getBitcast(MVT::f32, Idx);
3933 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3934 SDValue MaskVec =
3935 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3936 SDValue ResVec =
3937 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3938
3939 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3940 DAG.getConstant(0, DL, GRLenVT));
3941 }
3942 case MVT::v8i32:
3943 case MVT::v8f32: {
3944 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3945 SDValue SplatValue =
3946 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3947
3948 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3949 DAG.getConstant(0, DL, GRLenVT));
3950 }
3951 }
3952}
3953
3954SDValue
3955LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3956 SelectionDAG &DAG) const {
3957 MVT VT = Op.getSimpleValueType();
3958 MVT EltVT = VT.getVectorElementType();
3959 unsigned NumElts = VT.getVectorNumElements();
3960 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3961 SDLoc DL(Op);
3962 SDValue Op0 = Op.getOperand(0);
3963 SDValue Op1 = Op.getOperand(1);
3964 SDValue Op2 = Op.getOperand(2);
3965
3966 if (isa<ConstantSDNode>(Op2))
3967 return Op;
3968
3969 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3970 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3971
3972 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3973 return SDValue();
3974
3975 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3976 SmallVector<SDValue, 32> RawIndices;
3977 SDValue SplatIdx;
3978 SDValue Indices;
3979
3980 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3981 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3982 for (unsigned i = 0; i < NumElts; ++i) {
3983 RawIndices.push_back(Op2);
3984 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3985 }
3986 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3987 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3988
3989 RawIndices.clear();
3990 for (unsigned i = 0; i < NumElts; ++i) {
3991 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3992 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3993 }
3994 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3995 Indices = DAG.getBitcast(IdxVTy, Indices);
3996 } else {
3997 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3998
3999 for (unsigned i = 0; i < NumElts; ++i)
4000 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
4001 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
4002 }
4003
4004 // insert vec, elt, idx
4005 // =>
4006 // select (splatidx == {0,1,2...}) ? splatelt : vec
4007 SDValue SelectCC =
4008 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
4009 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
4010}
4011
4012SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4013 SelectionDAG &DAG) const {
4014 SDLoc DL(Op);
4015 SyncScope::ID FenceSSID =
4016 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4017
4018 // singlethread fences only synchronize with signal handlers on the same
4019 // thread and thus only need to preserve instruction order, not actually
4020 // enforce memory ordering.
4021 if (FenceSSID == SyncScope::SingleThread)
4022 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4023 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4024
4025 return Op;
4026}
4027
4028SDValue LoongArchTargetLowering::lowerSET_ROUNDING(SDValue Op,
4029 SelectionDAG &DAG) const {
4030 MVT GRLenVT = Subtarget.getGRLenVT();
4031 SDLoc DL(Op);
4032 SDValue Chain = Op.getOperand(0);
4033 SDValue RMValue = Op.getOperand(1);
4034
4035 if (auto *CVal = dyn_cast<ConstantSDNode>(RMValue)) {
4036 uint64_t RM = CVal->getZExtValue();
4037 if (RM > 3) {
4038 MachineFunction &MF = DAG.getMachineFunction();
4039 LLVMContext &C = MF.getFunction().getContext();
4040 C.diagnose(DiagnosticInfoUnsupported(
4041 MF.getFunction(),
4042 "rounding mode is not supported by LoongArch hardware",
4043 DiagnosticLocation(DL.getDebugLoc()), DS_Error));
4044 return Chain;
4045 }
4046 }
4047
4048 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, GRLenVT, RMValue);
4049
4050 // LLVM rounding mode encoding differs from LoongArch FCSR encoding:
4051 // LLVM: 0=RTZ, 1=RNE, 2=RUP, 3=RDN
4052 // FCSR: 0=RNE, 1=RZ, 2=RP, 3=RN
4053 //
4054 // Need to convert argument into bits of control word:
4055 // 0 Round to 0 -> 01
4056 // 1 Round to nearest -> 00
4057 // 2 Round to +inf -> 10
4058 // 3 Round to -inf -> 11
4059 //
4060 // Transformation: RM ^ (~(RM >> 1) & 1)
4061 SDValue ShiftRight1 = DAG.getNode(ISD::SRL, DL, GRLenVT, RMValue,
4062 DAG.getConstant(1, DL, GRLenVT));
4063 SDValue SwapMask = DAG.getNode(ISD::AND, DL, GRLenVT,
4064 DAG.getNode(ISD::XOR, DL, GRLenVT, ShiftRight1,
4065 DAG.getConstant(1, DL, GRLenVT)),
4066 DAG.getConstant(1, DL, GRLenVT));
4067 RMValue = DAG.getNode(ISD::XOR, DL, GRLenVT, RMValue, SwapMask);
4068
4069 RMValue = DAG.getNode(ISD::AND, DL, GRLenVT, RMValue,
4070 DAG.getConstant(0x3, DL, GRLenVT));
4071
4072 // The RM field in FCSR is at bits [9:8]. Shift the rounding mode value
4073 // into position before writing via WRFCSR.
4074 RMValue = DAG.getNode(ISD::SHL, DL, GRLenVT, RMValue,
4075 DAG.getConstant(8, DL, GRLenVT));
4076
4077 // FCSR3 is an alias of the RM field; writing it avoids clobbering
4078 // unrelated fields in FCSR0.
4079 SDValue FCSRNo = DAG.getTargetConstant(3, DL, GRLenVT);
4080 MachineSDNode *RN = DAG.getMachineNode(LoongArch::WRFCSR, DL, MVT::Other,
4081 FCSRNo, RMValue, Chain);
4082 return SDValue(RN, 0);
4083}
4084
4085SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
4086 SelectionDAG &DAG) const {
4087
4088 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
4089 DAG.getContext()->emitError(
4090 "On LA64, only 64-bit registers can be written.");
4091 return Op.getOperand(0);
4092 }
4093
4094 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
4095 DAG.getContext()->emitError(
4096 "On LA32, only 32-bit registers can be written.");
4097 return Op.getOperand(0);
4098 }
4099
4100 return Op;
4101}
4102
// Custom lowering for the frame-address query.
//
// Operand 0 is the requested frame depth. If it is not a constant, an error
// is emitted and an empty SDValue is returned. Otherwise the result starts as
// a copy of the frame register and is walked up Depth frames.
//
// NOTE(review): listing line 4112 is absent from this chunk (the embedded
// numbering jumps from 4111 to 4113); the statement on that line is not
// visible here.
4103SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
4104 SelectionDAG &DAG) const {
4105 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
4106 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
4107 "be a constant integer");
4108 return SDValue();
4109 }
4110
4111 MachineFunction &MF = DAG.getMachineFunction();
4113 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
4114 EVT VT = Op.getValueType();
4115 SDLoc DL(Op);
4116 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4117 unsigned Depth = Op.getConstantOperandVal(0);
4118 int GRLenInBytes = Subtarget.getGRLen() / 8;
4119
// For each level of depth, replace the current frame address with the value
// loaded from two GRLen-sized slots below it.
4120 while (Depth--) {
4121 int Offset = -(GRLenInBytes * 2);
4122 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4123 DAG.getSignedConstant(Offset, DL, VT));
4124 FrameAddr =
4125 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4126 }
4127 return FrameAddr;
4128}
4129
// Custom lowering for the return-address query.
//
// Only depth 0 (operand 0 == 0) is handled; any other depth emits an error
// and returns an empty SDValue. For depth 0 the result is a GRLen-wide copy
// of the return-address register.
//
// NOTE(review): listing line 4140 is absent from this chunk (the embedded
// numbering jumps from 4139 to 4141); the statement on that line is not
// visible here.
4130SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4131 SelectionDAG &DAG) const {
4132 // Currently only support lowering return address for current frame.
4133 if (Op.getConstantOperandVal(0) != 0) {
4134 DAG.getContext()->emitError(
4135 "return address can only be determined for the current frame");
4136 return SDValue();
4137 }
4138
4139 MachineFunction &MF = DAG.getMachineFunction();
4141 MVT GRLenVT = Subtarget.getGRLenVT();
4142
4143 // Return the value of the return address register, marking it an implicit
4144 // live-in.
4145 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4146 getRegClassFor(GRLenVT));
4147 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4148}
4149
4150SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4151 SelectionDAG &DAG) const {
4152 MachineFunction &MF = DAG.getMachineFunction();
4153 auto Size = Subtarget.getGRLen() / 8;
4154 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4155 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4156}
4157
// Custom lowering for VASTART.
//
// Operand 0 is the chain, operand 1 the destination pointer and operand 2 the
// source-value node describing that pointer. The result is a store of the
// varargs frame-index address to operand 1.
//
// NOTE(review): listing line 4165 is absent from this chunk (the embedded
// numbering jumps from 4164 to 4166), so the remaining argument(s) and the
// closing of the getFrameIndex call are not visible here.
4158SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4159 SelectionDAG &DAG) const {
4160 MachineFunction &MF = DAG.getMachineFunction();
4161 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4162
4163 SDLoc DL(Op);
4164 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4166
4167 // vastart just stores the address of the VarArgsFrameIndex slot into the
4168 // memory location argument.
4169 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4170 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4171 MachinePointerInfo(SV));
4172}
4173
// Custom lowering for UINT_TO_FP.
//
// Tries, in order:
//   1. rewriting to SINT_TO_FP when the input is known non-negative (plus
//      further conditions that are not visible here, see the note below);
//   2. with LSX, converting i64 -> f64 through a v2i64 -> v2f64 vector
//      UINT_TO_FP and extracting element 0;
//   3. returning an empty SDValue unless the target is LA64 with basic F but
//      without basic D;
//   4. keeping the node as-is when the input is recognizably narrow (masked
//      by AND, BSTRPICK from bit 0, or AssertZext below i32);
//   5. otherwise emitting a libcall.
//
// NOTE(review): listing lines 4182-4183 (the rest of the first condition) and
// 4222 (presumably the declaration of Result - confirm) are absent from this
// chunk.
4174SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4175 SelectionDAG &DAG) const {
4176 SDLoc DL(Op);
4177 SDValue Op0 = Op.getOperand(0);
4178 EVT VT = Op.getValueType();
4179 EVT Op0VT = Op0.getValueType();
4180
4181 if ((DAG.SignBitIsZero(Op0) || Op->getFlags().hasNonNeg()) &&
4184 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, Op0);
4185
4186 // We can't do uint64 -> double -> float because of double-rounding issue.
4187 if (Subtarget.hasExtLSX() && Op0VT == MVT::i64 && VT == MVT::f64) {
4188 Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2i64, Op0);
4189 SDValue Conv = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::v2f64, Op0);
4190 Conv = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64, Conv,
4191 DAG.getIntPtrConstant(0, DL));
4192 return Conv;
4193 }
4194
4195 if (!Subtarget.is64Bit() || !Subtarget.hasBasicF() || Subtarget.hasBasicD())
4196 return SDValue();
4197
4198 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4199 !Subtarget.hasBasicD() && "unexpected target features");
4200
// Input is an AND with a constant mask smaller than 0xFFFFFFFF: keep the node.
4201 if (Op0->getOpcode() == ISD::AND) {
4202 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4203 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4204 return Op;
4205 }
4206
// Input is a BSTRPICK whose operand 1 is below 0x1F and operand 2 is 0: keep
// the node.
4207 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4208 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
4209 Op0.getConstantOperandVal(2) == UINT64_C(0))
4210 return Op;
4211
// Input is asserted zero-extended from a type narrower than i32: keep the
// node.
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<> would state the intent explicitly.
4212 if (Op0.getOpcode() == ISD::AssertZext &&
4213 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4214 return Op;
4215
// Fall back to the unsigned int-to-FP runtime library call for these types.
4216 EVT OpVT = Op0.getValueType();
4217 EVT RetVT = Op.getValueType();
4218 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4219 MakeLibCallOptions CallOptions;
4220 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4221 SDValue Chain = SDValue();
4223 std::tie(Result, Chain) =
4224 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4225 return Result;
4226}
4227
// Custom lowering for SINT_TO_FP.
//
// Asserts that the target is LA64 with basic F but without basic D. The node
// is kept as-is when the input carries an assertion (AssertSext, plus an
// alternative that is not visible here) that it fits in at most i32;
// otherwise a libcall is emitted.
//
// NOTE(review): listing lines 4237 (second half of the opcode test) and 4247
// (presumably the declaration of Result - confirm) are absent from this
// chunk.
4228SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4229 SelectionDAG &DAG) const {
4230 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4231 !Subtarget.hasBasicD() && "unexpected target features");
4232
4233 SDLoc DL(Op);
4234 SDValue Op0 = Op.getOperand(0);
4235
// NOTE(review): the dyn_cast result is dereferenced without a null check;
// cast<> would state the intent explicitly.
4236 if ((Op0.getOpcode() == ISD::AssertSext ||
4238 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4239 return Op;
4240
// Fall back to the signed int-to-FP runtime library call for these types.
4241 EVT OpVT = Op0.getValueType();
4242 EVT RetVT = Op.getValueType();
4243 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4244 MakeLibCallOptions CallOptions;
4245 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4246 SDValue Chain = SDValue();
4248 std::tie(Result, Chain) =
4249 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4250 return Result;
4251}
4252
4253SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4254 SelectionDAG &DAG) const {
4255
4256 SDLoc DL(Op);
4257 EVT VT = Op.getValueType();
4258 SDValue Op0 = Op.getOperand(0);
4259 EVT Op0VT = Op0.getValueType();
4260
4261 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4262 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4263 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4264 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4265 }
4266 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4267 SDValue Lo, Hi;
4268 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4269 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4270 }
4271 return Op;
4272}
4273
4274SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4275 SelectionDAG &DAG) const {
4276
4277 SDLoc DL(Op);
4278 SDValue Op0 = Op.getOperand(0);
4279
4280 if (Op0.getValueType() == MVT::f16)
4281 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4282
4283 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4284 !Subtarget.hasBasicD()) {
4285 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4286 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4287 }
4288
4289 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4290 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4291 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4292}
4293
4294SDValue LoongArchTargetLowering::lowerFP_TO_UINT(SDValue Op,
4295 SelectionDAG &DAG) const {
4296 if (!Subtarget.hasExtLSX())
4297 return SDValue();
4298
4299 SDLoc DL(Op);
4300 SDValue Src = Op.getOperand(0);
4301 EVT VT = Op.getValueType();
4302 EVT SrcVT = Src.getValueType();
4303
4304 if (VT != MVT::i64)
4305 return SDValue();
4306
4307 if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
4308 return SDValue();
4309
4310 if (SrcVT == MVT::f32)
4311 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Src);
4312 Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, Src);
4313 SDValue Conv = DAG.getNode(ISD::FP_TO_UINT, DL, MVT::v2i64, Src);
4314 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Conv,
4315 DAG.getIntPtrConstant(0, DL));
4316}
4317
4319 SelectionDAG &DAG, unsigned Flags) {
// Returns a target global-address node for N's global with offset 0 and the
// given Flags.
// NOTE(review): the first line of this signature (listing line 4318) is
// absent from this chunk; presumably an overload of the getTargetNode helper
// that getAddr calls - confirm.
4320 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4321}
4322
4324 SelectionDAG &DAG, unsigned Flags) {
// Returns a target block-address node for N's block address, keeping N's
// offset and applying the given Flags.
// NOTE(review): the first line of this signature (listing line 4323) is
// absent from this chunk; presumably an overload of the getTargetNode helper
// that getAddr calls - confirm.
4325 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4326 Flags);
4327}
4328
4330 SelectionDAG &DAG, unsigned Flags) {
// Returns a target constant-pool node for N's constant, keeping N's alignment
// and offset and applying the given Flags.
// NOTE(review): the first line of this signature (listing line 4329) is
// absent from this chunk; presumably an overload of the getTargetNode helper
// that getAddr calls - confirm.
4331 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4332 N->getOffset(), Flags);
4333}
4334
4336 SelectionDAG &DAG, unsigned Flags) {
// Returns a target jump-table node for N's jump-table index with the given
// Flags.
// NOTE(review): the first line of this signature (listing line 4335) is
// absent from this chunk; presumably an overload of the getTargetNode helper
// that getAddr calls - confirm.
4337 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4338}
4339
// Materializes the address of the symbol-like node N.
//
// A target node for N is built with getTargetNode (flags 0) and wrapped in a
// pseudo instruction chosen by code model and locality:
//   Large,        local     -> PseudoLA_PCREL_LARGE
//   Large,        non-local -> PseudoLA_GOT_LARGE
//   Small/Medium, local     -> PseudoLA_PCREL
//   Small/Medium, non-local -> PseudoLA_GOT
// Any other code model is a fatal error. For non-local symbols a memory
// operand is attached to the resulting machine node.
//
// NOTE(review): listing line 4342 (a parameter line, presumably declaring the
// code model M used by the switch - confirm) and lines 4404-4406 (the leading
// arguments of getMachineMemOperand) are absent from this chunk.
4340template <class NodeTy>
4341SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4343 bool IsLocal) const {
4344 SDLoc DL(N);
4345 EVT Ty = getPointerTy(DAG.getDataLayout());
4346 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4347 SDValue Load;
4348
4349 switch (M) {
4350 default:
4351 report_fatal_error("Unsupported code model");
4352
4353 case CodeModel::Large: {
4354 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4355
4356 // This is not actually used, but is necessary for successfully matching
4357 // the PseudoLA_*_LARGE nodes.
4358 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4359 if (IsLocal) {
4360 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4361 // eventually becomes the desired 5-insn code sequence.
4362 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4363 Tmp, Addr),
4364 0);
4365 } else {
4366 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4367 // eventually becomes the desired 5-insn code sequence.
4368 Load = SDValue(
4369 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4370 0);
4371 }
4372 break;
4373 }
4374
4375 case CodeModel::Small:
4376 case CodeModel::Medium:
4377 if (IsLocal) {
4378 // This generates the pattern (PseudoLA_PCREL sym), which
4379 //
4380 // for la32r expands to:
4381 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4382 //
4383 // for la32s and la64 expands to:
4384 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4385 Load = SDValue(
4386 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4387 } else {
4388 // This generates the pattern (PseudoLA_GOT sym), which
4389 //
4390 // for la32r expands to:
4391 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4392 //
4393 // for la32s and la64 expands to:
4394 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4395 Load =
4396 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4397 }
// Last case of the switch: control falls out of it without a break.
4398 }
4399
4400 if (!IsLocal) {
4401 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4402 MachineFunction &MF = DAG.getMachineFunction();
4403 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4407 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4408 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4409 }
4410
4411 return Load;
4412}
4413
4414SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4415 SelectionDAG &DAG) const {
4416 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4417 DAG.getTarget().getCodeModel());
4418}
4419
4420SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4421 SelectionDAG &DAG) const {
4422 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4423 DAG.getTarget().getCodeModel());
4424}
4425
4426SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4427 SelectionDAG &DAG) const {
4428 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4429 DAG.getTarget().getCodeModel());
4430}
4431
4432SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4433 SelectionDAG &DAG) const {
4434 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4435 assert(N->getOffset() == 0 && "unexpected offset in global node");
4436 auto CM = DAG.getTarget().getCodeModel();
4437 const GlobalValue *GV = N->getGlobal();
4438
4439 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4440 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4441 CM = *GCM;
4442 }
4443
4444 return getAddr(N, DAG, CM, GV->isDSOLocal());
4445}
4446
// Builds the address of a TLS global using the pseudo instruction Opc.
//
// The pseudo yields an offset; PseudoLA_TLS_IE_LARGE takes an extra dummy
// operand. For PseudoLA_TLS_LE outside the large code model that offset is
// returned directly; in all other cases the thread pointer (R2) is added to
// it. When UseGOT is set, a memory operand is attached to the pseudo's
// machine node.
//
// NOTE(review): listing lines 4474-4476 (the leading arguments of
// getMachineMemOperand) are absent from this chunk.
4447SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4448 SelectionDAG &DAG,
4449 unsigned Opc, bool UseGOT,
4450 bool Large) const {
4451 SDLoc DL(N);
4452 EVT Ty = getPointerTy(DAG.getDataLayout());
4453 MVT GRLenVT = Subtarget.getGRLenVT();
4454
4455 // This is not actually used, but is necessary for successfully matching the
4456 // PseudoLA_*_LARGE nodes.
4457 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4458 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4459
4460 // Only IE needs an extra argument for large code model.
4461 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4462 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4463 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4464
4465 // If it is LE for normal/medium code model, the add tp operation will occur
4466 // during the pseudo-instruction expansion.
4467 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4468 return Offset;
4469
4470 if (UseGOT) {
4471 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4472 MachineFunction &MF = DAG.getMachineFunction();
4473 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4477 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4478 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4479 }
4480
4481 // Add the thread pointer.
4482 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4483 DAG.getRegister(LoongArch::R2, GRLenVT));
4484}
4485
// Builds the address of a TLS global by calling __tls_get_addr.
//
// The pseudo instruction Opc produces the single call argument; it takes an
// extra dummy operand when Large is set. The call uses the C calling
// convention, takes and returns a pointer-sized integer, and is chained to
// the DAG entry node. The call's result value is returned.
//
// NOTE(review): listing line 4504 is absent from this chunk (presumably the
// declaration of Args - confirm).
4486SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4487 SelectionDAG &DAG,
4488 unsigned Opc,
4489 bool Large) const {
4490 SDLoc DL(N);
4491 EVT Ty = getPointerTy(DAG.getDataLayout());
4492 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4493
4494 // This is not actually used, but is necessary for successfully matching the
4495 // PseudoLA_*_LARGE nodes.
4496 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4497
4498 // Use a PC-relative addressing mode to access the dynamic GOT address.
4499 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4500 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4501 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4502
4503 // Prepare argument list to generate call.
4505 Args.emplace_back(Load, CallTy);
4506
4507 // Setup call to __tls_get_addr.
4508 TargetLowering::CallLoweringInfo CLI(DAG);
4509 CLI.setDebugLoc(DL)
4510 .setChain(DAG.getEntryNode())
4511 .setLibCallee(CallingConv::C, CallTy,
4512 DAG.getExternalSymbol("__tls_get_addr", Ty),
4513 std::move(Args));
4514
4515 return LowerCallTo(CLI).first;
4516}
4517
4518SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4519 SelectionDAG &DAG, unsigned Opc,
4520 bool Large) const {
4521 SDLoc DL(N);
4522 EVT Ty = getPointerTy(DAG.getDataLayout());
4523 const GlobalValue *GV = N->getGlobal();
4524
4525 // This is not actually used, but is necessary for successfully matching the
4526 // PseudoLA_*_LARGE nodes.
4527 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4528
4529 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4530 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4531 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4532 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4533 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4534}
4535
// Custom lowering for TLS global addresses.
//
// Dispatches on the TLS model of the referenced global. The two dynamic
// models call getDynamicTLSAddr unless TLS descriptors are enabled, in which
// case control leaves the switch and getTLSDescAddr is used. The GOT-based
// and link-time-resolved models call getStaticTLSAddr. Emulated TLS is
// rejected with a fatal usage error, and the node must carry no offset.
//
// NOTE(review): several listing lines are absent from this chunk: 4539-4540
// (the condition guarding the GHC fatal error) and 4555, 4565, 4574, 4580
// (presumably the case labels of the switch - confirm).
4536SDValue
4537LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4538 SelectionDAG &DAG) const {
4541 report_fatal_error("In GHC calling convention TLS is not supported");
4542
4543 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4544 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4545
4546 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4547 assert(N->getOffset() == 0 && "unexpected offset in global node");
4548
4549 if (DAG.getTarget().useEmulatedTLS())
4550 reportFatalUsageError("the emulated TLS is prohibited");
4551
4552 bool IsDesc = DAG.getTarget().useTLSDESC();
4553
4554 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4556 // In this model, application code calls the dynamic linker function
4557 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4558 // runtime.
4559 if (!IsDesc)
4560 return getDynamicTLSAddr(N, DAG,
4561 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4562 : LoongArch::PseudoLA_TLS_GD,
4563 Large);
4564 break;
4566 // Same as GeneralDynamic, except for assembly modifiers and relocation
4567 // records.
4568 if (!IsDesc)
4569 return getDynamicTLSAddr(N, DAG,
4570 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4571 : LoongArch::PseudoLA_TLS_LD,
4572 Large);
4573 break;
4575 // This model uses the GOT to resolve TLS offsets.
4576 return getStaticTLSAddr(N, DAG,
4577 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4578 : LoongArch::PseudoLA_TLS_IE,
4579 /*UseGOT=*/true, Large);
4581 // This model is used when static linking as the TLS offsets are resolved
4582 // during program linking.
4583 //
4584 // This node doesn't need an extra argument for the large code model.
4585 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4586 /*UseGOT=*/false, Large);
4587 }
4588
// Only reached through the break statements above, i.e. when TLS descriptors
// are in use for one of the dynamic models.
4589 return getTLSDescAddr(N, DAG,
4590 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4591 : LoongArch::PseudoLA_TLS_DESC,
4592 Large);
4593}
4594
// Validates that operand ImmOp of Op, which must be a constant, fits in N
// bits: as a signed value when IsSigned is true, as an unsigned value
// otherwise.
//
// Returns an empty SDValue when the immediate is in range. Otherwise emits
// the error "<operation name>: argument out of range." and returns an UNDEF
// node of Op's value type.
//
// NOTE(review): listing line 4596 (the first line of the signature, carrying
// the function name and leading parameters) is absent from this chunk.
4595template <unsigned N>
4597 SelectionDAG &DAG, bool IsSigned = false) {
4598 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4599 // Check the ImmArg.
4600 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4601 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4602 DAG.getContext()->emitError(Op->getOperationName(0) +
4603 ": argument out of range.");
4604 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4605 }
4606 return SDValue();
4607}
4608
4609SDValue
4610LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4611 SelectionDAG &DAG) const {
4612 switch (Op.getConstantOperandVal(0)) {
4613 default:
4614 return SDValue(); // Don't custom lower most intrinsics.
4615 case Intrinsic::thread_pointer: {
4616 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4617 return DAG.getRegister(LoongArch::R2, PtrVT);
4618 }
4619 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4620 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4621 case Intrinsic::loongarch_lsx_vreplvei_d:
4622 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4623 return checkIntrinsicImmArg<1>(Op, 2, DAG);
4624 case Intrinsic::loongarch_lsx_vreplvei_w:
4625 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4626 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4627 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4628 case Intrinsic::loongarch_lasx_xvpickve_d:
4629 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4630 return checkIntrinsicImmArg<2>(Op, 2, DAG);
4631 case Intrinsic::loongarch_lasx_xvinsve0_d:
4632 return checkIntrinsicImmArg<2>(Op, 3, DAG);
4633 case Intrinsic::loongarch_lsx_vsat_b:
4634 case Intrinsic::loongarch_lsx_vsat_bu:
4635 case Intrinsic::loongarch_lsx_vrotri_b:
4636 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4637 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4638 case Intrinsic::loongarch_lsx_vsrlri_b:
4639 case Intrinsic::loongarch_lsx_vsrari_b:
4640 case Intrinsic::loongarch_lsx_vreplvei_h:
4641 case Intrinsic::loongarch_lasx_xvsat_b:
4642 case Intrinsic::loongarch_lasx_xvsat_bu:
4643 case Intrinsic::loongarch_lasx_xvrotri_b:
4644 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4645 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4646 case Intrinsic::loongarch_lasx_xvsrlri_b:
4647 case Intrinsic::loongarch_lasx_xvsrari_b:
4648 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4649 case Intrinsic::loongarch_lasx_xvpickve_w:
4650 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4651 return checkIntrinsicImmArg<3>(Op, 2, DAG);
4652 case Intrinsic::loongarch_lasx_xvinsve0_w:
4653 return checkIntrinsicImmArg<3>(Op, 3, DAG);
4654 case Intrinsic::loongarch_lsx_vsat_h:
4655 case Intrinsic::loongarch_lsx_vsat_hu:
4656 case Intrinsic::loongarch_lsx_vrotri_h:
4657 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4658 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4659 case Intrinsic::loongarch_lsx_vsrlri_h:
4660 case Intrinsic::loongarch_lsx_vsrari_h:
4661 case Intrinsic::loongarch_lsx_vreplvei_b:
4662 case Intrinsic::loongarch_lasx_xvsat_h:
4663 case Intrinsic::loongarch_lasx_xvsat_hu:
4664 case Intrinsic::loongarch_lasx_xvrotri_h:
4665 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4666 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4667 case Intrinsic::loongarch_lasx_xvsrlri_h:
4668 case Intrinsic::loongarch_lasx_xvsrari_h:
4669 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4670 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4671 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4672 case Intrinsic::loongarch_lsx_vsrani_b_h:
4673 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4674 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4675 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4676 case Intrinsic::loongarch_lsx_vssrani_b_h:
4677 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4678 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4679 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4680 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4681 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4682 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4683 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4684 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4685 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4686 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4687 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4688 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4689 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4690 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4691 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4692 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4693 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4694 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4695 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4696 case Intrinsic::loongarch_lsx_vsat_w:
4697 case Intrinsic::loongarch_lsx_vsat_wu:
4698 case Intrinsic::loongarch_lsx_vrotri_w:
4699 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4700 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4701 case Intrinsic::loongarch_lsx_vsrlri_w:
4702 case Intrinsic::loongarch_lsx_vsrari_w:
4703 case Intrinsic::loongarch_lsx_vslei_bu:
4704 case Intrinsic::loongarch_lsx_vslei_hu:
4705 case Intrinsic::loongarch_lsx_vslei_wu:
4706 case Intrinsic::loongarch_lsx_vslei_du:
4707 case Intrinsic::loongarch_lsx_vslti_bu:
4708 case Intrinsic::loongarch_lsx_vslti_hu:
4709 case Intrinsic::loongarch_lsx_vslti_wu:
4710 case Intrinsic::loongarch_lsx_vslti_du:
4711 case Intrinsic::loongarch_lsx_vbsll_v:
4712 case Intrinsic::loongarch_lsx_vbsrl_v:
4713 case Intrinsic::loongarch_lasx_xvsat_w:
4714 case Intrinsic::loongarch_lasx_xvsat_wu:
4715 case Intrinsic::loongarch_lasx_xvrotri_w:
4716 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4717 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4718 case Intrinsic::loongarch_lasx_xvsrlri_w:
4719 case Intrinsic::loongarch_lasx_xvsrari_w:
4720 case Intrinsic::loongarch_lasx_xvslei_bu:
4721 case Intrinsic::loongarch_lasx_xvslei_hu:
4722 case Intrinsic::loongarch_lasx_xvslei_wu:
4723 case Intrinsic::loongarch_lasx_xvslei_du:
4724 case Intrinsic::loongarch_lasx_xvslti_bu:
4725 case Intrinsic::loongarch_lasx_xvslti_hu:
4726 case Intrinsic::loongarch_lasx_xvslti_wu:
4727 case Intrinsic::loongarch_lasx_xvslti_du:
4728 case Intrinsic::loongarch_lasx_xvbsll_v:
4729 case Intrinsic::loongarch_lasx_xvbsrl_v:
4730 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4731 case Intrinsic::loongarch_lsx_vseqi_b:
4732 case Intrinsic::loongarch_lsx_vseqi_h:
4733 case Intrinsic::loongarch_lsx_vseqi_w:
4734 case Intrinsic::loongarch_lsx_vseqi_d:
4735 case Intrinsic::loongarch_lsx_vslei_b:
4736 case Intrinsic::loongarch_lsx_vslei_h:
4737 case Intrinsic::loongarch_lsx_vslei_w:
4738 case Intrinsic::loongarch_lsx_vslei_d:
4739 case Intrinsic::loongarch_lsx_vslti_b:
4740 case Intrinsic::loongarch_lsx_vslti_h:
4741 case Intrinsic::loongarch_lsx_vslti_w:
4742 case Intrinsic::loongarch_lsx_vslti_d:
4743 case Intrinsic::loongarch_lasx_xvseqi_b:
4744 case Intrinsic::loongarch_lasx_xvseqi_h:
4745 case Intrinsic::loongarch_lasx_xvseqi_w:
4746 case Intrinsic::loongarch_lasx_xvseqi_d:
4747 case Intrinsic::loongarch_lasx_xvslei_b:
4748 case Intrinsic::loongarch_lasx_xvslei_h:
4749 case Intrinsic::loongarch_lasx_xvslei_w:
4750 case Intrinsic::loongarch_lasx_xvslei_d:
4751 case Intrinsic::loongarch_lasx_xvslti_b:
4752 case Intrinsic::loongarch_lasx_xvslti_h:
4753 case Intrinsic::loongarch_lasx_xvslti_w:
4754 case Intrinsic::loongarch_lasx_xvslti_d:
4755 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4756 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4757 case Intrinsic::loongarch_lsx_vsrani_h_w:
4758 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4759 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4760 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4761 case Intrinsic::loongarch_lsx_vssrani_h_w:
4762 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4763 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4764 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4765 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4766 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4767 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4768 case Intrinsic::loongarch_lsx_vfrstpi_b:
4769 case Intrinsic::loongarch_lsx_vfrstpi_h:
4770 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4771 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4772 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4773 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4774 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4775 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4776 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4777 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4778 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4779 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4780 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4781 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4782 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4783 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4784 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4785 case Intrinsic::loongarch_lsx_vsat_d:
4786 case Intrinsic::loongarch_lsx_vsat_du:
4787 case Intrinsic::loongarch_lsx_vrotri_d:
4788 case Intrinsic::loongarch_lsx_vsrlri_d:
4789 case Intrinsic::loongarch_lsx_vsrari_d:
4790 case Intrinsic::loongarch_lasx_xvsat_d:
4791 case Intrinsic::loongarch_lasx_xvsat_du:
4792 case Intrinsic::loongarch_lasx_xvrotri_d:
4793 case Intrinsic::loongarch_lasx_xvsrlri_d:
4794 case Intrinsic::loongarch_lasx_xvsrari_d:
4795 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4796 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4797 case Intrinsic::loongarch_lsx_vsrani_w_d:
4798 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4799 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4800 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4801 case Intrinsic::loongarch_lsx_vssrani_w_d:
4802 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4803 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4804 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4805 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4806 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4807 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4808 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4809 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4810 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4811 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4812 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4813 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4814 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4815 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4816 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4817 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4818 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4819 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4820 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4821 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4822 case Intrinsic::loongarch_lsx_vsrani_d_q:
4823 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4824 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4825 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4826 case Intrinsic::loongarch_lsx_vssrani_d_q:
4827 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4828 case Intrinsic::loongarch_lsx_vssrani_du_q:
4829 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4830 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4831 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4832 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4833 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4834 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4835 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4836 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4837 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4838 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4839 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4840 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4841 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4842 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4843 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4844 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4845 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4846 case Intrinsic::loongarch_lsx_vnori_b:
4847 case Intrinsic::loongarch_lsx_vshuf4i_b:
4848 case Intrinsic::loongarch_lsx_vshuf4i_h:
4849 case Intrinsic::loongarch_lsx_vshuf4i_w:
4850 case Intrinsic::loongarch_lasx_xvnori_b:
4851 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4852 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4853 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4854 case Intrinsic::loongarch_lasx_xvpermi_d:
4855 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4856 case Intrinsic::loongarch_lsx_vshuf4i_d:
4857 case Intrinsic::loongarch_lsx_vpermi_w:
4858 case Intrinsic::loongarch_lsx_vbitseli_b:
4859 case Intrinsic::loongarch_lsx_vextrins_b:
4860 case Intrinsic::loongarch_lsx_vextrins_h:
4861 case Intrinsic::loongarch_lsx_vextrins_w:
4862 case Intrinsic::loongarch_lsx_vextrins_d:
4863 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4864 case Intrinsic::loongarch_lasx_xvpermi_w:
4865 case Intrinsic::loongarch_lasx_xvpermi_q:
4866 case Intrinsic::loongarch_lasx_xvbitseli_b:
4867 case Intrinsic::loongarch_lasx_xvextrins_b:
4868 case Intrinsic::loongarch_lasx_xvextrins_h:
4869 case Intrinsic::loongarch_lasx_xvextrins_w:
4870 case Intrinsic::loongarch_lasx_xvextrins_d:
4871 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4872 case Intrinsic::loongarch_lsx_vrepli_b:
4873 case Intrinsic::loongarch_lsx_vrepli_h:
4874 case Intrinsic::loongarch_lsx_vrepli_w:
4875 case Intrinsic::loongarch_lsx_vrepli_d:
4876 case Intrinsic::loongarch_lasx_xvrepli_b:
4877 case Intrinsic::loongarch_lasx_xvrepli_h:
4878 case Intrinsic::loongarch_lasx_xvrepli_w:
4879 case Intrinsic::loongarch_lasx_xvrepli_d:
4880 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4881 case Intrinsic::loongarch_lsx_vldi:
4882 case Intrinsic::loongarch_lasx_xvldi:
4883 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4884 }
4885}
4886
4887// Helper function that emits error message for intrinsics with chain and return
4888// merge values of a UNDEF and the chain.
4890 StringRef ErrorMsg,
4891 SelectionDAG &DAG) {
4892 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4893 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4894 SDLoc(Op));
4895}
4896
4897SDValue
4898LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4899 SelectionDAG &DAG) const {
4900 SDLoc DL(Op);
4901 MVT GRLenVT = Subtarget.getGRLenVT();
4902 EVT VT = Op.getValueType();
4903 SDValue Chain = Op.getOperand(0);
4904 const StringRef ErrorMsgOOR = "argument out of range";
4905 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4906 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4907
4908 switch (Op.getConstantOperandVal(1)) {
4909 default:
4910 return Op;
4911 case Intrinsic::loongarch_crc_w_b_w:
4912 case Intrinsic::loongarch_crc_w_h_w:
4913 case Intrinsic::loongarch_crc_w_w_w:
4914 case Intrinsic::loongarch_crc_w_d_w:
4915 case Intrinsic::loongarch_crcc_w_b_w:
4916 case Intrinsic::loongarch_crcc_w_h_w:
4917 case Intrinsic::loongarch_crcc_w_w_w:
4918 case Intrinsic::loongarch_crcc_w_d_w:
4919 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4920 case Intrinsic::loongarch_csrrd_w:
4921 case Intrinsic::loongarch_csrrd_d: {
4922 unsigned Imm = Op.getConstantOperandVal(2);
4923 return !isUInt<14>(Imm)
4924 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4925 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4926 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4927 }
4928 case Intrinsic::loongarch_csrwr_w:
4929 case Intrinsic::loongarch_csrwr_d: {
4930 unsigned Imm = Op.getConstantOperandVal(3);
4931 return !isUInt<14>(Imm)
4932 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4933 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4934 {Chain, Op.getOperand(2),
4935 DAG.getConstant(Imm, DL, GRLenVT)});
4936 }
4937 case Intrinsic::loongarch_csrxchg_w:
4938 case Intrinsic::loongarch_csrxchg_d: {
4939 unsigned Imm = Op.getConstantOperandVal(4);
4940 return !isUInt<14>(Imm)
4941 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4942 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4943 {Chain, Op.getOperand(2), Op.getOperand(3),
4944 DAG.getConstant(Imm, DL, GRLenVT)});
4945 }
4946 case Intrinsic::loongarch_iocsrrd_d: {
4947 return DAG.getNode(
4948 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4949 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4950 }
4951#define IOCSRRD_CASE(NAME, NODE) \
4952 case Intrinsic::loongarch_##NAME: { \
4953 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4954 {Chain, Op.getOperand(2)}); \
4955 }
4956 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4957 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4958 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4959#undef IOCSRRD_CASE
4960 case Intrinsic::loongarch_cpucfg: {
4961 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4962 {Chain, Op.getOperand(2)});
4963 }
4964 case Intrinsic::loongarch_lddir_d: {
4965 unsigned Imm = Op.getConstantOperandVal(3);
4966 return !isUInt<8>(Imm)
4967 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4968 : Op;
4969 }
4970 case Intrinsic::loongarch_movfcsr2gr: {
4971 if (!Subtarget.hasBasicF())
4972 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4973 unsigned Imm = Op.getConstantOperandVal(2);
4974 return !isUInt<2>(Imm)
4975 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4976 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4977 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4978 }
4979 case Intrinsic::loongarch_lsx_vld:
4980 case Intrinsic::loongarch_lsx_vldrepl_b:
4981 case Intrinsic::loongarch_lasx_xvld:
4982 case Intrinsic::loongarch_lasx_xvldrepl_b:
4983 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4984 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4985 : SDValue();
4986 case Intrinsic::loongarch_lsx_vldrepl_h:
4987 case Intrinsic::loongarch_lasx_xvldrepl_h:
4988 return !isShiftedInt<11, 1>(
4989 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4991 Op, "argument out of range or not a multiple of 2", DAG)
4992 : SDValue();
4993 case Intrinsic::loongarch_lsx_vldrepl_w:
4994 case Intrinsic::loongarch_lasx_xvldrepl_w:
4995 return !isShiftedInt<10, 2>(
4996 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4998 Op, "argument out of range or not a multiple of 4", DAG)
4999 : SDValue();
5000 case Intrinsic::loongarch_lsx_vldrepl_d:
5001 case Intrinsic::loongarch_lasx_xvldrepl_d:
5002 return !isShiftedInt<9, 3>(
5003 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
5005 Op, "argument out of range or not a multiple of 8", DAG)
5006 : SDValue();
5007 }
5008}
5009
5010// Helper function that emits error message for intrinsics with void return
5011// value and return the chain.
5013 SelectionDAG &DAG) {
5014
5015 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
5016 return Op.getOperand(0);
5017}
5018
5019SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
5020 SelectionDAG &DAG) const {
5021 SDLoc DL(Op);
5022 MVT GRLenVT = Subtarget.getGRLenVT();
5023 SDValue Chain = Op.getOperand(0);
5024 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
5025 SDValue Op2 = Op.getOperand(2);
5026 const StringRef ErrorMsgOOR = "argument out of range";
5027 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5028 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
5029 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5030
5031 switch (IntrinsicEnum) {
5032 default:
5033 // TODO: Add more Intrinsics.
5034 return SDValue();
5035 case Intrinsic::loongarch_cacop_d:
5036 case Intrinsic::loongarch_cacop_w: {
5037 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
5038 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
5039 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
5040 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
5041 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
5042 unsigned Imm1 = Op2->getAsZExtVal();
5043 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
5044 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
5045 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
5046 return Op;
5047 }
5048 case Intrinsic::loongarch_dbar: {
5049 unsigned Imm = Op2->getAsZExtVal();
5050 return !isUInt<15>(Imm)
5051 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5052 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
5053 DAG.getConstant(Imm, DL, GRLenVT));
5054 }
5055 case Intrinsic::loongarch_ibar: {
5056 unsigned Imm = Op2->getAsZExtVal();
5057 return !isUInt<15>(Imm)
5058 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5059 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
5060 DAG.getConstant(Imm, DL, GRLenVT));
5061 }
5062 case Intrinsic::loongarch_break: {
5063 unsigned Imm = Op2->getAsZExtVal();
5064 return !isUInt<15>(Imm)
5065 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5066 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
5067 DAG.getConstant(Imm, DL, GRLenVT));
5068 }
5069 case Intrinsic::loongarch_movgr2fcsr: {
5070 if (!Subtarget.hasBasicF())
5071 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
5072 unsigned Imm = Op2->getAsZExtVal();
5073 return !isUInt<2>(Imm)
5074 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5075 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
5076 DAG.getConstant(Imm, DL, GRLenVT),
5077 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
5078 Op.getOperand(3)));
5079 }
5080 case Intrinsic::loongarch_syscall: {
5081 unsigned Imm = Op2->getAsZExtVal();
5082 return !isUInt<15>(Imm)
5083 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5084 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
5085 DAG.getConstant(Imm, DL, GRLenVT));
5086 }
5087#define IOCSRWR_CASE(NAME, NODE) \
5088 case Intrinsic::loongarch_##NAME: { \
5089 SDValue Op3 = Op.getOperand(3); \
5090 return Subtarget.is64Bit() \
5091 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
5092 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5093 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
5094 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
5095 Op3); \
5096 }
5097 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
5098 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
5099 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
5100#undef IOCSRWR_CASE
5101 case Intrinsic::loongarch_iocsrwr_d: {
5102 return !Subtarget.is64Bit()
5103 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5104 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
5105 Op2,
5106 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
5107 Op.getOperand(3)));
5108 }
5109#define ASRT_LE_GT_CASE(NAME) \
5110 case Intrinsic::loongarch_##NAME: { \
5111 return !Subtarget.is64Bit() \
5112 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
5113 : Op; \
5114 }
5115 ASRT_LE_GT_CASE(asrtle_d)
5116 ASRT_LE_GT_CASE(asrtgt_d)
5117#undef ASRT_LE_GT_CASE
5118 case Intrinsic::loongarch_ldpte_d: {
5119 unsigned Imm = Op.getConstantOperandVal(3);
5120 return !Subtarget.is64Bit()
5121 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
5122 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5123 : Op;
5124 }
5125 case Intrinsic::loongarch_lsx_vst:
5126 case Intrinsic::loongarch_lasx_xvst:
5127 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
5128 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5129 : SDValue();
5130 case Intrinsic::loongarch_lasx_xvstelm_b:
5131 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5132 !isUInt<5>(Op.getConstantOperandVal(5)))
5133 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5134 : SDValue();
5135 case Intrinsic::loongarch_lsx_vstelm_b:
5136 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5137 !isUInt<4>(Op.getConstantOperandVal(5)))
5138 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
5139 : SDValue();
5140 case Intrinsic::loongarch_lasx_xvstelm_h:
5141 return (!isShiftedInt<8, 1>(
5142 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5143 !isUInt<4>(Op.getConstantOperandVal(5)))
5145 Op, "argument out of range or not a multiple of 2", DAG)
5146 : SDValue();
5147 case Intrinsic::loongarch_lsx_vstelm_h:
5148 return (!isShiftedInt<8, 1>(
5149 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5150 !isUInt<3>(Op.getConstantOperandVal(5)))
5152 Op, "argument out of range or not a multiple of 2", DAG)
5153 : SDValue();
5154 case Intrinsic::loongarch_lasx_xvstelm_w:
5155 return (!isShiftedInt<8, 2>(
5156 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5157 !isUInt<3>(Op.getConstantOperandVal(5)))
5159 Op, "argument out of range or not a multiple of 4", DAG)
5160 : SDValue();
5161 case Intrinsic::loongarch_lsx_vstelm_w:
5162 return (!isShiftedInt<8, 2>(
5163 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5164 !isUInt<2>(Op.getConstantOperandVal(5)))
5166 Op, "argument out of range or not a multiple of 4", DAG)
5167 : SDValue();
5168 case Intrinsic::loongarch_lasx_xvstelm_d:
5169 return (!isShiftedInt<8, 3>(
5170 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5171 !isUInt<2>(Op.getConstantOperandVal(5)))
5173 Op, "argument out of range or not a multiple of 8", DAG)
5174 : SDValue();
5175 case Intrinsic::loongarch_lsx_vstelm_d:
5176 return (!isShiftedInt<8, 3>(
5177 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5178 !isUInt<1>(Op.getConstantOperandVal(5)))
5180 Op, "argument out of range or not a multiple of 8", DAG)
5181 : SDValue();
5182 }
5183}
5184
5185SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5186 SelectionDAG &DAG) const {
5187 SDLoc DL(Op);
5188 SDValue Lo = Op.getOperand(0);
5189 SDValue Hi = Op.getOperand(1);
5190 SDValue Shamt = Op.getOperand(2);
5191 EVT VT = Lo.getValueType();
5192
5193 // if Shamt-GRLen < 0: // Shamt < GRLen
5194 // Lo = Lo << Shamt
5195 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5196 // else:
5197 // Lo = 0
5198 // Hi = Lo << (Shamt-GRLen)
5199
5200 SDValue Zero = DAG.getConstant(0, DL, VT);
5201 SDValue One = DAG.getConstant(1, DL, VT);
5202 SDValue MinusGRLen =
5203 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5204 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5205 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5206 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5207
5208 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5209 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5210 SDValue ShiftRightLo =
5211 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5212 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5213 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5214 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5215
5216 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5217
5218 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5219 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5220
5221 SDValue Parts[2] = {Lo, Hi};
5222 return DAG.getMergeValues(Parts, DL);
5223}
5224
5225SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5226 SelectionDAG &DAG,
5227 bool IsSRA) const {
5228 SDLoc DL(Op);
5229 SDValue Lo = Op.getOperand(0);
5230 SDValue Hi = Op.getOperand(1);
5231 SDValue Shamt = Op.getOperand(2);
5232 EVT VT = Lo.getValueType();
5233
5234 // SRA expansion:
5235 // if Shamt-GRLen < 0: // Shamt < GRLen
5236 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5237 // Hi = Hi >>s Shamt
5238 // else:
5239 // Lo = Hi >>s (Shamt-GRLen);
5240 // Hi = Hi >>s (GRLen-1)
5241 //
5242 // SRL expansion:
5243 // if Shamt-GRLen < 0: // Shamt < GRLen
5244 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5245 // Hi = Hi >>u Shamt
5246 // else:
5247 // Lo = Hi >>u (Shamt-GRLen);
5248 // Hi = 0;
5249
5250 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5251
5252 SDValue Zero = DAG.getConstant(0, DL, VT);
5253 SDValue One = DAG.getConstant(1, DL, VT);
5254 SDValue MinusGRLen =
5255 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5256 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5257 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5258 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5259
5260 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5261 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5262 SDValue ShiftLeftHi =
5263 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5264 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5265 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5266 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5267 SDValue HiFalse =
5268 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5269
5270 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5271
5272 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5273 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5274
5275 SDValue Parts[2] = {Lo, Hi};
5276 return DAG.getMergeValues(Parts, DL);
5277}
5278
5279// Returns the opcode of the target-specific SDNode that implements the 32-bit
5280// form of the given Opcode.
5281static unsigned getLoongArchWOpcode(unsigned Opcode) {
5282 switch (Opcode) {
5283 default:
5284 llvm_unreachable("Unexpected opcode");
5285 case ISD::SDIV:
5286 return LoongArchISD::DIV_W;
5287 case ISD::UDIV:
5288 return LoongArchISD::DIV_WU;
5289 case ISD::SREM:
5290 return LoongArchISD::MOD_W;
5291 case ISD::UREM:
5292 return LoongArchISD::MOD_WU;
5293 case ISD::SHL:
5294 return LoongArchISD::SLL_W;
5295 case ISD::SRA:
5296 return LoongArchISD::SRA_W;
5297 case ISD::SRL:
5298 return LoongArchISD::SRL_W;
5299 case ISD::ROTL:
5300 case ISD::ROTR:
5301 return LoongArchISD::ROTR_W;
5302 case ISD::CTTZ:
5303 return LoongArchISD::CTZ_W;
5304 case ISD::CTLZ:
5305 return LoongArchISD::CLZ_W;
5306 }
5307}
5308
5309// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5310// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
5311// otherwise be promoted to i64, making it difficult to select the
5312// SLL_W/.../*W later one because the fact the operation was originally of
5313// type i8/i16/i32 is lost.
5315 unsigned ExtOpc = ISD::ANY_EXTEND) {
5316 SDLoc DL(N);
5317 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5318 SDValue NewOp0, NewRes;
5319
5320 switch (NumOp) {
5321 default:
5322 llvm_unreachable("Unexpected NumOp");
5323 case 1: {
5324 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5325 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5326 break;
5327 }
5328 case 2: {
5329 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5330 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5331 if (N->getOpcode() == ISD::ROTL) {
5332 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5333 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5334 }
5335 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5336 break;
5337 }
5338 // TODO:Handle more NumOp.
5339 }
5340
5341 // ReplaceNodeResults requires we maintain the same type for the return
5342 // value.
5343 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5344}
5345
5346// Converts the given 32-bit operation to a i64 operation with signed extension
5347// semantic to reduce the signed extension instructions.
5349 SDLoc DL(N);
5350 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5351 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5352 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5353 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5354 DAG.getValueType(MVT::i32));
5355 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5356}
5357
5358// Helper function that emits error message for intrinsics with/without chain
5359// and return a UNDEF or and the chain as the results.
5362 StringRef ErrorMsg, bool WithChain = true) {
5363 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5364 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5365 if (!WithChain)
5366 return;
5367 Results.push_back(N->getOperand(0));
5368}
5369
5370template <unsigned N>
5371static void
5373 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5374 unsigned ResOp) {
5375 const StringRef ErrorMsgOOR = "argument out of range";
5376 unsigned Imm = Node->getConstantOperandVal(2);
5377 if (!isUInt<N>(Imm)) {
5379 /*WithChain=*/false);
5380 return;
5381 }
5382 SDLoc DL(Node);
5383 SDValue Vec = Node->getOperand(1);
5384
5385 SDValue PickElt =
5386 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5387 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5389 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5390 PickElt.getValue(0)));
5391}
5392
5395 SelectionDAG &DAG,
5396 const LoongArchSubtarget &Subtarget,
5397 unsigned ResOp) {
5398 SDLoc DL(N);
5399 SDValue Vec = N->getOperand(1);
5400
5401 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5402 Results.push_back(
5403 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5404}
5405
5406static void
5408 SelectionDAG &DAG,
5409 const LoongArchSubtarget &Subtarget) {
5410 switch (N->getConstantOperandVal(0)) {
5411 default:
5412 llvm_unreachable("Unexpected Intrinsic.");
5413 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5414 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5415 LoongArchISD::VPICK_SEXT_ELT);
5416 break;
5417 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5418 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5419 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5420 LoongArchISD::VPICK_SEXT_ELT);
5421 break;
5422 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5423 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5424 LoongArchISD::VPICK_SEXT_ELT);
5425 break;
5426 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5427 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5428 LoongArchISD::VPICK_ZEXT_ELT);
5429 break;
5430 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5431 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5432 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5433 LoongArchISD::VPICK_ZEXT_ELT);
5434 break;
5435 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5436 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5437 LoongArchISD::VPICK_ZEXT_ELT);
5438 break;
5439 case Intrinsic::loongarch_lsx_bz_b:
5440 case Intrinsic::loongarch_lsx_bz_h:
5441 case Intrinsic::loongarch_lsx_bz_w:
5442 case Intrinsic::loongarch_lsx_bz_d:
5443 case Intrinsic::loongarch_lasx_xbz_b:
5444 case Intrinsic::loongarch_lasx_xbz_h:
5445 case Intrinsic::loongarch_lasx_xbz_w:
5446 case Intrinsic::loongarch_lasx_xbz_d:
5447 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5448 LoongArchISD::VALL_ZERO);
5449 break;
5450 case Intrinsic::loongarch_lsx_bz_v:
5451 case Intrinsic::loongarch_lasx_xbz_v:
5452 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5453 LoongArchISD::VANY_ZERO);
5454 break;
5455 case Intrinsic::loongarch_lsx_bnz_b:
5456 case Intrinsic::loongarch_lsx_bnz_h:
5457 case Intrinsic::loongarch_lsx_bnz_w:
5458 case Intrinsic::loongarch_lsx_bnz_d:
5459 case Intrinsic::loongarch_lasx_xbnz_b:
5460 case Intrinsic::loongarch_lasx_xbnz_h:
5461 case Intrinsic::loongarch_lasx_xbnz_w:
5462 case Intrinsic::loongarch_lasx_xbnz_d:
5463 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5464 LoongArchISD::VALL_NONZERO);
5465 break;
5466 case Intrinsic::loongarch_lsx_bnz_v:
5467 case Intrinsic::loongarch_lasx_xbnz_v:
5468 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5469 LoongArchISD::VANY_NONZERO);
5470 break;
5471 }
5472}
5473
5476 SelectionDAG &DAG) {
5477 assert(N->getValueType(0) == MVT::i128 &&
5478 "AtomicCmpSwap on types less than 128 should be legal");
5479 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5480
5481 unsigned Opcode;
5482 switch (MemOp->getMergedOrdering()) {
5486 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5487 break;
5490 Opcode = LoongArch::PseudoCmpXchg128;
5491 break;
5492 default:
5493 llvm_unreachable("Unexpected ordering!");
5494 }
5495
5496 SDLoc DL(N);
5497 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5498 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5499 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5500 NewVal.first, NewVal.second, N->getOperand(0)};
5501
5502 SDNode *CmpSwap = DAG.getMachineNode(
5503 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5504 Ops);
5505 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5506 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5507 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5508 Results.push_back(SDValue(CmpSwap, 3));
5509}
5510
5513 SDLoc DL(N);
5514 EVT VT = N->getValueType(0);
5515 switch (N->getOpcode()) {
5516 default:
5517 llvm_unreachable("Don't know how to legalize this operation");
5518 case ISD::ADD:
5519 case ISD::SUB:
5520 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5521 "Unexpected custom legalisation");
5522 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5523 break;
5524 case ISD::SDIV:
5525 case ISD::UDIV:
5526 case ISD::SREM:
5527 case ISD::UREM:
5528 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5529 "Unexpected custom legalisation");
5530 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5531 Subtarget.hasDiv32() && VT == MVT::i32
5533 : ISD::SIGN_EXTEND));
5534 break;
5535 case ISD::SHL:
5536 case ISD::SRA:
5537 case ISD::SRL:
5538 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5539 "Unexpected custom legalisation");
5540 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5541 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5542 break;
5543 }
5544 break;
5545 case ISD::ROTL:
5546 case ISD::ROTR:
5547 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5548 "Unexpected custom legalisation");
5549 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5550 break;
5551 case ISD::LOAD: {
5552 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5553 // scalarizing in 32-bit mode. In 64-bit mode this avoids a int->fp
5554 // cast since type legalization will try to use an i64 load.
5555 MVT VT = N->getSimpleValueType(0);
5556 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5557 "Unexpected custom legalisation");
5559 "Unexpected type action!");
5560 if (!ISD::isNON_EXTLoad(N))
5561 return;
5562 auto *Ld = cast<LoadSDNode>(N);
5563 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5564 Ld->getPointerInfo(), Ld->getBaseAlign(),
5565 Ld->getMemOperand()->getFlags());
5566 SDValue Chain = Res.getValue(1);
5567 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5568 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5569 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5570 Res = DAG.getBitcast(WideVT, Res);
5571 Results.push_back(Res);
5572 Results.push_back(Chain);
5573 break;
5574 }
5575 case ISD::FP_TO_SINT: {
5576 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5577 "Unexpected custom legalisation");
5578 SDValue Src = N->getOperand(0);
5579 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5580 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5582 if (!isTypeLegal(Src.getValueType()))
5583 return;
5584 if (Src.getValueType() == MVT::f16)
5585 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5586 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5587 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5588 return;
5589 }
5590 // If the FP type needs to be softened, emit a library call using the 'si'
5591 // version. If we left it to default legalization we'd end up with 'di'.
5592 RTLIB::Libcall LC;
5593 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5594 MakeLibCallOptions CallOptions;
5595 EVT OpVT = Src.getValueType();
5596 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5597 SDValue Chain = SDValue();
5598 SDValue Result;
5599 std::tie(Result, Chain) =
5600 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5601 Results.push_back(Result);
5602 break;
5603 }
5604 case ISD::BITCAST: {
5605 SDValue Src = N->getOperand(0);
5606 EVT SrcVT = Src.getValueType();
5607 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5608 Subtarget.hasBasicF()) {
5609 SDValue Dst =
5610 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5611 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5612 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5613 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5614 DAG.getVTList(MVT::i32, MVT::i32), Src);
5615 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5616 NewReg.getValue(0), NewReg.getValue(1));
5617 Results.push_back(RetReg);
5618 }
5619 break;
5620 }
5621 case ISD::FP_TO_UINT: {
5622 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5623 "Unexpected custom legalisation");
5624 auto &TLI = DAG.getTargetLoweringInfo();
5625 SDValue Tmp1, Tmp2;
5626 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5627 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5628 break;
5629 }
5630 case ISD::FP_ROUND: {
5631 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5632 "Unexpected custom legalisation");
5633 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5634 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5635 // a target-specific LoongArchISD::VFCVT to optimize it.
5636 SDValue Op0 = N->getOperand(0);
5637 EVT OpVT = Op0.getValueType();
5638 if (OpVT == MVT::v2f64) {
5639 SDValue Undef = DAG.getUNDEF(OpVT);
5640 SDValue Dst =
5641 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5642 Results.push_back(Dst);
5643 }
5644 break;
5645 }
5646 case ISD::BSWAP: {
5647 SDValue Src = N->getOperand(0);
5648 assert((VT == MVT::i16 || VT == MVT::i32) &&
5649 "Unexpected custom legalization");
5650 MVT GRLenVT = Subtarget.getGRLenVT();
5651 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5652 SDValue Tmp;
5653 switch (VT.getSizeInBits()) {
5654 default:
5655 llvm_unreachable("Unexpected operand width");
5656 case 16:
5657 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5658 break;
5659 case 32:
5660 // Only LA64 will get to here due to the size mismatch between VT and
5661 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
5662 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5663 break;
5664 }
5665 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5666 break;
5667 }
5668 case ISD::BITREVERSE: {
5669 SDValue Src = N->getOperand(0);
5670 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5671 "Unexpected custom legalization");
5672 MVT GRLenVT = Subtarget.getGRLenVT();
5673 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5674 SDValue Tmp;
5675 switch (VT.getSizeInBits()) {
5676 default:
5677 llvm_unreachable("Unexpected operand width");
5678 case 8:
5679 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5680 break;
5681 case 32:
5682 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5683 break;
5684 }
5685 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5686 break;
5687 }
5688 case ISD::CTLZ:
5689 case ISD::CTTZ: {
5690 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5691 "Unexpected custom legalisation");
5692 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5693 break;
5694 }
5696 SDValue Chain = N->getOperand(0);
5697 SDValue Op2 = N->getOperand(2);
5698 MVT GRLenVT = Subtarget.getGRLenVT();
5699 const StringRef ErrorMsgOOR = "argument out of range";
5700 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5701 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5702
5703 switch (N->getConstantOperandVal(1)) {
5704 default:
5705 llvm_unreachable("Unexpected Intrinsic.");
5706 case Intrinsic::loongarch_movfcsr2gr: {
5707 if (!Subtarget.hasBasicF()) {
5708 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5709 return;
5710 }
5711 unsigned Imm = Op2->getAsZExtVal();
5712 if (!isUInt<2>(Imm)) {
5713 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5714 return;
5715 }
5716 SDValue MOVFCSR2GRResults = DAG.getNode(
5717 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5718 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5719 Results.push_back(
5720 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5721 Results.push_back(MOVFCSR2GRResults.getValue(1));
5722 break;
5723 }
5724#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5725 case Intrinsic::loongarch_##NAME: { \
5726 SDValue NODE = DAG.getNode( \
5727 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5728 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5729 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5730 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5731 Results.push_back(NODE.getValue(1)); \
5732 break; \
5733 }
5734 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5735 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5736 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5737 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5738 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5739 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5740#undef CRC_CASE_EXT_BINARYOP
5741
5742#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5743 case Intrinsic::loongarch_##NAME: { \
5744 SDValue NODE = DAG.getNode( \
5745 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5746 {Chain, Op2, \
5747 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5748 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5749 Results.push_back(NODE.getValue(1)); \
5750 break; \
5751 }
5752 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5753 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5754#undef CRC_CASE_EXT_UNARYOP
5755#define CSR_CASE(ID) \
5756 case Intrinsic::loongarch_##ID: { \
5757 if (!Subtarget.is64Bit()) \
5758 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5759 break; \
5760 }
5761 CSR_CASE(csrrd_d);
5762 CSR_CASE(csrwr_d);
5763 CSR_CASE(csrxchg_d);
5764 CSR_CASE(iocsrrd_d);
5765#undef CSR_CASE
5766 case Intrinsic::loongarch_csrrd_w: {
5767 unsigned Imm = Op2->getAsZExtVal();
5768 if (!isUInt<14>(Imm)) {
5769 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5770 return;
5771 }
5772 SDValue CSRRDResults =
5773 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5774 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5775 Results.push_back(
5776 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5777 Results.push_back(CSRRDResults.getValue(1));
5778 break;
5779 }
5780 case Intrinsic::loongarch_csrwr_w: {
5781 unsigned Imm = N->getConstantOperandVal(3);
5782 if (!isUInt<14>(Imm)) {
5783 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5784 return;
5785 }
5786 SDValue CSRWRResults =
5787 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5788 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5789 DAG.getConstant(Imm, DL, GRLenVT)});
5790 Results.push_back(
5791 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5792 Results.push_back(CSRWRResults.getValue(1));
5793 break;
5794 }
5795 case Intrinsic::loongarch_csrxchg_w: {
5796 unsigned Imm = N->getConstantOperandVal(4);
5797 if (!isUInt<14>(Imm)) {
5798 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5799 return;
5800 }
5801 SDValue CSRXCHGResults = DAG.getNode(
5802 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5803 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5804 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5805 DAG.getConstant(Imm, DL, GRLenVT)});
5806 Results.push_back(
5807 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5808 Results.push_back(CSRXCHGResults.getValue(1));
5809 break;
5810 }
5811#define IOCSRRD_CASE(NAME, NODE) \
5812 case Intrinsic::loongarch_##NAME: { \
5813 SDValue IOCSRRDResults = \
5814 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5815 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5816 Results.push_back( \
5817 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5818 Results.push_back(IOCSRRDResults.getValue(1)); \
5819 break; \
5820 }
5821 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5822 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5823 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5824#undef IOCSRRD_CASE
5825 case Intrinsic::loongarch_cpucfg: {
5826 SDValue CPUCFGResults =
5827 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5828 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5829 Results.push_back(
5830 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5831 Results.push_back(CPUCFGResults.getValue(1));
5832 break;
5833 }
5834 case Intrinsic::loongarch_lddir_d: {
5835 if (!Subtarget.is64Bit()) {
5836 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5837 return;
5838 }
5839 break;
5840 }
5841 }
5842 break;
5843 }
5844 case ISD::READ_REGISTER: {
5845 if (Subtarget.is64Bit())
5846 DAG.getContext()->emitError(
5847 "On LA64, only 64-bit registers can be read.");
5848 else
5849 DAG.getContext()->emitError(
5850 "On LA32, only 32-bit registers can be read.");
5851 Results.push_back(DAG.getUNDEF(VT));
5852 Results.push_back(N->getOperand(0));
5853 break;
5854 }
5856 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5857 break;
5858 }
5859 case ISD::LROUND: {
5860 SDValue Op0 = N->getOperand(0);
5861 EVT OpVT = Op0.getValueType();
5862 RTLIB::Libcall LC =
5863 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5864 MakeLibCallOptions CallOptions;
5865 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5866 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5867 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5868 Results.push_back(Result);
5869 break;
5870 }
5871 case ISD::ATOMIC_CMP_SWAP: {
5873 break;
5874 }
5875 case ISD::TRUNCATE: {
5876 MVT VT = N->getSimpleValueType(0);
5877 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5878 return;
5879
5880 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5881 SDValue In = N->getOperand(0);
5882 EVT InVT = In.getValueType();
5883 EVT InEltVT = InVT.getVectorElementType();
5884 EVT EltVT = VT.getVectorElementType();
5885 unsigned MinElts = VT.getVectorNumElements();
5886 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5887 unsigned InBits = InVT.getSizeInBits();
5888
5889 // v8i64 -> (v8i32) -> v8i8
5890 if (InVT == MVT::v8i64 && WidenVT.is128BitVector()) {
5891 InVT = MVT::getVectorVT(MVT::getIntegerVT(256 / MinElts), MinElts);
5892 In = DAG.getNode(N->getOpcode(), DL, InVT, In);
5893 InBits = 256;
5894 }
5895
5896 // v8i32 -> v8i8 / v4i64 -> v4i16 / v4i64 -> v4i8
5897 if ((InVT == MVT::v8i32 || InVT == MVT::v4i64) &&
5898 WidenVT.is128BitVector()) {
5899 InVT = MVT::getVectorVT(MVT::getIntegerVT(128 / MinElts), MinElts);
5900 In = DAG.getNode(N->getOpcode(), DL, InVT, In);
5901 InBits = 128;
5902 InEltVT = InVT.getVectorElementType();
5903 }
5904
5905 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5906 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5907 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5908 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5909 for (unsigned I = 0; I < MinElts; ++I)
5910 TruncMask[I] = Scale * I;
5911
5912 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5913 MVT SVT = In.getSimpleValueType().getScalarType();
5914 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5915 SDValue WidenIn =
5916 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5917 DAG.getVectorIdxConstant(0, DL));
5918 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5919 "Illegal vector type in truncation");
5920 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5921 Results.push_back(
5922 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5923 return;
5924 }
5925 }
5926
5927 break;
5928 }
5929 case ISD::SIGN_EXTEND: {
5930 // LASX has native VEXT2XV_* for sign extension.
5931 if (!Subtarget.hasExtLSX() || Subtarget.hasExtLASX())
5932 return;
5933
5934 EVT DstVT = N->getValueType(0);
5935 SDValue Src = N->getOperand(0);
5936 MVT SrcVT = Src.getSimpleValueType();
5937
5938 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
5939 unsigned DstEltBits = DstVT.getScalarSizeInBits();
5940 unsigned NumElts = DstVT.getVectorNumElements();
5941
5942 if (SrcVT.getSizeInBits() > 128)
5943 return;
5944
5945 if (!DstVT.isVector() || DstVT.getSizeInBits() <= 128)
5946 return;
5947
5948 // Legalize and extend the src to 128-bit first.
5949 if (SrcVT.getSizeInBits() < 128) {
5950 unsigned WidenSrcElts = 128 / SrcEltBits;
5951 MVT WidenSrcVT = MVT::getVectorVT(SrcVT.getScalarType(), WidenSrcElts);
5952 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WidenSrcVT,
5953 DAG.getUNDEF(WidenSrcVT), Src,
5954 DAG.getVectorIdxConstant(0, DL));
5955 SrcVT = WidenSrcVT;
5956
5957 unsigned FirstStageEltBits = 128 / NumElts;
5958 MVT FirstStageEltVT = MVT::getIntegerVT(FirstStageEltBits);
5959 MVT FirstStageVT = MVT::getVectorVT(FirstStageEltVT, NumElts);
5960 Src = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, FirstStageVT, Src);
5961 SrcVT = FirstStageVT;
5962 SrcEltBits = FirstStageEltBits;
5963 }
5964
5966 Blocks.push_back(Src);
5967
5968 // Sign-extend the src by using SLTI + VILVL + VILVH recursively.
5969 while (SrcEltBits < DstEltBits) {
5970 unsigned NextEltBits = SrcEltBits * 2;
5971 MVT NextEltVT = MVT::getIntegerVT(NextEltBits);
5972 unsigned CurEltsPerBlock = SrcVT.getVectorNumElements();
5973 unsigned NextEltsPerBlock = CurEltsPerBlock / 2;
5974 MVT NextBlockVT = MVT::getVectorVT(NextEltVT, NextEltsPerBlock);
5975
5976 SmallVector<SDValue, 8> NextBlocks;
5977 NextBlocks.reserve(Blocks.size() * 2);
5978 for (SDValue Block : Blocks) {
5979 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
5980 SDValue Mask = DAG.getNode(ISD::SETCC, DL, SrcVT, Block, Zero,
5981 DAG.getCondCode(ISD::SETLT));
5982 SDValue LoInterleaved =
5983 DAG.getNode(LoongArchISD::VILVL, DL, SrcVT, Mask, Block);
5984 SDValue HiInterleaved =
5985 DAG.getNode(LoongArchISD::VILVH, DL, SrcVT, Mask, Block);
5986
5987 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, LoInterleaved));
5988 NextBlocks.push_back(DAG.getBitcast(NextBlockVT, HiInterleaved));
5989 }
5990
5991 Blocks = std::move(NextBlocks);
5992 SrcVT = NextBlockVT;
5993 SrcEltBits = NextEltBits;
5994 }
5995
5996 Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Blocks));
5997 break;
5998 }
5999 case ISD::FP_EXTEND:
6000 // FP_EXTEND may reach here due to the Custom action for v2f32 results, but
6001 // no target-specific lowering is required. Leave it unchanged and rely on
6002 // the default type legalization.
6003 break;
6004 }
6005}
6006
6007 /// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
///
/// Only 128-bit and 256-bit vector result types are considered, and either
/// AND operand may be the inverted one. An empty SDValue is returned when the
/// type does not qualify or when isNOT recognises neither operand.
6009 SelectionDAG &DAG) {
6010 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
6011
6012 MVT VT = N->getSimpleValueType(0);
6013 if (!VT.is128BitVector() && !VT.is256BitVector())
6014 return SDValue();
6015
6016 SDValue X, Y;
6017 SDValue N0 = N->getOperand(0);
6018 SDValue N1 = N->getOperand(1);
6019
// X receives whatever isNOT returns for the inverted operand; Y is the other
// AND operand. NOTE(review): isNOT is defined elsewhere; presumably it yields
// the value being inverted -- confirm.
6020 if (SDValue Not = isNOT(N0, DAG)) {
6021 X = Not;
6022 Y = N1;
6023 } else if (SDValue Not = isNOT(N1, DAG)) {
6024 X = Not;
6025 Y = N0;
6026 } else
6027 return SDValue();
6028
// Bring both operands to the result type before building the VANDN node.
6029 X = DAG.getBitcast(VT, X);
6030 Y = DAG.getBitcast(VT, Y);
6031 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
6032 }
6033
// Report whether N is a constant splat by delegating to
// Node->isConstantSplat(). SplatValue is handed through as the output
// argument, MinSizeInBits is forwarded unchanged, and the query is always
// made with IsBigEndian=false. Returns false when Node is null.
// NOTE(review): the statement that defines Node is not visible in this
// listing; it sits between the signature and the null check.
6034 static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
6035 unsigned MinSizeInBits) {
6038
6039 if (!Node)
6040 return false;
6041
// These are required out-parameters of isConstantSplat; their values are not
// inspected here.
6042 APInt SplatUndef;
6043 unsigned SplatBitSize;
6044 bool HasAnyUndefs;
6045
6046 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
6047 HasAnyUndefs, MinSizeInBits,
6048 /*IsBigEndian=*/false);
6049 }
6050
6051static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex) {
6052 auto *BV = dyn_cast<BuildVectorSDNode>(N);
6053 if (!BV)
6054 return SDValue();
6055
6056 SDValue Src;
6057 int Start = -1;
6058
6059 for (unsigned i = 0, NumElts = BV->getNumOperands(); i < NumElts; ++i) {
6060 SDValue Op = BV->getOperand(i);
6061 if (Op.isUndef())
6062 continue;
6063 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
6064 return SDValue();
6065
6066 auto *IdxC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6067 if (!IdxC)
6068 return SDValue();
6069
6070 unsigned EltIdx = IdxC->getZExtValue();
6071 if (Start < 0)
6072 Start = (int)EltIdx - (int)(i * 2);
6073 if (Start < 0 || Start > 1 || EltIdx != (unsigned)(Start + (int)(i * 2)))
6074 return SDValue();
6075
6076 SDValue CurSrc = Op.getOperand(0);
6077 if (!Src)
6078 Src = CurSrc;
6079 else if (Src != CurSrc)
6080 return SDValue();
6081 }
6082
6083 if (!Src || Start < 0)
6084 return SDValue();
6085
6086 StartIndex = (unsigned)Start;
6087 return Src;
6088}
6089
// Fold an ADD/SUB of two equally extended (both sign- or both zero-extended)
// de-interleaved vectors into a single VHADDW/VHSUBW (signed) or
// VHADDW_U/VHSUBW_U (unsigned) node. The left operand must select the odd
// elements and the right operand the even elements, as reported by
// matchDeinterleaveBuildVector. Requires LSX. Returns an empty SDValue when
// the pattern does not match.
6090 static SDValue
6092 const LoongArchSubtarget &Subtarget) {
6093 if (!Subtarget.hasExtLSX())
6094 return SDValue();
6095
6096 unsigned Opc = N->getOpcode();
6097 assert((Opc == ISD::ADD || Opc == ISD::SUB) && "Unexpected opcode");
6098
6099 EVT VT = N->getValueType(0);
6100 SDLoc DL(N);
6101
6102 SDValue LHS = N->getOperand(0);
6103 SDValue RHS = N->getOperand(1);
6104
// The kind of extension on the left operand decides signedness; the right
// operand must use the same extension.
6105 bool isSigned;
6106 unsigned ExtOpc = LHS.getOpcode();
6107 if (ExtOpc == ISD::SIGN_EXTEND)
6108 isSigned = true;
6109 else if (ExtOpc == ISD::ZERO_EXTEND)
6110 isSigned = false;
6111 else
6112 return SDValue();
6113
6114 if (ExtOpc != RHS.getOpcode())
6115 return SDValue();
6116
// Both extensions must have no other users.
6117 if (!LHS.hasOneUse() || !RHS.hasOneUse())
6118 return SDValue();
6119
6120 unsigned OddIdx, EvenIdx;
6121 SDValue LHSVec = matchDeinterleaveBuildVector(LHS.getOperand(0), OddIdx);
6122 SDValue RHSVec = matchDeinterleaveBuildVector(RHS.getOperand(0), EvenIdx);
6123
// The index variables are only read after both matches succeeded, which is
// the only case in which the matcher writes them.
6124 if (!LHSVec || !RHSVec)
6125 return SDValue();
6126 if (OddIdx != 1 || EvenIdx != 0)
6127 return SDValue();
6128 if (LHSVec.getValueType() != RHSVec.getValueType())
6129 return SDValue();
6130
6131 EVT SrcVT = LHSVec.getValueType();
6132 EVT SrcEltVT = SrcVT.getVectorElementType();
6133 EVT DstEltVT = VT.getVectorElementType();
6134 auto &TLI = DAG.getTargetLoweringInfo();
6135
// Type constraints: both types legal, same total width, destination elements
// exactly twice as wide as the integer source elements, and source elements
// at most 32 bits.
6136 if (!TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
6137 return SDValue();
6138 if (!SrcVT.isVector() || !VT.isVector())
6139 return SDValue();
6140 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6141 return SDValue();
6142 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6143 return SDValue();
6144 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6145 return SDValue();
6146
6147 unsigned TargetOpc;
6148 if (Opc == ISD::ADD)
6149 TargetOpc = isSigned ? LoongArchISD::VHADDW : LoongArchISD::VHADDW_U;
6150 else
6151 TargetOpc = isSigned ? LoongArchISD::VHSUBW : LoongArchISD::VHSUBW_U;
6152
6153 return DAG.getNode(TargetOpc, DL, VT, LHSVec, RHSVec);
6154 }
6155
6158 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// The body first tries the horizontal-widening fold. Then, once operation
// legalization has started, it recognises the rounded right-shift idiom
// described below on legal integer vector types and replaces it with a single
// VSRLR/VSRAR node. An empty SDValue means "no change".
6159 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6160 return V;
6161
6162 if (DCI.isBeforeLegalizeOps())
6163 return SDValue();
6164
6165 EVT VT = N->getValueType(0);
6166 if (!VT.isVector())
6167 return SDValue();
6168
6169 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
6170 return SDValue();
6171
6172 EVT EltVT = VT.getVectorElementType();
6173 if (!EltVT.isInteger())
6174 return SDValue();
6175
6176 // match:
6177 //
6178 // add
6179 // (and
6180 // (srl X, shift-1) / X
6181 // 1)
6182 // (srl/sra X, shift)
6183
6184 SDValue Add0 = N->getOperand(0);
6185 SDValue Add1 = N->getOperand(1);
6186 SDValue And;
6187 SDValue Shr;
6188
// The AND may be either operand of the add; the other one is the shift.
6189 if (Add0.getOpcode() == ISD::AND) {
6190 And = Add0;
6191 Shr = Add1;
6192 } else if (Add1.getOpcode() == ISD::AND) {
6193 And = Add1;
6194 Shr = Add0;
6195 } else {
6196 return SDValue();
6197 }
6198
6199 // match:
6200 //
6201 // srl/sra X, shift
6202
6203 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
6204 return SDValue();
6205
6206 SDValue X = Shr.getOperand(0);
6207 SDValue Shift = Shr.getOperand(1);
6208 APInt ShiftVal;
6209
// The shift amount must be a constant splat.
6210 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
6211 return SDValue();
6212
// With a zero shift there is no bit at position shift-1 to round with.
6213 if (ShiftVal == 0)
6214 return SDValue();
6215
6216 // match:
6217 //
6218 // and
6219 // (srl X, shift-1) / X
6220 // 1
6221
6222 SDValue One = And.getOperand(1);
6223 APInt SplatVal;
6224
6225 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
6226 return SDValue();
6227
6228 if (SplatVal != 1)
6229 return SDValue();
6230
6231 if (And.getOperand(0) == X) {
6232 // match:
6233 //
6234 // shift == 1
6235
// (and X, 1) is the bit at position shift-1 only when shift-1 is 0.
6236 if (ShiftVal != 1)
6237 return SDValue();
6238 } else {
6239 // match:
6240 //
6241 // srl X, shift-1
6242
6243 SDValue Srl = And.getOperand(0);
6244
6245 if (Srl.getOpcode() != ISD::SRL)
6246 return SDValue();
6247
6248 if (Srl.getOperand(0) != X)
6249 return SDValue();
6250
6251 // match:
6252 //
6253 // shift-1
6254
6255 SDValue ShiftMinus1 = Srl.getOperand(1);
6256
// SplatVal is reused here to hold the inner shift amount.
6257 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
6258 return SDValue();
6259
6260 if (ShiftVal != (SplatVal + 1))
6261 return SDValue();
6262 }
6263
6264 // We matched a rounded right shift pattern and can lower it
6265 // to a single vector rounded shift instruction.
6266
// SRL selects VSRLR and SRA selects VSRAR; the original shift-amount operand
// is reused unchanged.
6267 SDLoc DL(N);
6268 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
6269 : LoongArchISD::VSRAR,
6270 DL, VT, X, Shift);
6271 }
6272
6275 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// Once operation legalization has started, the body first tries the vector
// and-not fold. If that does not apply and the subtarget has the 32S feature,
// it tries to turn an AND with a shifted-mask constant into BSTRPICK
// (followed by a left shift when the mask does not start at bit 0).
6276 if (DCI.isBeforeLegalizeOps())
6277 return SDValue();
6278
6279 SDValue FirstOperand = N->getOperand(0);
6280 SDValue SecondOperand = N->getOperand(1);
6281 unsigned FirstOperandOpc = FirstOperand.getOpcode();
6282 EVT ValTy = N->getValueType(0);
6283 SDLoc DL(N);
6284 uint64_t lsb, msb;
6285 unsigned SMIdx, SMLen;
6286 ConstantSDNode *CN;
6287 SDValue NewOperand;
6288 MVT GRLenVT = Subtarget.getGRLenVT();
6289
6290 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
6291 return R;
6292
6293 // BSTRPICK requires the 32S feature.
6294 if (!Subtarget.has32S())
6295 return SDValue();
6296
6297 // Op's second operand must be a shifted mask.
// SMIdx receives the index of the mask's lowest set bit and SMLen its length.
6298 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
6299 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
6300 return SDValue();
6301
6302 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
6303 // Pattern match BSTRPICK.
6304 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
6305 // => BSTRPICK $dst, $src, msb, lsb
6306 // where msb = lsb + len - 1
6307
6308 // The second operand of the shift must be an immediate.
// From here on CN refers to the shift amount, no longer to the mask.
6309 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
6310 return SDValue();
6311
6312 lsb = CN->getZExtValue();
6313
6314 // Return if the shifted mask does not start at bit 0 or the sum of its
6315 // length and lsb exceeds the word's size.
6316 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
6317 return SDValue();
6318
6319 NewOperand = FirstOperand.getOperand(0);
6320 } else {
6321 // Pattern match BSTRPICK.
6322 // $dst = and $src, (2**len- 1) , if len > 12
6323 // => BSTRPICK $dst, $src, msb, lsb
6324 // where lsb = 0 and msb = len - 1
6325
6326 // If the mask is <= 0xfff, andi can be used instead.
6327 if (CN->getZExtValue() <= 0xfff)
6328 return SDValue();
6329
6330 // Return if the MSB exceeds.
6331 if (SMIdx + SMLen > ValTy.getSizeInBits())
6332 return SDValue();
6333
6334 if (SMIdx > 0) {
6335 // Omit if the constant has more than 2 uses. This is a conservative
6336 // decision. Whether it is a win depends on the HW microarchitecture.
6337 // However it should always be better for 1 and 2 uses.
6338 if (CN->use_size() > 2)
6339 return SDValue();
6340 // Return if the constant can be composed by a single LU12I.W.
6341 if ((CN->getZExtValue() & 0xfff) == 0)
6342 return SDValue();
6343 // Return if the constant can be composed by a single ADDI with
6344 // the zero register.
6345 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
6346 return SDValue();
6347 }
6348
6349 lsb = SMIdx;
6350 NewOperand = FirstOperand;
6351 }
6352
6353 msb = lsb + SMLen - 1;
6354 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
6355 DAG.getConstant(msb, DL, GRLenVT),
6356 DAG.getConstant(lsb, DL, GRLenVT));
// The extracted field is already in its final position when the input was a
// shift or the mask starts at bit 0.
6357 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6358 return NR0;
6359 // Try to optimize to
6360 // bstrpick $Rd, $Rs, msb, lsb
6361 // slli $Rd, $Rd, lsb
6362 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6363 DAG.getConstant(lsb, DL, GRLenVT));
6364 }
6365
6366 // Return the original source vector if N consists of the low half
6367 // of each 128-bit lane.
// Two shapes are accepted: an EXTRACT_SUBVECTOR at index 0 taking half of a
// 128-bit vector, or a BUILD_VECTOR of constant-index element extracts from a
// single 256-bit vector. An empty SDValue is returned otherwise.
// NOTE(review): the signature line is not visible in this listing.
6370
6371 EVT DstVT = N.getValueType();
6372 if (!DstVT.isVector())
6373 return SDValue();
6374
6375 // LSX canonical form:
6376 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6377 SDValue Src = N.getOperand(0);
6378 EVT SrcVT = Src.getValueType();
6379
6380 if (!SrcVT.isVector() || !SrcVT.is128BitVector())
6381 return SDValue();
6382 if (N.getConstantOperandVal(1) != 0)
6383 return SDValue();
6384 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6385 return SDValue();
6386 if (SrcVT.getVectorNumElements() != DstVT.getVectorNumElements() * 2)
6387 return SDValue();
6388
6389 return Src;
6390 }
6391
6392 // LASX canonical form:
6393 auto *BV = dyn_cast<BuildVectorSDNode>(N);
6394 if (!BV)
6395 return SDValue();
6396
6397 unsigned NumElts = DstVT.getVectorNumElements();
6398 if (NumElts % 2 != 0)
6399 return SDValue();
6400
6401 SDValue Src;
6402 EVT SrcVT;
6403
6404 for (unsigned I = 0; I != NumElts; ++I) {
6405 SDValue Elt = BV->getOperand(I);
// NOTE(review): the condition guarding the next return is not visible in this
// listing; presumably it rejects operands that are not EXTRACT_VECTOR_ELT.
6407 return SDValue();
6408
6409 SDValue ThisSrc = Elt.getOperand(0);
6410 SDValue Idx = Elt.getOperand(1);
6411 auto *CI = dyn_cast<ConstantSDNode>(Idx);
6412 if (!CI)
6413 return SDValue();
6414
// The first element fixes the source vector and validates its type; every
// later element must read from that same vector.
6415 if (!Src) {
6416 Src = ThisSrc;
6417 SrcVT = Src.getValueType();
6418 if (!SrcVT.isVector())
6419 return SDValue();
6420
6421 if (SrcVT.getSizeInBits() != DstVT.getSizeInBits() * 2)
6422 return SDValue();
6423 if (SrcVT.getVectorNumElements() != NumElts * 2)
6424 return SDValue();
6425 if (!SrcVT.is256BitVector())
6426 return SDValue();
6427 } else if (ThisSrc != Src) {
6428 return SDValue();
6429 }
6430
// Result elements [0, Half) come from source indices [0, Half); result
// elements [Half, NumElts) come from source indices [2*Half, 3*Half), i.e.
// the start of the source's second half.
6431 unsigned Half = NumElts / 2;
6432 unsigned ExpectedIdx = (I < Half) ? I : (I + Half);
6433 if (CI->getZExtValue() != ExpectedIdx)
6434 return SDValue();
6435 }
6436
6437 return Src;
6438 }
6439
6442 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// Folds (shl (sext/zext LowHalves), splat Imm) into VSLLWIL (signed) or
// VSLLWIL_U (unsigned) on the full source vector, where LowHalves is the
// low half of each 128-bit lane as recognised by matchLowHalfOf128BitLanes.
// Requires LSX. Returns an empty SDValue when the pattern does not match.
6443 if (!Subtarget.hasExtLSX())
6444 return SDValue();
6445
6446 assert(N->getOpcode() == ISD::SHL && "Unexpected opcode");
6447
6448 EVT VT = N->getValueType(0);
6449 SDLoc DL(N);
6450
6451 SDValue LHS = N->getOperand(0);
6452 SDValue RHS = N->getOperand(1);
6453
6454 bool isSigned;
6455 unsigned ExtOpc = LHS.getOpcode();
6456 if (ExtOpc == ISD::SIGN_EXTEND)
6457 isSigned = true;
6458 else if (ExtOpc == ISD::ZERO_EXTEND)
6459 isSigned = false;
6460 else
6461 return SDValue();
6462
// The extension must have no other users.
6463 if (!LHS.hasOneUse())
6464 return SDValue();
6465
6466 SDValue Vec = matchLowHalfOf128BitLanes(LHS.getOperand(0));
6467 if (!Vec)
6468 return SDValue();
6469
6470 EVT SrcVT = Vec.getValueType();
6471 EVT SrcEltVT = SrcVT.getVectorElementType();
6472 EVT DstEltVT = VT.getVectorElementType();
6473
// Source and result must have the same total width, with result elements
// exactly twice as wide as the integer source elements (at most 32 bits).
6474 if (!SrcVT.isVector() || !VT.isVector())
6475 return SDValue();
6476 if (SrcVT.getSizeInBits() != VT.getSizeInBits())
6477 return SDValue();
6478 if (DstEltVT.getSizeInBits() != SrcEltVT.getSizeInBits() * 2)
6479 return SDValue();
6480 if (!SrcEltVT.isInteger() || SrcEltVT.getSizeInBits() > 32)
6481 return SDValue();
6482
// The shift amount must be a constant splat strictly smaller than the source
// element width.
6483 APInt Imm;
6484 if (!isConstantSplatVector(RHS, Imm, DstEltVT.getSizeInBits()))
6485 return SDValue();
6486 if (!Imm.ult(SrcEltVT.getSizeInBits()))
6487 return SDValue();
6488
6489 unsigned Opc = isSigned ? LoongArchISD::VSLLWIL : LoongArchISD::VSLLWIL_U;
6490 SDValue Sht = DAG.getConstant(Imm.getZExtValue(), DL, Subtarget.getGRLenVT());
6491 return DAG.getNode(Opc, DL, VT, Vec, Sht);
6492 }
6493
6496 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// Once operation legalization has started and the subtarget has the 32S
// feature, turns a logical right shift of a shifted-mask AND into a single
// BSTRPICK. Returns an empty SDValue when the pattern does not match.
6497 // BSTRPICK requires the 32S feature.
6498 if (!Subtarget.has32S())
6499 return SDValue();
6500
6501 if (DCI.isBeforeLegalizeOps())
6502 return SDValue();
6503
6504 // $dst = srl (and $src, Mask), Shamt
6505 // =>
6506 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6507 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6508 //
6509
6510 SDValue FirstOperand = N->getOperand(0);
6511 ConstantSDNode *CN;
6512 EVT ValTy = N->getValueType(0);
6513 SDLoc DL(N);
6514 MVT GRLenVT = Subtarget.getGRLenVT();
6515 unsigned MaskIdx, MaskLen;
6516 uint64_t Shamt;
6517
6518 // The first operand must be an AND and the second operand of the AND must be
6519 // a shifted mask.
6520 if (FirstOperand.getOpcode() != ISD::AND ||
6521 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6522 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6523 return SDValue();
6524
6525 // The second operand (shift amount) must be an immediate.
// CN is reused: from here on it refers to the shift amount, not the mask.
6526 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6527 return SDValue();
6528
6529 Shamt = CN->getZExtValue();
// The shift amount must fall inside the mask's bit range.
6530 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6531 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6532 FirstOperand->getOperand(0),
6533 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6534 DAG.getConstant(Shamt, DL, GRLenVT));
6535
6536 return SDValue();
6537 }
6538
6541 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// The only fold attempted here is the horizontal-widening combine; an empty
// SDValue is returned when it does not apply.
6542 if (SDValue V = performHorizWideningCombine(N, DAG, Subtarget))
6543 return V;
6544
6545 return SDValue();
6546 }
6547
6548 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
6549 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
// Returns true when every leaf reached through FREEZE, AND/XOR/OR and
// SELECT/VSELECT is a SETCC or TRUNCATE whose operand is Size bits wide, or
// an all-zeros / all-ones BUILD_VECTOR. Any other opcode yields false.
6550 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6551 unsigned Depth) {
6552 // Limit recursion.
// NOTE(review): the condition guarding the next return is not visible in this
// listing; presumably it compares Depth against a maximum.
6554 return false;
6555 switch (Src.getOpcode()) {
6556 case ISD::SETCC:
6557 case ISD::TRUNCATE:
6558 return Src.getOperand(0).getValueSizeInBits() == Size;
6559 case ISD::FREEZE:
6560 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6561 case ISD::AND:
6562 case ISD::XOR:
6563 case ISD::OR:
// Both operands of a bitop must qualify.
6564 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6565 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6566 case ISD::SELECT:
6567 case ISD::VSELECT:
// The condition must have i1 scalars and both selected values must qualify.
6568 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6569 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6570 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6571 case ISD::BUILD_VECTOR:
6572 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6573 ISD::isBuildVectorAllOnes(Src.getNode());
6574 }
6575 return false;
6576 }
6577
6578 // Helper to push sign extension of vXi1 SETCC result through bitops.
// Leaves (SETCC, FREEZE, TRUNCATE, BUILD_VECTOR) are sign-extended to SExtVT
// directly. AND/XOR/OR are rebuilt in SExtVT from recursively extended
// operands. SELECT/VSELECT keep their condition and get recursively extended
// value operands. Any other opcode is a programming error.
// NOTE(review): the first line of the signature is not visible in this
// listing.
6580 SDValue Src, const SDLoc &DL) {
6581 switch (Src.getOpcode()) {
6582 case ISD::SETCC:
6583 case ISD::FREEZE:
6584 case ISD::TRUNCATE:
6585 case ISD::BUILD_VECTOR:
6586 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6587 case ISD::AND:
6588 case ISD::XOR:
6589 case ISD::OR:
6590 return DAG.getNode(
6591 Src.getOpcode(), DL, SExtVT,
6592 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6593 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6594 case ISD::SELECT:
6595 case ISD::VSELECT:
6596 return DAG.getSelect(
6597 DL, SExtVT, Src.getOperand(0),
6598 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6599 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6600 }
6601 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6602 }
6603
// Fold a bitcast of a single-use SETCC into one [X]VMSK* node applied to the
// compare's first operand, for compares against an all-zeros or all-ones
// vector. The 128-bit forms require LSX; the 256-bit forms require LASX and
// the 32S feature. Returns an empty SDValue when no form matches.
// NOTE(review): two signature lines are not visible in this listing.
6604 static SDValue
6607 const LoongArchSubtarget &Subtarget) {
6608 SDLoc DL(N);
6609 EVT VT = N->getValueType(0);
6610 SDValue Src = N->getOperand(0);
6611 EVT SrcVT = Src.getValueType();
6612
6613 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6614 return SDValue();
6615
6616 bool UseLASX;
// DELETED_NODE serves as the "no matching opcode found" marker.
6617 unsigned Opc = ISD::DELETED_NODE;
6618 EVT CmpVT = Src.getOperand(0).getValueType();
6619 EVT EltVT = CmpVT.getVectorElementType();
6620
6621 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6622 UseLASX = false;
6623 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6624 CmpVT.getSizeInBits() == 256)
6625 UseLASX = true;
6626 else
6627 return SDValue();
6628
// Pick the mask opcode from the condition code and the constant right-hand
// side. Only the "less than zero" forms accept element types wider than i8.
6629 SDValue SrcN1 = Src.getOperand(1);
6630 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6631 default:
6632 break;
6633 case ISD::SETEQ:
6634 // x == 0 => not (vmsknez.b x)
6635 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6636 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6637 break;
6638 case ISD::SETGT:
6639 // x > -1 => vmskgez.b x
6640 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6641 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6642 break;
6643 case ISD::SETGE:
6644 // x >= 0 => vmskgez.b x
6645 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6646 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6647 break;
6648 case ISD::SETLT:
6649 // x < 0 => vmskltz.{b,h,w,d} x
6650 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6651 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6652 EltVT == MVT::i64))
6653 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6654 break;
6655 case ISD::SETLE:
6656 // x <= -1 => vmskltz.{b,h,w,d} x
6657 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6658 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6659 EltVT == MVT::i64))
6660 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6661 break;
6662 case ISD::SETNE:
6663 // x != 0 => vmsknez.b x
6664 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6665 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6666 break;
6667 }
6668
6669 if (Opc == ISD::DELETED_NODE)
6670 return SDValue();
6671
// The mask is produced in a GRLen-wide integer, resized to T and then bitcast
// to the result type.
// NOTE(review): the statement that defines T is not visible in this listing.
6672 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
6674 V = DAG.getZExtOrTrunc(V, DL, T);
6675 return DAG.getBitcast(VT, V);
6676 }
6677
6680 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the head of this signature is not visible in this listing.
// Before operation legalization, lowers a bitcast whose source is a vector of
// i1 to a mask extraction. The SETCC-specific fold is tried first. Otherwise
// the i1 vector is sign-extended to a full-width vector and VMSKLTZ /
// XVMSKLTZ collects the sign bits into a GRLen-wide integer.
6681 SDLoc DL(N);
6682 EVT VT = N->getValueType(0);
6683 SDValue Src = N->getOperand(0);
6684 EVT SrcVT = Src.getValueType();
6685 MVT GRLenVT = Subtarget.getGRLenVT();
6686
6687 if (!DCI.isBeforeLegalizeOps())
6688 return SDValue();
6689
6690 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6691 return SDValue();
6692
6693 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6694 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6695 if (Res)
6696 return Res;
6697
6698 // Generate vXi1 using [X]VMSKLTZ
6699 MVT SExtVT;
6700 unsigned Opc;
6701 bool UseLASX = false;
6702 bool PropagateSExt = false;
6703
// Give up on single-use compares whose operands are wider than 256 bits.
6704 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6705 EVT CmpVT = Src.getOperand(0).getValueType();
6706 if (CmpVT.getSizeInBits() > 256)
6707 return SDValue();
6708 }
6709
// Choose the vector type to sign-extend to. The default is the 128-bit type
// with the same element count. With LASX, when checkBitcastSrcVectorSize
// reports a 256-bit origin, the 256-bit type is used instead and the
// extension is pushed through the bitops. v32i1 always selects the LASX path.
6710 switch (SrcVT.getSimpleVT().SimpleTy) {
6711 default:
6712 return SDValue();
6713 case MVT::v2i1:
6714 SExtVT = MVT::v2i64;
6715 break;
6716 case MVT::v4i1:
6717 SExtVT = MVT::v4i32;
6718 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6719 SExtVT = MVT::v4i64;
6720 UseLASX = true;
6721 PropagateSExt = true;
6722 }
6723 break;
6724 case MVT::v8i1:
6725 SExtVT = MVT::v8i16;
6726 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6727 SExtVT = MVT::v8i32;
6728 UseLASX = true;
6729 PropagateSExt = true;
6730 }
6731 break;
6732 case MVT::v16i1:
6733 SExtVT = MVT::v16i8;
6734 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6735 SExtVT = MVT::v16i16;
6736 UseLASX = true;
6737 PropagateSExt = true;
6738 }
6739 break;
6740 case MVT::v32i1:
6741 SExtVT = MVT::v32i8;
6742 UseLASX = true;
6743 break;
6744 };
6745 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6746 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6747
// Without LASX (or without 32S), a v32i8 source is split in two, each half is
// reduced with VMSKLTZ, and the high half's mask is shifted left by 16 and
// ORed into the low half's. Any other case that selected the LASX path is
// abandoned.
6748 SDValue V;
6749 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6750 if (Src.getSimpleValueType() == MVT::v32i8) {
6751 SDValue Lo, Hi;
6752 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6753 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6754 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6755 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6756 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6757 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6758 } else if (UseLASX) {
6759 return SDValue();
6760 }
6761 }
6762
// V is still empty unless the split path above produced it.
6763 if (!V) {
6764 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6765 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6766 }
6767
// NOTE(review): the statement that defines T is not visible in this listing.
6769 V = DAG.getZExtOrTrunc(V, DL, T);
6770 return DAG.getBitcast(VT, V);
6771 }
6772
// DAG combine for a two-operand node treated as an OR (see the pattern
// comments below): eight operand patterns are tried and, on a match, a
// LoongArchISD::BSTRINS node is returned in its place. Returns an empty
// SDValue when nothing matches.
// NOTE(review): the line(s) holding the function name and its leading
// parameters (N, DAG and DCI are used below) are absent from this listing --
// restore them from the upstream file.
6775 const LoongArchSubtarget &Subtarget) {
6776 MVT GRLenVT = Subtarget.getGRLenVT();
6777 EVT ValTy = N->getValueType(0);
6778 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6779 ConstantSDNode *CN0, *CN1;
6780 SDLoc DL(N);
6781 unsigned ValBits = ValTy.getSizeInBits();
6782 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6783 unsigned Shamt;
6784 bool SwapAndRetried = false;
6785
6786 // BSTRPICK requires the 32S feature.
6787 if (!Subtarget.has32S())
6788 return SDValue();
6789
// Do nothing while DCI reports that operation legalization has not started.
6790 if (DCI.isBeforeLegalizeOps())
6791 return SDValue();
6792
// Only 32-bit and 64-bit results are handled.
6793 if (ValBits != 32 && ValBits != 64)
6794 return SDValue();
6795
// Patterns 1-7 are tried with (N0, N1) and then, through the swap further
// down, once more with the two operands exchanged.
6796Retry:
6797 // 1st pattern to match BSTRINS:
6798 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6799 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6800 // =>
6801 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6802 if (N0.getOpcode() == ISD::AND &&
6803 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6804 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6805 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6806 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6807 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6808 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6809 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6810 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6811 (MaskIdx0 + MaskLen0 <= ValBits)) {
6812 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6813 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6814 N1.getOperand(0).getOperand(0),
6815 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6816 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6817 }
6818
6819 // 2nd pattern to match BSTRINS:
6820 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6821 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6822 // =>
6823 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6824 if (N0.getOpcode() == ISD::AND &&
6825 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6826 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6827 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6828 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6829 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6830 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6831 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6832 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6833 (MaskIdx0 + MaskLen0 <= ValBits)) {
6834 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6835 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6836 N1.getOperand(0).getOperand(0),
6837 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6838 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6839 }
6840
6841 // 3rd pattern to match BSTRINS:
6842 // R = or (and X, mask0), (and Y, mask1)
6843 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6844 // =>
6845 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6846 // where msb = lsb + size - 1
// Note: the range check here is against the literal 64 rather than ValBits;
// for 32-bit values the msb operand is computed with (MaskLen0 & 31) instead.
6847 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6848 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6849 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6850 (MaskIdx0 + MaskLen0 <= 64) &&
6851 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6852 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6853 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6854 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6855 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6856 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6857 DAG.getConstant(ValBits == 32
6858 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6859 : (MaskIdx0 + MaskLen0 - 1),
6860 DL, GRLenVT),
6861 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6862 }
6863
6864 // 4th pattern to match BSTRINS:
6865 // R = or (and X, mask), (shl Y, shamt)
6866 // where mask = (2**shamt - 1)
6867 // =>
6868 // R = BSTRINS X, Y, ValBits - 1, shamt
6869 // where ValBits = 32 or 64
6870 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6871 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6872 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6873 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6874 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6875 (MaskIdx0 + MaskLen0 <= ValBits)) {
6876 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6877 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6878 N1.getOperand(0),
6879 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6880 DAG.getConstant(Shamt, DL, GRLenVT));
6881 }
6882
6883 // 5th pattern to match BSTRINS:
6884 // R = or (and X, mask), const
6885 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6886 // =>
6887 // R = BSTRINS X, (const >> lsb), msb, lsb
6888 // where msb = lsb + size - 1
6889 if (N0.getOpcode() == ISD::AND &&
6890 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6891 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6892 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6893 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6894 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6895 return DAG.getNode(
6896 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6897 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6898 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6899 : (MaskIdx0 + MaskLen0 - 1),
6900 DL, GRLenVT),
6901 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6902 }
6903
6904 // 6th pattern.
6905 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6906 // by the incoming bits are known to be zero.
6907 // =>
6908 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6909 //
6910 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6911 // pattern is more common than the 1st. So we put the 1st before the 6th in
6912 // order to match as many nodes as possible.
6913 ConstantSDNode *CNMask, *CNShamt;
6914 unsigned MaskIdx, MaskLen;
6915 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6916 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6917 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6918 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6919 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6920 Shamt = CNShamt->getZExtValue();
6921 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
// The target bit range of N0 must be known zero for the insert to equal the OR.
6922 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6923 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6924 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6925 N1.getOperand(0).getOperand(0),
6926 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6927 DAG.getConstant(Shamt, DL, GRLenVT));
6928 }
6929 }
6930
6931 // 7th pattern.
6932 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6933 // overwritten by the incoming bits are known to be zero.
6934 // =>
6935 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6936 //
6937 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6938 // before the 7th in order to match as many nodes as possible.
6939 if (N1.getOpcode() == ISD::AND &&
6940 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6941 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6942 N1.getOperand(0).getOpcode() == ISD::SHL &&
6943 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6944 CNShamt->getZExtValue() == MaskIdx) {
6945 APInt ShMask(ValBits, CNMask->getZExtValue());
6946 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6947 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6948 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6949 N1.getOperand(0).getOperand(0),
6950 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6951 DAG.getConstant(MaskIdx, DL, GRLenVT));
6952 }
6953 }
6954
6955 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6956 if (!SwapAndRetried) {
6957 std::swap(N0, N1);
6958 SwapAndRetried = true;
6959 goto Retry;
6960 }
6961
// Reset the flag: pattern 8 gets its own swap-and-retry pass below. At this
// point N0/N1 are still in the swapped order left by the pass above.
6962 SwapAndRetried = false;
6963Retry2:
6964 // 8th pattern.
6965 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6966 // the incoming bits are known to be zero.
6967 // =>
6968 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6969 //
6970 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6971 // we put it here in order to match as many nodes as possible or generate less
6972 // instructions.
6973 if (N1.getOpcode() == ISD::AND &&
6974 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6975 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6976 APInt ShMask(ValBits, CNMask->getZExtValue());
6977 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6978 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6979 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6980 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6981 N1->getOperand(0),
6982 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6983 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6984 DAG.getConstant(MaskIdx, DL, GRLenVT));
6985 }
6986 }
6987 // Swap N0/N1 and retry.
6988 if (!SwapAndRetried) {
6989 std::swap(N0, N1);
6990 SwapAndRetried = true;
6991 goto Retry2;
6992 }
6993
6994 return SDValue();
6995}
6996
6997static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6998 ExtType = ISD::NON_EXTLOAD;
6999
7000 switch (V.getNode()->getOpcode()) {
7001 case ISD::LOAD: {
7002 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
7003 if ((LoadNode->getMemoryVT() == MVT::i8) ||
7004 (LoadNode->getMemoryVT() == MVT::i16)) {
7005 ExtType = LoadNode->getExtensionType();
7006 return true;
7007 }
7008 return false;
7009 }
7010 case ISD::AssertSext: {
7011 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
7012 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
7013 ExtType = ISD::SEXTLOAD;
7014 return true;
7015 }
7016 return false;
7017 }
7018 case ISD::AssertZext: {
7019 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
7020 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
7021 ExtType = ISD::ZEXTLOAD;
7022 return true;
7023 }
7024 return false;
7025 }
7026 default:
7027 return false;
7028 }
7029
7030 return false;
7031}
7032
7033// Eliminate redundant truncation and zero-extension nodes.
7034// * Case 1:
7035// +------------+ +------------+ +------------+
7036// | Input1 | | Input2 | | CC |
7037// +------------+ +------------+ +------------+
7038// | | |
7039// V V +----+
7040// +------------+ +------------+ |
7041// | TRUNCATE | | TRUNCATE | |
7042// +------------+ +------------+ |
7043// | | |
7044// V V |
7045// +------------+ +------------+ |
7046// | ZERO_EXT | | ZERO_EXT | |
7047// +------------+ +------------+ |
7048// | | |
7049// | +-------------+ |
7050// V V | |
7051// +----------------+ | |
7052// | AND | | |
7053// +----------------+ | |
7054// | | |
7055// +---------------+ | |
7056// | | |
7057// V V V
7058// +-------------+
7059// | CMP |
7060// +-------------+
7061// * Case 2:
7062// +------------+ +------------+ +-------------+ +------------+ +------------+
7063// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
7064// +------------+ +------------+ +-------------+ +------------+ +------------+
7065// | | | | |
7066// V | | | |
7067// +------------+ | | | |
7068// | XOR |<---------------------+ | |
7069// +------------+ | | |
7070// | | | |
7071// V V +---------------+ |
7072// +------------+ +------------+ | |
7073// | TRUNCATE | | TRUNCATE | | +-------------------------+
7074// +------------+ +------------+ | |
7075// | | | |
7076// V V | |
7077// +------------+ +------------+ | |
7078// | ZERO_EXT | | ZERO_EXT | | |
7079// +------------+ +------------+ | |
7080// | | | |
7081// V V | |
7082// +----------------+ | |
7083// | AND | | |
7084// +----------------+ | |
7085// | | |
7086// +---------------+ | |
7087// | | |
7088// V V V
7089// +-------------+
7090// | CMP |
7091// +-------------+
// DAG combine for a SETCC-shaped node (operands: LHS, RHS, condition code)
// whose LHS is an AND of zero-extended truncations; when the truncated inputs
// are narrow loads or Assert[SZ]ext values, the TRUNCATE/ZERO_EXTEND pairs are
// dropped and the comparison is rebuilt on the original values.
// NOTE(review): the line(s) holding the function name and its leading
// parameters (N and DAG are used below) are absent from this listing --
// restore them from the upstream file.
7094 const LoongArchSubtarget &Subtarget) {
7095 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
7096
// Operand 0 must be an AND whose second input is a ZERO_EXTEND.
7097 SDNode *AndNode = N->getOperand(0).getNode();
7098 if (AndNode->getOpcode() != ISD::AND)
7099 return SDValue();
7100
7101 SDValue AndInputValue2 = AndNode->getOperand(1);
7102 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
7103 return SDValue();
7104
7105 SDValue CmpInputValue = N->getOperand(1);
7106 SDValue AndInputValue1 = AndNode->getOperand(0);
// XOR form: the first AND input is (xor X, -1). Only eq/ne against the
// constant 0 is accepted, and X itself must be a ZERO_EXTEND.
7107 if (AndInputValue1.getOpcode() == ISD::XOR) {
7108 if (CC != ISD::SETEQ && CC != ISD::SETNE)
7109 return SDValue();
7110 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
7111 if (!CN || !CN->isAllOnes())
7112 return SDValue();
7113 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
7114 if (!CN || !CN->isZero())
7115 return SDValue();
7116 AndInputValue1 = AndInputValue1.getOperand(0);
7117 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
7118 return SDValue();
// Plain form: both AND inputs are ZERO_EXTENDs and the comparison's right-hand
// side must be the AND's second input.
7119 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
7120 if (AndInputValue2 != CmpInputValue)
7121 return SDValue();
7122 } else {
7123 return SDValue();
7124 }
7125
// Each ZERO_EXTEND must wrap a TRUNCATE.
7126 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
7127 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
7128 return SDValue();
7129
7130 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
7131 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
7132 return SDValue();
7133
7134 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
7135 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
7136 ISD::LoadExtType ExtType1;
7137 ISD::LoadExtType ExtType2;
7138
// Both pre-truncation values must be accepted by checkValueWidth, which also
// reports the extension kind of each.
7139 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
7140 !checkValueWidth(TruncInputValue2, ExtType2))
7141 return SDValue();
7142
// The two pre-truncation values and the AND must all share one value type.
7143 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
7144 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
7145 return SDValue();
7146
// Proceed only if ExtType2 is ZEXTLOAD, or at least one of ExtType1/ExtType2
// is SEXTLOAD.
7147 if ((ExtType2 != ISD::ZEXTLOAD) &&
7148 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
7149 return SDValue();
7150
7151 // These truncation and zero-extension nodes are not necessary, remove them.
7152 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
7153 TruncInputValue1, TruncInputValue2);
7154 SDValue NewSetCC =
7155 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
// All uses of N are redirected to the new SETCC, and N itself is returned.
7156 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
7157 return SDValue(N, 0);
7158}
7159
7160// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
// Returns an empty SDValue when operand 0 is not a REVB_2W node or when DCI
// reports that operation legalization has not started yet.
// NOTE(review): the line(s) holding the function name and its leading
// parameters (N, DAG and DCI are used below) are absent from this listing --
// restore them from the upstream file.
7163 const LoongArchSubtarget &Subtarget) {
7164 if (DCI.isBeforeLegalizeOps())
7165 return SDValue();
7166
7167 SDValue Src = N->getOperand(0);
7168 if (Src.getOpcode() != LoongArchISD::REVB_2W)
7169 return SDValue();
7170
// Build BITREV_4B directly on the REVB_2W node's input, with N's result type.
7171 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
7172 Src.getOperand(0));
7173}
7174
7175// Perform common combines for BR_CC and SELECT_CC conditions.
7176static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
7177 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
7178 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7179
7180 // As far as arithmetic right shift always saves the sign,
7181 // shift can be omitted.
7182 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
7183 // setge (sra X, N), 0 -> setge X, 0
7184 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
7185 LHS.getOpcode() == ISD::SRA) {
7186 LHS = LHS.getOperand(0);
7187 return true;
7188 }
7189
7190 if (!ISD::isIntEqualitySetCC(CCVal))
7191 return false;
7192
7193 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
7194 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
7195 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
7196 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
7197 // If we're looking for eq 0 instead of ne 0, we need to invert the
7198 // condition.
7199 bool Invert = CCVal == ISD::SETEQ;
7200 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
7201 if (Invert)
7202 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7203
7204 RHS = LHS.getOperand(1);
7205 LHS = LHS.getOperand(0);
7206 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
7207
7208 CC = DAG.getCondCode(CCVal);
7209 return true;
7210 }
7211
7212 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
7213 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
7214 LHS.getOperand(1).getOpcode() == ISD::Constant) {
7215 SDValue LHS0 = LHS.getOperand(0);
7216 if (LHS0.getOpcode() == ISD::AND &&
7217 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
7218 uint64_t Mask = LHS0.getConstantOperandVal(1);
7219 uint64_t ShAmt = LHS.getConstantOperandVal(1);
7220 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
7221 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
7222 CC = DAG.getCondCode(CCVal);
7223
7224 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
7225 LHS = LHS0.getOperand(0);
7226 if (ShAmt != 0)
7227 LHS =
7228 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
7229 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
7230 return true;
7231 }
7232 }
7233 }
7234
7235 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
7236 // This can occur when legalizing some floating point comparisons.
7237 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
7238 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
7239 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
7240 CC = DAG.getCondCode(CCVal);
7241 RHS = DAG.getConstant(0, DL, LHS.getValueType());
7242 return true;
7243 }
7244
7245 return false;
7246}
7247
// DAG combine for a branch node laid out as (operand 0, LHS, RHS, CC,
// operand 4): runs combine_CC on the comparison and, if it changed anything,
// rebuilds the node as LoongArchISD::BR_CC with operands 0 and 4 passed
// through unchanged. Returns an empty SDValue otherwise.
// NOTE(review): the line(s) holding the function name and its leading
// parameters (N and DAG are used below) are absent from this listing --
// restore them from the upstream file.
7250 const LoongArchSubtarget &Subtarget) {
7251 SDValue LHS = N->getOperand(1);
7252 SDValue RHS = N->getOperand(2);
7253 SDValue CC = N->getOperand(3);
7254 SDLoc DL(N);
7255
7256 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7257 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
7258 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
7259
7260 return SDValue();
7261}
7262
// DAG combine for a select node laid out as (LHS, RHS, CC, TrueV, FalseV).
// It returns TrueV when both arms are identical, turns a sign test between two
// small constants into SRA/AND/ADD arithmetic, and otherwise defers to
// combine_CC, rebuilding the node as LoongArchISD::SELECT_CC if that changed
// the comparison. Returns an empty SDValue when nothing applies.
// NOTE(review): the line(s) holding the function name and its leading
// parameters (N and DAG are used below) are absent from this listing --
// restore them from the upstream file.
7265 const LoongArchSubtarget &Subtarget) {
7266 // Transform
7267 SDValue LHS = N->getOperand(0);
7268 SDValue RHS = N->getOperand(1);
7269 SDValue CC = N->getOperand(2);
7270 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
7271 SDValue TrueV = N->getOperand(3);
7272 SDValue FalseV = N->getOperand(4);
7273 SDLoc DL(N);
7274 EVT VT = N->getValueType(0);
7275
7276 // If the True and False values are the same, we don't need a select_cc.
7277 if (TrueV == FalseV)
7278 return TrueV;
7279
7280 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
7281 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
// NOTE(review): the embedded listing numbers jump from 7282 to 7284, so one
// conjunct of this condition appears to be missing here -- presumably a check
// on RHS, which the transform below never looks at; confirm against upstream.
7282 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
7284 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
// For SETGE the arms are exchanged so the code below only has to deal with
// the "x < 0" form.
7285 if (CCVal == ISD::CondCode::SETGE)
7286 std::swap(TrueV, FalseV);
7287
7288 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
7289 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
7290 // Only handle simm12, if it is not in this range, it can be considered as
7291 // register.
7292 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
7293 isInt<12>(TrueSImm - FalseSImm)) {
7294 SDValue SRA =
7295 DAG.getNode(ISD::SRA, DL, VT, LHS,
7296 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
7297 SDValue AND =
7298 DAG.getNode(ISD::AND, DL, VT, SRA,
7299 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
7300 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
7301 }
7302
// The arithmetic form was not used: undo the swap so TrueV/FalseV are back in
// their original order for the generic path below.
7303 if (CCVal == ISD::CondCode::SETGE)
7304 std::swap(TrueV, FalseV);
7305 }
7306
7307 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
7308 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
7309 {LHS, RHS, CC, TrueV, FalseV});
7310
7311 return SDValue();
7312}
7313
// Validates the constant operand at index ImmOp of Node against an N-bit
// range: signed (isInt<N>) when IsSigned is true, unsigned (isUInt<N>)
// otherwise. Out-of-range values emit "<operation name>: argument out of
// range." on the context and yield an UNDEF node of the GRLen type; otherwise
// the value is returned as a GRLen constant, built from getZExtValue() in both
// modes.
// NOTE(review): the line holding the function name and its leading parameters
// (Node and ImmOp are used below) is absent from this listing -- restore it
// from the upstream file.
7314template <unsigned N>
7316 SelectionDAG &DAG,
7317 const LoongArchSubtarget &Subtarget,
7318 bool IsSigned = false) {
7319 SDLoc DL(Node);
7320 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7321 // Check the ImmArg.
7322 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7323 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7324 DAG.getContext()->emitError(Node->getOperationName(0) +
7325 ": argument out of range.");
7326 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
7327 }
7328 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
7329}
7330
// Turns the constant operand at index ImmOp of Node into a constant of Node's
// result type. The immediate must fit in N bits, signed when IsSigned is true
// and unsigned otherwise; if it does not, "<operation name>: argument out of
// range." is emitted on the context and an UNDEF node of the result type is
// returned.
// NOTE(review): the embedded listing numbers jump from 7345 to 7347, so the
// first argument line of the DAG.getConstant call below (the start of the
// value expression that ends in ", IsSigned)") is missing -- restore it from
// the upstream file.
7331template <unsigned N>
7332static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
7333 SelectionDAG &DAG, bool IsSigned = false) {
7334 SDLoc DL(Node);
7335 EVT ResTy = Node->getValueType(0);
7336 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
7337
7338 // Check the ImmArg.
7339 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
7340 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
7341 DAG.getContext()->emitError(Node->getOperationName(0) +
7342 ": argument out of range.");
7343 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7344 }
7345 return DAG.getConstant(
7347 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
7348 DL, ResTy);
7349}
7350
// Returns operand 2 of Node AND-ed with a constant of the result type whose
// value is (scalar bit width of operand 2) - 1.
// NOTE(review): the signature line is absent from this listing; Node and DAG
// are used below. Presumably this is the truncateVecElts helper that the
// following function calls with (Node, DAG) -- confirm against upstream.
7352 SDLoc DL(Node);
7353 EVT ResTy = Node->getValueType(0);
7354 SDValue Vec = Node->getOperand(2);
7355 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
7356 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
7357}
7358
// Computes operand1 & ~(1 << truncateVecElts(Node, DAG)) in Node's result
// type, i.e. clears in operand 1 the bit selected by the truncated operand 2.
// NOTE(review): the signature line is absent from this listing; Node and DAG
// are used below -- restore it from the upstream file.
7360 SDLoc DL(Node);
7361 EVT ResTy = Node->getValueType(0);
7362 SDValue One = DAG.getConstant(1, DL, ResTy);
7363 SDValue Bit =
7364 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
7365
7366 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
7367 DAG.getNOT(DL, Bit, ResTy));
7368}
7369
// Immediate bit-clear: ANDs operand 1 with a constant of the result type whose
// scalar value is ~(1 << imm), where imm is the constant operand 2 and must
// fit in N unsigned bits. An out-of-range immediate emits "<operation name>:
// argument out of range." on the context and yields an UNDEF node of the
// result type.
// NOTE(review): the signature line is absent from this listing; Node and DAG
// are used below -- restore it from the upstream file.
7370template <unsigned N>
7372 SDLoc DL(Node);
7373 EVT ResTy = Node->getValueType(0);
7374 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7375 // Check the unsigned ImmArg.
7376 if (!isUInt<N>(CImm->getZExtValue())) {
7377 DAG.getContext()->emitError(Node->getOperationName(0) +
7378 ": argument out of range.");
7379 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7380 }
7381
7382 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7383 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
7384
7385 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
7386}
7387
// Immediate bit-set: ORs operand 1 with a constant of the result type whose
// scalar value is (1 << imm), where imm is the constant operand 2 and must fit
// in N unsigned bits. An out-of-range immediate emits "<operation name>:
// argument out of range." on the context and yields an UNDEF node of the
// result type.
// NOTE(review): the signature line is absent from this listing; Node and DAG
// are used below -- restore it from the upstream file.
7388template <unsigned N>
7390 SDLoc DL(Node);
7391 EVT ResTy = Node->getValueType(0);
7392 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7393 // Check the unsigned ImmArg.
7394 if (!isUInt<N>(CImm->getZExtValue())) {
7395 DAG.getContext()->emitError(Node->getOperationName(0) +
7396 ": argument out of range.");
7397 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7398 }
7399
7400 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7401 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7402 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
7403}
7404
// Immediate bit-flip: XORs operand 1 with a constant of the result type whose
// scalar value is (1 << imm), where imm is the constant operand 2 and must fit
// in N unsigned bits. An out-of-range immediate emits "<operation name>:
// argument out of range." on the context and yields an UNDEF node of the
// result type.
// NOTE(review): the signature line is absent from this listing; Node and DAG
// are used below -- restore it from the upstream file.
7405template <unsigned N>
7407 SDLoc DL(Node);
7408 EVT ResTy = Node->getValueType(0);
7409 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
7410 // Check the unsigned ImmArg.
7411 if (!isUInt<N>(CImm->getZExtValue())) {
7412 DAG.getContext()->emitError(Node->getOperationName(0) +
7413 ": argument out of range.");
7414 return DAG.getNode(ISD::UNDEF, DL, ResTy);
7415 }
7416
7417 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
7418 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
7419 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
7420}
7421
// Builds a ResOp node from (operand 1 of N, index, EltVT), where the index is
// the constant operand 2 re-emitted as an i32 constant. The index must fit in
// W unsigned bits; otherwise "<operation name>: argument out of range." is
// emitted on the context and an UNDEF of N's result type is returned.
// NOTE(review): two lines are absent from this listing -- the one holding the
// function name and leading parameters (N and DAG are used below), and the
// declaration of EltVT (listing number 7434 is skipped). Restore both from the
// upstream file.
7422template <unsigned W>
7424 unsigned ResOp) {
7425 unsigned Imm = N->getConstantOperandVal(2);
7426 if (!isUInt<W>(Imm)) {
7427 const StringRef ErrorMsg = "argument out of range";
7428 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
7429 return DAG.getUNDEF(N->getValueType(0));
7430 }
7431 SDLoc DL(N);
7432 SDValue Vec = N->getOperand(1);
7433 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
7435 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
7436}
7437
7438static SDValue
7441 const LoongArchSubtarget &Subtarget) {
7442 SDLoc DL(N);
7443 switch (N->getConstantOperandVal(0)) {
7444 default:
7445 break;
7446 case Intrinsic::loongarch_lsx_vadd_b:
7447 case Intrinsic::loongarch_lsx_vadd_h:
7448 case Intrinsic::loongarch_lsx_vadd_w:
7449 case Intrinsic::loongarch_lsx_vadd_d:
7450 case Intrinsic::loongarch_lasx_xvadd_b:
7451 case Intrinsic::loongarch_lasx_xvadd_h:
7452 case Intrinsic::loongarch_lasx_xvadd_w:
7453 case Intrinsic::loongarch_lasx_xvadd_d:
7454 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7455 N->getOperand(2));
7456 case Intrinsic::loongarch_lsx_vaddi_bu:
7457 case Intrinsic::loongarch_lsx_vaddi_hu:
7458 case Intrinsic::loongarch_lsx_vaddi_wu:
7459 case Intrinsic::loongarch_lsx_vaddi_du:
7460 case Intrinsic::loongarch_lasx_xvaddi_bu:
7461 case Intrinsic::loongarch_lasx_xvaddi_hu:
7462 case Intrinsic::loongarch_lasx_xvaddi_wu:
7463 case Intrinsic::loongarch_lasx_xvaddi_du:
7464 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
7465 lowerVectorSplatImm<5>(N, 2, DAG));
7466 case Intrinsic::loongarch_lsx_vsub_b:
7467 case Intrinsic::loongarch_lsx_vsub_h:
7468 case Intrinsic::loongarch_lsx_vsub_w:
7469 case Intrinsic::loongarch_lsx_vsub_d:
7470 case Intrinsic::loongarch_lasx_xvsub_b:
7471 case Intrinsic::loongarch_lasx_xvsub_h:
7472 case Intrinsic::loongarch_lasx_xvsub_w:
7473 case Intrinsic::loongarch_lasx_xvsub_d:
7474 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7475 N->getOperand(2));
7476 case Intrinsic::loongarch_lsx_vsubi_bu:
7477 case Intrinsic::loongarch_lsx_vsubi_hu:
7478 case Intrinsic::loongarch_lsx_vsubi_wu:
7479 case Intrinsic::loongarch_lsx_vsubi_du:
7480 case Intrinsic::loongarch_lasx_xvsubi_bu:
7481 case Intrinsic::loongarch_lasx_xvsubi_hu:
7482 case Intrinsic::loongarch_lasx_xvsubi_wu:
7483 case Intrinsic::loongarch_lasx_xvsubi_du:
7484 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
7485 lowerVectorSplatImm<5>(N, 2, DAG));
7486 case Intrinsic::loongarch_lsx_vneg_b:
7487 case Intrinsic::loongarch_lsx_vneg_h:
7488 case Intrinsic::loongarch_lsx_vneg_w:
7489 case Intrinsic::loongarch_lsx_vneg_d:
7490 case Intrinsic::loongarch_lasx_xvneg_b:
7491 case Intrinsic::loongarch_lasx_xvneg_h:
7492 case Intrinsic::loongarch_lasx_xvneg_w:
7493 case Intrinsic::loongarch_lasx_xvneg_d:
7494 return DAG.getNode(
7495 ISD::SUB, DL, N->getValueType(0),
7496 DAG.getConstant(
7497 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7498 /*isSigned=*/true),
7499 SDLoc(N), N->getValueType(0)),
7500 N->getOperand(1));
7501 case Intrinsic::loongarch_lsx_vmax_b:
7502 case Intrinsic::loongarch_lsx_vmax_h:
7503 case Intrinsic::loongarch_lsx_vmax_w:
7504 case Intrinsic::loongarch_lsx_vmax_d:
7505 case Intrinsic::loongarch_lasx_xvmax_b:
7506 case Intrinsic::loongarch_lasx_xvmax_h:
7507 case Intrinsic::loongarch_lasx_xvmax_w:
7508 case Intrinsic::loongarch_lasx_xvmax_d:
7509 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7510 N->getOperand(2));
7511 case Intrinsic::loongarch_lsx_vmax_bu:
7512 case Intrinsic::loongarch_lsx_vmax_hu:
7513 case Intrinsic::loongarch_lsx_vmax_wu:
7514 case Intrinsic::loongarch_lsx_vmax_du:
7515 case Intrinsic::loongarch_lasx_xvmax_bu:
7516 case Intrinsic::loongarch_lasx_xvmax_hu:
7517 case Intrinsic::loongarch_lasx_xvmax_wu:
7518 case Intrinsic::loongarch_lasx_xvmax_du:
7519 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7520 N->getOperand(2));
7521 case Intrinsic::loongarch_lsx_vmaxi_b:
7522 case Intrinsic::loongarch_lsx_vmaxi_h:
7523 case Intrinsic::loongarch_lsx_vmaxi_w:
7524 case Intrinsic::loongarch_lsx_vmaxi_d:
7525 case Intrinsic::loongarch_lasx_xvmaxi_b:
7526 case Intrinsic::loongarch_lasx_xvmaxi_h:
7527 case Intrinsic::loongarch_lasx_xvmaxi_w:
7528 case Intrinsic::loongarch_lasx_xvmaxi_d:
7529 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7530 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7531 case Intrinsic::loongarch_lsx_vmaxi_bu:
7532 case Intrinsic::loongarch_lsx_vmaxi_hu:
7533 case Intrinsic::loongarch_lsx_vmaxi_wu:
7534 case Intrinsic::loongarch_lsx_vmaxi_du:
7535 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7536 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7537 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7538 case Intrinsic::loongarch_lasx_xvmaxi_du:
7539 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7540 lowerVectorSplatImm<5>(N, 2, DAG));
7541 case Intrinsic::loongarch_lsx_vmin_b:
7542 case Intrinsic::loongarch_lsx_vmin_h:
7543 case Intrinsic::loongarch_lsx_vmin_w:
7544 case Intrinsic::loongarch_lsx_vmin_d:
7545 case Intrinsic::loongarch_lasx_xvmin_b:
7546 case Intrinsic::loongarch_lasx_xvmin_h:
7547 case Intrinsic::loongarch_lasx_xvmin_w:
7548 case Intrinsic::loongarch_lasx_xvmin_d:
7549 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7550 N->getOperand(2));
7551 case Intrinsic::loongarch_lsx_vmin_bu:
7552 case Intrinsic::loongarch_lsx_vmin_hu:
7553 case Intrinsic::loongarch_lsx_vmin_wu:
7554 case Intrinsic::loongarch_lsx_vmin_du:
7555 case Intrinsic::loongarch_lasx_xvmin_bu:
7556 case Intrinsic::loongarch_lasx_xvmin_hu:
7557 case Intrinsic::loongarch_lasx_xvmin_wu:
7558 case Intrinsic::loongarch_lasx_xvmin_du:
7559 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7560 N->getOperand(2));
7561 case Intrinsic::loongarch_lsx_vmini_b:
7562 case Intrinsic::loongarch_lsx_vmini_h:
7563 case Intrinsic::loongarch_lsx_vmini_w:
7564 case Intrinsic::loongarch_lsx_vmini_d:
7565 case Intrinsic::loongarch_lasx_xvmini_b:
7566 case Intrinsic::loongarch_lasx_xvmini_h:
7567 case Intrinsic::loongarch_lasx_xvmini_w:
7568 case Intrinsic::loongarch_lasx_xvmini_d:
7569 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7570 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7571 case Intrinsic::loongarch_lsx_vmini_bu:
7572 case Intrinsic::loongarch_lsx_vmini_hu:
7573 case Intrinsic::loongarch_lsx_vmini_wu:
7574 case Intrinsic::loongarch_lsx_vmini_du:
7575 case Intrinsic::loongarch_lasx_xvmini_bu:
7576 case Intrinsic::loongarch_lasx_xvmini_hu:
7577 case Intrinsic::loongarch_lasx_xvmini_wu:
7578 case Intrinsic::loongarch_lasx_xvmini_du:
7579 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7580 lowerVectorSplatImm<5>(N, 2, DAG));
7581 case Intrinsic::loongarch_lsx_vmul_b:
7582 case Intrinsic::loongarch_lsx_vmul_h:
7583 case Intrinsic::loongarch_lsx_vmul_w:
7584 case Intrinsic::loongarch_lsx_vmul_d:
7585 case Intrinsic::loongarch_lasx_xvmul_b:
7586 case Intrinsic::loongarch_lasx_xvmul_h:
7587 case Intrinsic::loongarch_lasx_xvmul_w:
7588 case Intrinsic::loongarch_lasx_xvmul_d:
7589 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7590 N->getOperand(2));
7591 case Intrinsic::loongarch_lsx_vmadd_b:
7592 case Intrinsic::loongarch_lsx_vmadd_h:
7593 case Intrinsic::loongarch_lsx_vmadd_w:
7594 case Intrinsic::loongarch_lsx_vmadd_d:
7595 case Intrinsic::loongarch_lasx_xvmadd_b:
7596 case Intrinsic::loongarch_lasx_xvmadd_h:
7597 case Intrinsic::loongarch_lasx_xvmadd_w:
7598 case Intrinsic::loongarch_lasx_xvmadd_d: {
7599 EVT ResTy = N->getValueType(0);
7600 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7601 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7602 N->getOperand(3)));
7603 }
7604 case Intrinsic::loongarch_lsx_vmsub_b:
7605 case Intrinsic::loongarch_lsx_vmsub_h:
7606 case Intrinsic::loongarch_lsx_vmsub_w:
7607 case Intrinsic::loongarch_lsx_vmsub_d:
7608 case Intrinsic::loongarch_lasx_xvmsub_b:
7609 case Intrinsic::loongarch_lasx_xvmsub_h:
7610 case Intrinsic::loongarch_lasx_xvmsub_w:
7611 case Intrinsic::loongarch_lasx_xvmsub_d: {
7612 EVT ResTy = N->getValueType(0);
7613 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7614 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7615 N->getOperand(3)));
7616 }
7617 case Intrinsic::loongarch_lsx_vdiv_b:
7618 case Intrinsic::loongarch_lsx_vdiv_h:
7619 case Intrinsic::loongarch_lsx_vdiv_w:
7620 case Intrinsic::loongarch_lsx_vdiv_d:
7621 case Intrinsic::loongarch_lasx_xvdiv_b:
7622 case Intrinsic::loongarch_lasx_xvdiv_h:
7623 case Intrinsic::loongarch_lasx_xvdiv_w:
7624 case Intrinsic::loongarch_lasx_xvdiv_d:
7625 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7626 N->getOperand(2));
7627 case Intrinsic::loongarch_lsx_vdiv_bu:
7628 case Intrinsic::loongarch_lsx_vdiv_hu:
7629 case Intrinsic::loongarch_lsx_vdiv_wu:
7630 case Intrinsic::loongarch_lsx_vdiv_du:
7631 case Intrinsic::loongarch_lasx_xvdiv_bu:
7632 case Intrinsic::loongarch_lasx_xvdiv_hu:
7633 case Intrinsic::loongarch_lasx_xvdiv_wu:
7634 case Intrinsic::loongarch_lasx_xvdiv_du:
7635 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7636 N->getOperand(2));
7637 case Intrinsic::loongarch_lsx_vmod_b:
7638 case Intrinsic::loongarch_lsx_vmod_h:
7639 case Intrinsic::loongarch_lsx_vmod_w:
7640 case Intrinsic::loongarch_lsx_vmod_d:
7641 case Intrinsic::loongarch_lasx_xvmod_b:
7642 case Intrinsic::loongarch_lasx_xvmod_h:
7643 case Intrinsic::loongarch_lasx_xvmod_w:
7644 case Intrinsic::loongarch_lasx_xvmod_d:
7645 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7646 N->getOperand(2));
7647 case Intrinsic::loongarch_lsx_vmod_bu:
7648 case Intrinsic::loongarch_lsx_vmod_hu:
7649 case Intrinsic::loongarch_lsx_vmod_wu:
7650 case Intrinsic::loongarch_lsx_vmod_du:
7651 case Intrinsic::loongarch_lasx_xvmod_bu:
7652 case Intrinsic::loongarch_lasx_xvmod_hu:
7653 case Intrinsic::loongarch_lasx_xvmod_wu:
7654 case Intrinsic::loongarch_lasx_xvmod_du:
7655 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7656 N->getOperand(2));
7657 case Intrinsic::loongarch_lsx_vand_v:
7658 case Intrinsic::loongarch_lasx_xvand_v:
7659 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7660 N->getOperand(2));
7661 case Intrinsic::loongarch_lsx_vor_v:
7662 case Intrinsic::loongarch_lasx_xvor_v:
7663 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7664 N->getOperand(2));
7665 case Intrinsic::loongarch_lsx_vxor_v:
7666 case Intrinsic::loongarch_lasx_xvxor_v:
7667 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7668 N->getOperand(2));
7669 case Intrinsic::loongarch_lsx_vnor_v:
7670 case Intrinsic::loongarch_lasx_xvnor_v: {
7671 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7672 N->getOperand(2));
7673 return DAG.getNOT(DL, Res, Res->getValueType(0));
7674 }
7675 case Intrinsic::loongarch_lsx_vandi_b:
7676 case Intrinsic::loongarch_lasx_xvandi_b:
7677 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7678 lowerVectorSplatImm<8>(N, 2, DAG));
7679 case Intrinsic::loongarch_lsx_vori_b:
7680 case Intrinsic::loongarch_lasx_xvori_b:
7681 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7682 lowerVectorSplatImm<8>(N, 2, DAG));
7683 case Intrinsic::loongarch_lsx_vxori_b:
7684 case Intrinsic::loongarch_lasx_xvxori_b:
7685 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7686 lowerVectorSplatImm<8>(N, 2, DAG));
7687 case Intrinsic::loongarch_lsx_vsll_b:
7688 case Intrinsic::loongarch_lsx_vsll_h:
7689 case Intrinsic::loongarch_lsx_vsll_w:
7690 case Intrinsic::loongarch_lsx_vsll_d:
7691 case Intrinsic::loongarch_lasx_xvsll_b:
7692 case Intrinsic::loongarch_lasx_xvsll_h:
7693 case Intrinsic::loongarch_lasx_xvsll_w:
7694 case Intrinsic::loongarch_lasx_xvsll_d:
7695 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7696 truncateVecElts(N, DAG));
7697 case Intrinsic::loongarch_lsx_vslli_b:
7698 case Intrinsic::loongarch_lasx_xvslli_b:
7699 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7700 lowerVectorSplatImm<3>(N, 2, DAG));
7701 case Intrinsic::loongarch_lsx_vslli_h:
7702 case Intrinsic::loongarch_lasx_xvslli_h:
7703 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7704 lowerVectorSplatImm<4>(N, 2, DAG));
7705 case Intrinsic::loongarch_lsx_vslli_w:
7706 case Intrinsic::loongarch_lasx_xvslli_w:
7707 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7708 lowerVectorSplatImm<5>(N, 2, DAG));
7709 case Intrinsic::loongarch_lsx_vslli_d:
7710 case Intrinsic::loongarch_lasx_xvslli_d:
7711 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7712 lowerVectorSplatImm<6>(N, 2, DAG));
7713 case Intrinsic::loongarch_lsx_vsrl_b:
7714 case Intrinsic::loongarch_lsx_vsrl_h:
7715 case Intrinsic::loongarch_lsx_vsrl_w:
7716 case Intrinsic::loongarch_lsx_vsrl_d:
7717 case Intrinsic::loongarch_lasx_xvsrl_b:
7718 case Intrinsic::loongarch_lasx_xvsrl_h:
7719 case Intrinsic::loongarch_lasx_xvsrl_w:
7720 case Intrinsic::loongarch_lasx_xvsrl_d:
7721 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7722 truncateVecElts(N, DAG));
7723 case Intrinsic::loongarch_lsx_vsrli_b:
7724 case Intrinsic::loongarch_lasx_xvsrli_b:
7725 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7726 lowerVectorSplatImm<3>(N, 2, DAG));
7727 case Intrinsic::loongarch_lsx_vsrli_h:
7728 case Intrinsic::loongarch_lasx_xvsrli_h:
7729 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7730 lowerVectorSplatImm<4>(N, 2, DAG));
7731 case Intrinsic::loongarch_lsx_vsrli_w:
7732 case Intrinsic::loongarch_lasx_xvsrli_w:
7733 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7734 lowerVectorSplatImm<5>(N, 2, DAG));
7735 case Intrinsic::loongarch_lsx_vsrli_d:
7736 case Intrinsic::loongarch_lasx_xvsrli_d:
7737 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7738 lowerVectorSplatImm<6>(N, 2, DAG));
7739 case Intrinsic::loongarch_lsx_vsra_b:
7740 case Intrinsic::loongarch_lsx_vsra_h:
7741 case Intrinsic::loongarch_lsx_vsra_w:
7742 case Intrinsic::loongarch_lsx_vsra_d:
7743 case Intrinsic::loongarch_lasx_xvsra_b:
7744 case Intrinsic::loongarch_lasx_xvsra_h:
7745 case Intrinsic::loongarch_lasx_xvsra_w:
7746 case Intrinsic::loongarch_lasx_xvsra_d:
7747 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7748 truncateVecElts(N, DAG));
7749 case Intrinsic::loongarch_lsx_vsrai_b:
7750 case Intrinsic::loongarch_lasx_xvsrai_b:
7751 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7752 lowerVectorSplatImm<3>(N, 2, DAG));
7753 case Intrinsic::loongarch_lsx_vsrai_h:
7754 case Intrinsic::loongarch_lasx_xvsrai_h:
7755 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7756 lowerVectorSplatImm<4>(N, 2, DAG));
7757 case Intrinsic::loongarch_lsx_vsrai_w:
7758 case Intrinsic::loongarch_lasx_xvsrai_w:
7759 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7760 lowerVectorSplatImm<5>(N, 2, DAG));
7761 case Intrinsic::loongarch_lsx_vsrai_d:
7762 case Intrinsic::loongarch_lasx_xvsrai_d:
7763 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7764 lowerVectorSplatImm<6>(N, 2, DAG));
7765 case Intrinsic::loongarch_lsx_vclz_b:
7766 case Intrinsic::loongarch_lsx_vclz_h:
7767 case Intrinsic::loongarch_lsx_vclz_w:
7768 case Intrinsic::loongarch_lsx_vclz_d:
7769 case Intrinsic::loongarch_lasx_xvclz_b:
7770 case Intrinsic::loongarch_lasx_xvclz_h:
7771 case Intrinsic::loongarch_lasx_xvclz_w:
7772 case Intrinsic::loongarch_lasx_xvclz_d:
7773 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7774 case Intrinsic::loongarch_lsx_vpcnt_b:
7775 case Intrinsic::loongarch_lsx_vpcnt_h:
7776 case Intrinsic::loongarch_lsx_vpcnt_w:
7777 case Intrinsic::loongarch_lsx_vpcnt_d:
7778 case Intrinsic::loongarch_lasx_xvpcnt_b:
7779 case Intrinsic::loongarch_lasx_xvpcnt_h:
7780 case Intrinsic::loongarch_lasx_xvpcnt_w:
7781 case Intrinsic::loongarch_lasx_xvpcnt_d:
7782 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7783 case Intrinsic::loongarch_lsx_vbitclr_b:
7784 case Intrinsic::loongarch_lsx_vbitclr_h:
7785 case Intrinsic::loongarch_lsx_vbitclr_w:
7786 case Intrinsic::loongarch_lsx_vbitclr_d:
7787 case Intrinsic::loongarch_lasx_xvbitclr_b:
7788 case Intrinsic::loongarch_lasx_xvbitclr_h:
7789 case Intrinsic::loongarch_lasx_xvbitclr_w:
7790 case Intrinsic::loongarch_lasx_xvbitclr_d:
7791 return lowerVectorBitClear(N, DAG);
7792 case Intrinsic::loongarch_lsx_vbitclri_b:
7793 case Intrinsic::loongarch_lasx_xvbitclri_b:
7794 return lowerVectorBitClearImm<3>(N, DAG);
7795 case Intrinsic::loongarch_lsx_vbitclri_h:
7796 case Intrinsic::loongarch_lasx_xvbitclri_h:
7797 return lowerVectorBitClearImm<4>(N, DAG);
7798 case Intrinsic::loongarch_lsx_vbitclri_w:
7799 case Intrinsic::loongarch_lasx_xvbitclri_w:
7800 return lowerVectorBitClearImm<5>(N, DAG);
7801 case Intrinsic::loongarch_lsx_vbitclri_d:
7802 case Intrinsic::loongarch_lasx_xvbitclri_d:
7803 return lowerVectorBitClearImm<6>(N, DAG);
7804 case Intrinsic::loongarch_lsx_vbitset_b:
7805 case Intrinsic::loongarch_lsx_vbitset_h:
7806 case Intrinsic::loongarch_lsx_vbitset_w:
7807 case Intrinsic::loongarch_lsx_vbitset_d:
7808 case Intrinsic::loongarch_lasx_xvbitset_b:
7809 case Intrinsic::loongarch_lasx_xvbitset_h:
7810 case Intrinsic::loongarch_lasx_xvbitset_w:
7811 case Intrinsic::loongarch_lasx_xvbitset_d: {
7812 EVT VecTy = N->getValueType(0);
7813 SDValue One = DAG.getConstant(1, DL, VecTy);
7814 return DAG.getNode(
7815 ISD::OR, DL, VecTy, N->getOperand(1),
7816 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7817 }
7818 case Intrinsic::loongarch_lsx_vbitseti_b:
7819 case Intrinsic::loongarch_lasx_xvbitseti_b:
7820 return lowerVectorBitSetImm<3>(N, DAG);
7821 case Intrinsic::loongarch_lsx_vbitseti_h:
7822 case Intrinsic::loongarch_lasx_xvbitseti_h:
7823 return lowerVectorBitSetImm<4>(N, DAG);
7824 case Intrinsic::loongarch_lsx_vbitseti_w:
7825 case Intrinsic::loongarch_lasx_xvbitseti_w:
7826 return lowerVectorBitSetImm<5>(N, DAG);
7827 case Intrinsic::loongarch_lsx_vbitseti_d:
7828 case Intrinsic::loongarch_lasx_xvbitseti_d:
7829 return lowerVectorBitSetImm<6>(N, DAG);
7830 case Intrinsic::loongarch_lsx_vbitrev_b:
7831 case Intrinsic::loongarch_lsx_vbitrev_h:
7832 case Intrinsic::loongarch_lsx_vbitrev_w:
7833 case Intrinsic::loongarch_lsx_vbitrev_d:
7834 case Intrinsic::loongarch_lasx_xvbitrev_b:
7835 case Intrinsic::loongarch_lasx_xvbitrev_h:
7836 case Intrinsic::loongarch_lasx_xvbitrev_w:
7837 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7838 EVT VecTy = N->getValueType(0);
7839 SDValue One = DAG.getConstant(1, DL, VecTy);
7840 return DAG.getNode(
7841 ISD::XOR, DL, VecTy, N->getOperand(1),
7842 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7843 }
7844 case Intrinsic::loongarch_lsx_vbitrevi_b:
7845 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7846 return lowerVectorBitRevImm<3>(N, DAG);
7847 case Intrinsic::loongarch_lsx_vbitrevi_h:
7848 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7849 return lowerVectorBitRevImm<4>(N, DAG);
7850 case Intrinsic::loongarch_lsx_vbitrevi_w:
7851 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7852 return lowerVectorBitRevImm<5>(N, DAG);
7853 case Intrinsic::loongarch_lsx_vbitrevi_d:
7854 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7855 return lowerVectorBitRevImm<6>(N, DAG);
7856 case Intrinsic::loongarch_lsx_vfadd_s:
7857 case Intrinsic::loongarch_lsx_vfadd_d:
7858 case Intrinsic::loongarch_lasx_xvfadd_s:
7859 case Intrinsic::loongarch_lasx_xvfadd_d:
7860 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7861 N->getOperand(2));
7862 case Intrinsic::loongarch_lsx_vfsub_s:
7863 case Intrinsic::loongarch_lsx_vfsub_d:
7864 case Intrinsic::loongarch_lasx_xvfsub_s:
7865 case Intrinsic::loongarch_lasx_xvfsub_d:
7866 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7867 N->getOperand(2));
7868 case Intrinsic::loongarch_lsx_vfmul_s:
7869 case Intrinsic::loongarch_lsx_vfmul_d:
7870 case Intrinsic::loongarch_lasx_xvfmul_s:
7871 case Intrinsic::loongarch_lasx_xvfmul_d:
7872 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7873 N->getOperand(2));
7874 case Intrinsic::loongarch_lsx_vfdiv_s:
7875 case Intrinsic::loongarch_lsx_vfdiv_d:
7876 case Intrinsic::loongarch_lasx_xvfdiv_s:
7877 case Intrinsic::loongarch_lasx_xvfdiv_d:
7878 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7879 N->getOperand(2));
7880 case Intrinsic::loongarch_lsx_vfmadd_s:
7881 case Intrinsic::loongarch_lsx_vfmadd_d:
7882 case Intrinsic::loongarch_lasx_xvfmadd_s:
7883 case Intrinsic::loongarch_lasx_xvfmadd_d:
7884 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7885 N->getOperand(2), N->getOperand(3));
7886 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7887 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7888 N->getOperand(1), N->getOperand(2),
7889 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7890 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7891 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7892 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7893 N->getOperand(1), N->getOperand(2),
7894 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7895 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7896 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7897 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7898 N->getOperand(1), N->getOperand(2),
7899 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7900 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7901 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7902 N->getOperand(1), N->getOperand(2),
7903 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7904 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7905 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7906 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7907 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7908 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7909 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7910 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7911 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7912 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7913 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7914 N->getOperand(1)));
7915 case Intrinsic::loongarch_lsx_vreplve_b:
7916 case Intrinsic::loongarch_lsx_vreplve_h:
7917 case Intrinsic::loongarch_lsx_vreplve_w:
7918 case Intrinsic::loongarch_lsx_vreplve_d:
7919 case Intrinsic::loongarch_lasx_xvreplve_b:
7920 case Intrinsic::loongarch_lasx_xvreplve_h:
7921 case Intrinsic::loongarch_lasx_xvreplve_w:
7922 case Intrinsic::loongarch_lasx_xvreplve_d:
7923 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7924 N->getOperand(1),
7925 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7926 N->getOperand(2)));
7927 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7928 if (!Subtarget.is64Bit())
7929 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7930 break;
7931 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7932 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7933 if (!Subtarget.is64Bit())
7934 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7935 break;
7936 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7937 if (!Subtarget.is64Bit())
7938 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7939 break;
7940 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7941 if (!Subtarget.is64Bit())
7942 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7943 break;
7944 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7945 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7946 if (!Subtarget.is64Bit())
7947 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7948 break;
7949 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7950 if (!Subtarget.is64Bit())
7951 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7952 break;
7953 case Intrinsic::loongarch_lsx_bz_b:
7954 case Intrinsic::loongarch_lsx_bz_h:
7955 case Intrinsic::loongarch_lsx_bz_w:
7956 case Intrinsic::loongarch_lsx_bz_d:
7957 case Intrinsic::loongarch_lasx_xbz_b:
7958 case Intrinsic::loongarch_lasx_xbz_h:
7959 case Intrinsic::loongarch_lasx_xbz_w:
7960 case Intrinsic::loongarch_lasx_xbz_d:
7961 if (!Subtarget.is64Bit())
7962 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7963 N->getOperand(1));
7964 break;
7965 case Intrinsic::loongarch_lsx_bz_v:
7966 case Intrinsic::loongarch_lasx_xbz_v:
7967 if (!Subtarget.is64Bit())
7968 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7969 N->getOperand(1));
7970 break;
7971 case Intrinsic::loongarch_lsx_bnz_b:
7972 case Intrinsic::loongarch_lsx_bnz_h:
7973 case Intrinsic::loongarch_lsx_bnz_w:
7974 case Intrinsic::loongarch_lsx_bnz_d:
7975 case Intrinsic::loongarch_lasx_xbnz_b:
7976 case Intrinsic::loongarch_lasx_xbnz_h:
7977 case Intrinsic::loongarch_lasx_xbnz_w:
7978 case Intrinsic::loongarch_lasx_xbnz_d:
7979 if (!Subtarget.is64Bit())
7980 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7981 N->getOperand(1));
7982 break;
7983 case Intrinsic::loongarch_lsx_bnz_v:
7984 case Intrinsic::loongarch_lasx_xbnz_v:
7985 if (!Subtarget.is64Bit())
7986 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7987 N->getOperand(1));
7988 break;
7989 case Intrinsic::loongarch_lasx_concat_128_s:
7990 case Intrinsic::loongarch_lasx_concat_128_d:
7991 case Intrinsic::loongarch_lasx_concat_128:
7992 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7993 N->getOperand(1), N->getOperand(2));
7994 }
7995 return SDValue();
7996}
7997
8000 const LoongArchSubtarget &Subtarget) {
8001 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
8002 // conversion is unnecessary and can be replaced with the
8003 // MOVFR2GR_S_LA64 operand.
8004 SDValue Op0 = N->getOperand(0);
8005 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
8006 return Op0.getOperand(0);
8007 return SDValue();
8008}
8009
8012 const LoongArchSubtarget &Subtarget) {
8013 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
8014 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
8015 // operand.
8016 SDValue Op0 = N->getOperand(0);
8017 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
8018 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
8019 "Unexpected value type!");
8020 return Op0.getOperand(0);
8021 }
8022 return SDValue();
8023}
8024
8025static SDValue
8028 MVT VT = N->getSimpleValueType(0);
8029 unsigned NumBits = VT.getScalarSizeInBits();
8030
8031 // Simplify the inputs.
8032 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8033 APInt DemandedMask(APInt::getAllOnes(NumBits));
8034 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
8035 return SDValue(N, 0);
8036
8037 return SDValue();
8038}
8039
8040static SDValue
8043 const LoongArchSubtarget &Subtarget) {
8044 SDValue Op0 = N->getOperand(0);
8045 SDLoc DL(N);
8046
8047 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
8048 // redundant. Instead, use BuildPairF64's operands directly.
8049 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
8050 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
8051
8052 if (Op0->isUndef()) {
8053 SDValue Lo = DAG.getUNDEF(MVT::i32);
8054 SDValue Hi = DAG.getUNDEF(MVT::i32);
8055 return DCI.CombineTo(N, Lo, Hi);
8056 }
8057
8058 // It's cheaper to materialise two 32-bit integers than to load a double
8059 // from the constant pool and transfer it to integer registers through the
8060 // stack.
8062 APInt V = C->getValueAPF().bitcastToAPInt();
8063 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
8064 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
8065 return DCI.CombineTo(N, Lo, Hi);
8066 }
8067
8068 return SDValue();
8069}
8070
8071/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
8074 const LoongArchSubtarget &Subtarget) {
8075 SDValue N0 = N->getOperand(0);
8076 SDValue N1 = N->getOperand(1);
8077 MVT VT = N->getSimpleValueType(0);
8078 SDLoc DL(N);
8079
8080 // VANDN(undef, x) -> 0
8081 // VANDN(x, undef) -> 0
8082 if (N0.isUndef() || N1.isUndef())
8083 return DAG.getConstant(0, DL, VT);
8084
8085 // VANDN(0, x) -> x
8087 return N1;
8088
8089 // VANDN(x, 0) -> 0
8091 return DAG.getConstant(0, DL, VT);
8092
8093 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
8095 return DAG.getNOT(DL, N0, VT);
8096
8097 // Turn VANDN back to AND if input is inverted.
8098 if (SDValue Not = isNOT(N0, DAG))
8099 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
8100
8101 // Folds for better commutativity:
8102 if (N1->hasOneUse()) {
8103 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(x,y)).
8104 if (SDValue Not = isNOT(N1, DAG))
8105 return DAG.getNOT(
8106 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
8107
8108 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
8109 // -> NOT(OR(x, SplatVector(~Imm)))
8110 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
8111 // gain benefits.
8112 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
8113 N1.getOpcode() == ISD::BUILD_VECTOR) {
8114 if (SDValue SplatValue =
8115 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
8116 if (!N1->isOnlyUserOf(SplatValue.getNode()))
8117 return SDValue();
8118
8119 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
8120 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
8121 SDValue Not =
8122 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
8123 return DAG.getNOT(
8124 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
8125 VT);
8126 }
8127 }
8128 }
8129 }
8130
8131 return SDValue();
8132}
8133
// Rebuild the single-operand node N so that its source operand is first
// widened with ExtendOp to an integer vector that has the same element count
// and element width as N's result type, i.e. produce
//   N->getOpcode()(ExtendOp(Src)).
// Returns an empty SDValue when the source elements are already at least as
// wide as the result elements.
// The result type is queried with getVectorNumElements() unconditionally, so
// it has to be a vector type; the callers in this file check VT.isVector()
// first and pass ISD::SIGN_EXTEND or ISD::ZERO_EXTEND as ExtendOp.
8134static SDValue ExtendSrcToDst(SDNode *N, SelectionDAG &DAG, unsigned ExtendOp) {
8135 SDLoc DL(N);
8136 EVT VT = N->getValueType(0);
8137 SDValue Src = N->getOperand(0);
8138 EVT SrcVT = Src.getValueType();
8139
8140 unsigned DstElts = VT.getVectorNumElements();
8141 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8142 unsigned DstEltBits = VT.getScalarSizeInBits();
8143
 // Only widen: bail out when the source lanes are not narrower than the
 // result lanes.
8144 if (SrcEltBits >= DstEltBits)
8145 return SDValue();
8146
 // Integer vector type with the result's lane count and lane width.
8147 MVT WidenEltVT = MVT::getIntegerVT(DstEltBits);
8148 MVT WidenSrcVT = MVT::getVectorVT(WidenEltVT, DstElts);
8149
 // Extend the source, then re-emit N's own opcode on the widened value.
8150 SDValue Extend = DAG.getNode(ExtendOp, DL, WidenSrcVT, Src);
8151 return DAG.getNode(N->getOpcode(), DL, VT, Extend);
8152}
8153
8156 const LoongArchSubtarget &Subtarget) {
8157 SDLoc DL(N);
8158 EVT VT = N->getValueType(0);
8159
8160 // Sign-extend src to avoid scalarization.
8161 if (VT.isVector())
8162 return ExtendSrcToDst(N, DAG, ISD::SIGN_EXTEND);
8163
8164 if (VT != MVT::f32 && VT != MVT::f64)
8165 return SDValue();
8166 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8167 return SDValue();
8168 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8169 return SDValue();
8170
8171 // Only optimize when the source and destination types have the same width.
8172 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
8173 return SDValue();
8174
8175 SDValue Src = N->getOperand(0);
8176 // If the result of an integer load is only used by an integer-to-float
8177 // conversion, use a fp load instead. This eliminates an integer-to-float-move
8178 // (movgr2fr) instruction.
8179 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
8180 // Do not change the width of a volatile load. This condition check is
8181 // inspired by AArch64.
8182 !cast<LoadSDNode>(Src)->isVolatile()) {
8183 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
8184 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
8185 LN0->getPointerInfo(), LN0->getAlign(),
8186 LN0->getMemOperand()->getFlags());
8187
8188 // Make sure successors of the original load stay after it by updating them
8189 // to use the new Chain.
8190 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
8191 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
8192 }
8193
8194 return SDValue();
8195}
8196
8199 const LoongArchSubtarget &Subtarget) {
8200 SDLoc DL(N);
8201 EVT VT = N->getValueType(0);
8202
8203 // Zero-extend src to avoid scalarization.
8204 if (VT.isVector())
8205 return ExtendSrcToDst(N, DAG, ISD::ZERO_EXTEND);
8206
8207 return SDValue();
8208}
8209
8210// Using [X]VFTINTRZ_W_D for double to signed 32-bit integer conversion.
8211// For example:
8212// v4i32 = fp_to_sint (concat_vectors v2f64, v2f64)
8213// Can be combined into:
8214// v4i32 = VFTINTRZ_W_D v2f64, v2f64
8217 const LoongArchSubtarget &Subtarget) {
8218 if (!Subtarget.hasExtLSX())
8219 return SDValue();
8220
8221 SDLoc DL(N);
8222 EVT DstVT = N->getValueType(0);
8223 SDValue Src = N->getOperand(0);
8224 EVT SrcVT = Src.getValueType();
8225 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
8226
8227 if (!DstVT.isVector() || !DstVT.isSimple() || !SrcVT.isSimple())
8228 return SDValue();
8229
8230 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8231 unsigned SrcBits = SrcVT.getSizeInBits();
8232 unsigned DstEltBits = DstVT.getScalarSizeInBits();
8233 unsigned NumElts = DstVT.getVectorNumElements();
8234 unsigned BlockBits = Subtarget.hasExtLASX() ? 256 : 128;
8235
8236 if (!isPowerOf2_32(NumElts) || !isPowerOf2_32(DstEltBits))
8237 return SDValue();
8238
8239 if (SrcBits % BlockBits != 0 && SrcBits != 128)
8240 return SDValue();
8241
8242 if (DstEltBits < 32) {
8243 MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(32), NumElts);
8244 SDValue Conv = DAG.getNode(N->getOpcode(), DL, PromoteVT, Src);
8245 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Conv);
8246 }
8247
8248 if (SrcEltBits != 64 || DstEltBits != 32)
8249 return SDValue();
8250
8251 if (!IsSigned) {
8252 // LASX already has pattern for double convert to uint32.
8253 if (Subtarget.hasExtLASX())
8254 return SDValue();
8255 MVT TmpVT = MVT::getVectorVT(MVT::i64, NumElts);
8256 SDValue Tmp = DAG.getNode(ISD::FP_TO_SINT, DL, TmpVT, Src);
8257 return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Tmp);
8258 }
8259
8261 unsigned BlockNumElts = BlockBits / 64;
8262 MVT BlockVT = MVT::getVectorVT(MVT::f64, BlockNumElts);
8263 if (Src.getOpcode() == ISD::CONCAT_VECTORS &&
8264 Src.getOperand(0).getValueType() == BlockVT) {
8265 for (unsigned i = 0; i < Src.getNumOperands(); i++)
8266 Blocks.push_back(Src.getOperand(i));
8267 } else if (SrcBits > BlockBits) {
8268 // Wider than one register: extract each BlockBits-wide sub-vector.
8269 for (unsigned i = 0; i < SrcBits / BlockBits; i++)
8270 Blocks.push_back(
8271 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, BlockVT, Src,
8272 DAG.getVectorIdxConstant(i * BlockNumElts, DL)));
8273 } else {
8274 BlockBits = SrcBits;
8275 Blocks.push_back(Src);
8276 }
8277
8278 MVT NativeVT = BlockBits == 256 ? MVT::v8i32 : MVT::v4i32;
8280 for (unsigned i = 0; i < Blocks.size(); i += 2) {
8281 SDValue Lo = Blocks[i];
8282 SDValue Hi = Blocks.size() > 1 ? Blocks[i + 1] : Lo;
8283 SDValue Res = DAG.getNode(LoongArchISD::VFTINTRZ, DL, NativeVT, Hi, Lo);
8284
8285 if (BlockBits == 256) {
8286 SDValue Undef = DAG.getUNDEF(Res.getValueType());
8287 SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
8288 Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
8289 Res = DAG.getBitcast(NativeVT, Res);
8290 }
8291
8292 Parts.push_back(Res);
8293 }
8294
8295 if (Blocks.size() == 1)
8296 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DstVT, Parts[0],
8297 DAG.getVectorIdxConstant(0, DL));
8298 return DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Parts);
8299}
8300
8301// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
8302// logical operations, like in the example below.
8303// or (and (truncate x, truncate y)),
8304// (xor (truncate z, build_vector (constants)))
8305// Given a target type \p VT, we generate
8306// or (and x, y), (xor z, zext(build_vector (constants)))
8307// given x, y and z are of type \p VT. We can do so if each operand is a
8308// truncate from type \p VT or can be recursively promoted; the second operand
8309// may also be a vector of constants or an existing extension we can extend further.
8311 SelectionDAG &DAG,
8312 const LoongArchSubtarget &Subtarget,
8313 unsigned Depth) {
8314 // Limit recursion to avoid excessive compile times.
8316 return SDValue();
8317
8318 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
8319 return SDValue();
8320
8321 SDValue N0 = N.getOperand(0);
8322 SDValue N1 = N.getOperand(1);
8323
8324 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
8325 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
8326 return SDValue();
8327
8328 if (SDValue NN0 =
8329 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
8330 N0 = NN0;
8331 else {
8332 // The left side has to be a 'trunc'.
8333 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
8334 N0.getOperand(0).getValueType() == VT;
8335 if (LHSTrunc)
8336 N0 = N0.getOperand(0);
8337 else
8338 return SDValue();
8339 }
8340
8341 if (SDValue NN1 =
8342 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
8343 N1 = NN1;
8344 else {
8345 // The right side has to be a 'trunc', a (foldable) constant or an
8346 // existing extension we can extend further.
8347 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
8348 N1.getOperand(0).getValueType() == VT;
8349 if (RHSTrunc)
8350 N1 = N1.getOperand(0);
8351 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
8352 Subtarget.hasExtLASX() && N1.hasOneUse())
8353 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
8354 // On 32-bit platform, i64 is an illegal integer scalar type, and
8355 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
8356 // future.
8357 else if (SDValue Cst =
8359 N1 = Cst;
8360 else
8361 return SDValue();
8362 }
8363
8364 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
8365}
8366
8367// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
8368// is an LSX-sized register. In most cases we actually compare or select
8369// LASX-sized registers and mixing the two types creates horrible code. This
8370// method optimizes some of the transition sequences.
8372 SelectionDAG &DAG,
8373 const LoongArchSubtarget &Subtarget) {
     // Entry point for an extend node N (any/zero/sign extend, as asserted
     // below). Returns the replacement value, or an empty SDValue when the
     // transform does not apply.
     // NOTE(review): the first line of this signature is absent from this
     // listing.
8374 EVT VT = N.getValueType();
8375 assert(VT.isVector() && "Expected vector type");
8376 assert((N.getOpcode() == ISD::ANY_EXTEND ||
8377 N.getOpcode() == ISD::ZERO_EXTEND ||
8378 N.getOpcode() == ISD::SIGN_EXTEND) &&
8379 "Invalid Node");
8380
     // Only 256-bit results on subtargets with LASX are handled.
8381 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
8382 return SDValue();
8383
8384 SDValue Narrow = N.getOperand(0);
8385 EVT NarrowVT = Narrow.getValueType();
8386
8387 // Generate the wide operation.
8388 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
8389 if (!Op)
8390 return SDValue();
     // The wide operation replaces the narrow one; re-apply the semantics of the
     // original extension with respect to the narrow type.
8391 switch (N.getOpcode()) {
8392 default:
8393 llvm_unreachable("Unexpected opcode");
8394 case ISD::ANY_EXTEND:
     // Any-extend places no requirement on the upper bits.
8395 return Op;
8396 case ISD::ZERO_EXTEND:
8397 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
8398 case ISD::SIGN_EXTEND:
8399 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8400 DAG.getValueType(NarrowVT));
8401 }
8402}
8403
8406 const LoongArchSubtarget &Subtarget) {
     // DAG combine helper: for a vector-typed node N, try the
     // PromoteMaskArithmetic transform on N itself; otherwise report "no
     // change" by returning an empty SDValue.
     // NOTE(review): the opening lines of this signature are absent from this
     // listing. The callee asserts that N is an any/zero/sign extend, so this
     // is presumably the extend combine invoked from the combine dispatcher
     // -- confirm.
8407 EVT VT = N->getValueType(0);
8408 SDLoc DL(N);
8409
8410 if (VT.isVector())
8411 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
8412 return R;
8413
8414 return SDValue();
8415}
8416
// DAG combine helper: when N produces a vector and has exactly two operands,
// try combineFP_ROUND on it; otherwise return an empty SDValue (no change).
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; presumably this is the CONCAT_VECTORS combine
// referenced by the combine dispatcher -- confirm.
8417static SDValue
8420 const LoongArchSubtarget &Subtarget) {
8421 SDLoc DL(N);
8422 EVT VT = N->getValueType(0);
8423
8424 if (VT.isVector() && N->getNumOperands() == 2)
8425 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
8426 return R;
8427
8428 return SDValue();
8429}
8430
8433 const LoongArchSubtarget &Subtarget) {
     // Recognizes a select-based "rounded right shift" idiom on legal integer
     // vectors and replaces it with a single LoongArchISD::VSRLR (logical) or
     // LoongArchISD::VSRAR (arithmetic) node taking (X, Shift). Returns an empty
     // SDValue when the pattern does not match.
     // NOTE(review): the opening lines of this signature are absent from this
     // listing; the body uses N, DAG and DCI.
     //
     // The combine is skipped while DCI reports that operations have not been
     // legalized yet.
8434 if (DCI.isBeforeLegalizeOps())
8435 return SDValue();
8436
8437 EVT VT = N->getValueType(0);
8438 if (!VT.isVector())
8439 return SDValue();
8440
8441 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
8442 return SDValue();
8443
8444 EVT EltVT = VT.getVectorElementType();
8445 if (!EltVT.isInteger())
8446 return SDValue();
8447
8448 SDValue Cond = N->getOperand(0);
8449 SDValue TrueVal = N->getOperand(1);
8450 SDValue FalseVal = N->getOperand(2);
8451
8452 // match:
8453 //
8454 // vselect (setcc shift, 0, seteq),
8455 // x,
8456 // rounded_shift
8457
8458 if (Cond.getOpcode() != ISD::SETCC)
8459 return SDValue();
8460
     // The comparison must be against an all-zeros splat ...
8461 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
8462 return SDValue();
8463
     // ... using the "equal" predicate.
8464 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
8465 if (CC->get() != ISD::SETEQ)
8466 return SDValue();
8467
8468 SDValue Shift = Cond.getOperand(0);
8469
8470 // True branch must be original value:
8471 //
8472 // vselect cond, x, ...
8473
8474 SDValue X = TrueVal;
8475
8476 // Now match rounded shift pattern:
8477 //
8478 // add
8479 // (and
8480 // (srl X, shift-1)
8481 // 1)
8482 // (srl/sra X, shift)
8483
8484 if (FalseVal.getOpcode() != ISD::ADD)
8485 return SDValue();
8486
     // ADD is commutative: accept the AND on either side and treat the other
     // operand as the plain shift.
8487 SDValue Add0 = FalseVal.getOperand(0);
8488 SDValue Add1 = FalseVal.getOperand(1);
8489 SDValue And;
8490 SDValue Shr;
8491
8492 if (Add0.getOpcode() == ISD::AND) {
8493 And = Add0;
8494 Shr = Add1;
8495 } else if (Add1.getOpcode() == ISD::AND) {
8496 And = Add1;
8497 Shr = Add0;
8498 } else {
8499 return SDValue();
8500 }
8501
8502 // match:
8503 //
8504 // srl/sra X, shift
8505
8506 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
8507 return SDValue();
8508
8509 if (Shr.getOperand(0) != X)
8510 return SDValue();
8511
8512 if (Shr.getOperand(1) != Shift)
8513 return SDValue();
8514
8515 // match:
8516 //
8517 // and
8518 // (srl X, shift-1)
8519 // 1
8520
     // Note that only operand 1 of the AND is checked for the constant; the
     // shifted value must be operand 0.
8521 SDValue Srl = And.getOperand(0);
8522 SDValue One = And.getOperand(1);
8523 APInt SplatVal;
8524
8525 if (Srl.getOpcode() != ISD::SRL)
8526 return SDValue();
8527
     // The mask must be a splat of the value 1 at element width, possibly
     // hidden behind bitcasts.
8528 One = peekThroughBitcasts(One);
8529 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
8530 return SDValue();
8531
8532 if (SplatVal != 1)
8533 return SDValue();
8534
8535 if (Srl.getOperand(0) != X)
8536 return SDValue();
8537
8538 // match:
8539 //
8540 // shift-1
8541
     // "shift-1" is expected in the form (add Shift, <constant>), with Shift as
     // operand 0.
8542 SDValue ShiftMinus1 = Srl.getOperand(1);
8543
8544 if (ShiftMinus1.getOpcode() != ISD::ADD)
8545 return SDValue();
8546
8547 if (ShiftMinus1.getOperand(0) != Shift)
8548 return SDValue();
8549
     // NOTE(review): the condition guarding the following return (presumably
     // the check on the constant operand of the add) is absent from this
     // listing.
8551 return SDValue();
8552
8553 // We matched a rounded right shift pattern and can lower it
8554 // to a single vector rounded shift instruction.
8555
8556 SDLoc DL(N);
8557 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
8558 : LoongArchISD::VSRAR,
8559 DL, VT, X, Shift);
8560}
8561
8563 DAGCombinerInfo &DCI) const {
     // Target DAG-combine dispatcher: selects the per-opcode combine helper for
     // N and returns its result. An empty SDValue is returned for opcodes with
     // no helper, or when the last case below produces nothing.
     // NOTE(review): the first line of this signature is absent from this
     // listing.
8564 SelectionDAG &DAG = DCI.DAG;
8565 switch (N->getOpcode()) {
8566 default:
8567 break;
8568 case ISD::ADD:
8569 return performADDCombine(N, DAG, DCI, Subtarget);
8570 case ISD::AND:
8571 return performANDCombine(N, DAG, DCI, Subtarget);
8572 case ISD::OR:
8573 return performORCombine(N, DAG, DCI, Subtarget);
8574 case ISD::SETCC:
8575 return performSETCCCombine(N, DAG, DCI, Subtarget);
8576 case ISD::SHL:
8577 return performSHLCombine(N, DAG, DCI, Subtarget);
8578 case ISD::SRL:
8579 return performSRLCombine(N, DAG, DCI, Subtarget);
8580 case ISD::SUB:
8581 return performSUBCombine(N, DAG, DCI, Subtarget);
8582 case ISD::BITCAST:
8583 return performBITCASTCombine(N, DAG, DCI, Subtarget);
     // All three integer extension kinds share one helper.
8584 case ISD::ANY_EXTEND:
8585 case ISD::ZERO_EXTEND:
8586 case ISD::SIGN_EXTEND:
8587 return performEXTENDCombine(N, DAG, DCI, Subtarget);
8588 case ISD::SINT_TO_FP:
8589 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8590 case ISD::UINT_TO_FP:
8591 return performUINT_TO_FPCombine(N, DAG, DCI, Subtarget);
8592 case ISD::FP_TO_SINT:
8593 case ISD::FP_TO_UINT:
8594 return performFP_TO_INTCombine(N, DAG, DCI, Subtarget);
8595 case LoongArchISD::BITREV_W:
8596 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
8597 case LoongArchISD::BR_CC:
8598 return performBR_CCCombine(N, DAG, DCI, Subtarget);
8599 case LoongArchISD::SELECT_CC:
8600 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
     // NOTE(review): the case label for the following return is absent from
     // this listing.
8602 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
8603 case LoongArchISD::MOVGR2FR_W_LA64:
8604 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
8605 case LoongArchISD::MOVFR2GR_S_LA64:
8606 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
     // These nodes are all routed to the demanded-bits helper, which does not
     // take the subtarget.
8607 case LoongArchISD::CRC_W_B_W:
8608 case LoongArchISD::CRC_W_H_W:
8609 case LoongArchISD::CRCC_W_B_W:
8610 case LoongArchISD::CRCC_W_H_W:
8611 case LoongArchISD::VMSKLTZ:
8612 case LoongArchISD::XVMSKLTZ:
8613 return performDemandedBitsCombine(N, DAG, DCI);
8614 case LoongArchISD::SPLIT_PAIR_F64:
8615 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
8616 case LoongArchISD::VANDN:
8617 return performVANDNCombine(N, DAG, DCI, Subtarget);
     // NOTE(review): the case label for the following return is absent from
     // this listing.
8619 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
8620 case ISD::VSELECT:
8621 return performVSELECTCombine(N, DAG, DCI, Subtarget);
     // Last case: if combineFP_ROUND yields nothing, control leaves the switch
     // and reaches the final "no change" return.
8622 case LoongArchISD::VPACKEV:
8623 case LoongArchISD::VPERMI:
8624 if (SDValue Result =
8625 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
8626 return Result;
8627 }
8628 return SDValue();
8629}
8630
     // Appends a divide-by-zero check after the div/mod instruction MI: the
     // block is split after MI, a BNE on the divisor skips a BREAK 7, and the
     // block holding the rest of the original code is returned.
     // NOTE(review): the signature lines of this function are absent from this
     // listing; the body uses MI and MBB.
     //
     // Nothing is inserted unless the -loongarch-check-zero-division option
     // (ZeroDivCheck, default false) is enabled.
8633 if (!ZeroDivCheck)
8634 return MBB;
8635
8636 // Build instructions:
8637 // MBB:
8638 // div(or mod) $dst, $dividend, $divisor
8639 // bne $divisor, $zero, SinkMBB
8640 // BreakMBB:
8641 // break 7 // BRK_DIVZERO
8642 // SinkMBB:
8643 // fallthrough
8644 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8645 MachineFunction::iterator It = ++MBB->getIterator();
8646 MachineFunction *MF = MBB->getParent();
8647 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8648 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
     // Both new blocks are inserted at the same position, so the layout order
     // is MBB, BreakMBB, SinkMBB.
8649 MF->insert(It, BreakMBB);
8650 MF->insert(It, SinkMBB);
8651
8652 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8653 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8654 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8655
8656 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8657 DebugLoc DL = MI.getDebugLoc();
     // Operand 2 of MI is the divisor.
8658 MachineOperand &Divisor = MI.getOperand(2);
8659 Register DivisorReg = Divisor.getReg();
8660
8661 // MBB:
     // The branch becomes the last use of the divisor in MBB, so MI's kill flag
     // (if any) is carried over to it here and cleared on MI further down.
8662 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8663 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8664 .addReg(LoongArch::R0)
8665 .addMBB(SinkMBB);
8666 MBB->addSuccessor(BreakMBB);
8667 MBB->addSuccessor(SinkMBB);
8668
8669 // BreakMBB:
8670 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8671 // definition of BRK_DIVZERO.
8672 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8673 BreakMBB->addSuccessor(SinkMBB);
8674
8675 // Clear Divisor's kill flag.
8676 Divisor.setIsKill(false);
8677
8678 return SinkMBB;
8679}
8680
// Expands a vector condition pseudo (PseudoVB*/PseudoXVB*) into explicit control
// flow: a set-condition instruction writes a condition-flag register from
// operand 1, a BCNEZ branches on it, the two arms materialize 0 and 1 in GPRs,
// and a PHI in the join block defines operand 0. Returns the join block.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
8681static MachineBasicBlock *
8683 const LoongArchSubtarget &Subtarget) {
     // Select the real set-condition opcode for this pseudo.
8684 unsigned CondOpc;
8685 switch (MI.getOpcode()) {
8686 default:
8687 llvm_unreachable("Unexpected opcode");
8688 case LoongArch::PseudoVBZ:
8689 CondOpc = LoongArch::VSETEQZ_V;
8690 break;
8691 case LoongArch::PseudoVBZ_B:
8692 CondOpc = LoongArch::VSETANYEQZ_B;
8693 break;
8694 case LoongArch::PseudoVBZ_H:
8695 CondOpc = LoongArch::VSETANYEQZ_H;
8696 break;
8697 case LoongArch::PseudoVBZ_W:
8698 CondOpc = LoongArch::VSETANYEQZ_W;
8699 break;
8700 case LoongArch::PseudoVBZ_D:
8701 CondOpc = LoongArch::VSETANYEQZ_D;
8702 break;
8703 case LoongArch::PseudoVBNZ:
8704 CondOpc = LoongArch::VSETNEZ_V;
8705 break;
8706 case LoongArch::PseudoVBNZ_B:
8707 CondOpc = LoongArch::VSETALLNEZ_B;
8708 break;
8709 case LoongArch::PseudoVBNZ_H:
8710 CondOpc = LoongArch::VSETALLNEZ_H;
8711 break;
8712 case LoongArch::PseudoVBNZ_W:
8713 CondOpc = LoongArch::VSETALLNEZ_W;
8714 break;
8715 case LoongArch::PseudoVBNZ_D:
8716 CondOpc = LoongArch::VSETALLNEZ_D;
8717 break;
8718 case LoongArch::PseudoXVBZ:
8719 CondOpc = LoongArch::XVSETEQZ_V;
8720 break;
8721 case LoongArch::PseudoXVBZ_B:
8722 CondOpc = LoongArch::XVSETANYEQZ_B;
8723 break;
8724 case LoongArch::PseudoXVBZ_H:
8725 CondOpc = LoongArch::XVSETANYEQZ_H;
8726 break;
8727 case LoongArch::PseudoXVBZ_W:
8728 CondOpc = LoongArch::XVSETANYEQZ_W;
8729 break;
8730 case LoongArch::PseudoXVBZ_D:
8731 CondOpc = LoongArch::XVSETANYEQZ_D;
8732 break;
8733 case LoongArch::PseudoXVBNZ:
8734 CondOpc = LoongArch::XVSETNEZ_V;
8735 break;
8736 case LoongArch::PseudoXVBNZ_B:
8737 CondOpc = LoongArch::XVSETALLNEZ_B;
8738 break;
8739 case LoongArch::PseudoXVBNZ_H:
8740 CondOpc = LoongArch::XVSETALLNEZ_H;
8741 break;
8742 case LoongArch::PseudoXVBNZ_W:
8743 CondOpc = LoongArch::XVSETALLNEZ_W;
8744 break;
8745 case LoongArch::PseudoXVBNZ_D:
8746 CondOpc = LoongArch::XVSETALLNEZ_D;
8747 break;
8748 }
8749
8750 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8751 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8752 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declarations of MRI and It, both used below, are absent
     // from this listing.
8755
8756 MachineFunction *F = BB->getParent();
8757 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8758 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8759 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8760
     // All three blocks are inserted at It in the order FalseBB, TrueBB, SinkBB.
8761 F->insert(It, FalseBB);
8762 F->insert(It, TrueBB);
8763 F->insert(It, SinkBB);
8764
8765 // Transfer the remainder of MBB and its successor edges to Sink.
8766 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
     // NOTE(review): only the splice is visible here; the statement that moves
     // the successor edges is absent from this listing.
8768
8769 // Insert the real instruction to BB.
8770 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8771 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8772
8773 // Insert branch.
8774 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8775 BB->addSuccessor(FalseBB);
8776 BB->addSuccessor(TrueBB);
8777
8778 // FalseBB.
     // Result 0 (R0 + 0), then an explicit branch over TrueBB to the join block.
8779 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8780 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8781 .addReg(LoongArch::R0)
8782 .addImm(0);
8783 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8784 FalseBB->addSuccessor(SinkBB);
8785
8786 // TrueBB.
     // Result 1 (R0 + 1). No branch is emitted here; TrueBB was inserted
     // directly before SinkBB above.
8787 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8788 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8789 .addReg(LoongArch::R0)
8790 .addImm(1);
8791 TrueBB->addSuccessor(SinkBB);
8792
8793 // SinkBB: merge the results.
8794 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8795 MI.getOperand(0).getReg())
8796 .addReg(RD1)
8797 .addMBB(FalseBB)
8798 .addReg(RD2)
8799 .addMBB(TrueBB);
8800
8801 // The pseudo instruction is gone now.
8802 MI.eraseFromParent();
8803 return SinkBB;
8804}
8805
// Expands PseudoXVINSGR2VR_{B,H}: inserts the scalar Elt at element index Idx of
// the 256-bit vector XSrc and defines XDst. The expansion is emitted in place
// before MI, MI is erased, and BB is returned unchanged as the current block.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
8806static MachineBasicBlock *
8808 const LoongArchSubtarget &Subtarget) {
8809 unsigned InsOp;
8810 unsigned BroadcastOp;
     // HalfSize is the number of elements in one 128-bit half (16 bytes or
     // 8 halfwords).
8811 unsigned HalfSize;
8812 switch (MI.getOpcode()) {
8813 default:
8814 llvm_unreachable("Unexpected opcode");
8815 case LoongArch::PseudoXVINSGR2VR_B:
8816 HalfSize = 16;
8817 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8818 InsOp = LoongArch::XVEXTRINS_B;
8819 break;
8820 case LoongArch::PseudoXVINSGR2VR_H:
8821 HalfSize = 8;
8822 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8823 InsOp = LoongArch::XVEXTRINS_H;
8824 break;
8825 }
8826 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8827 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8828 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8829 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declaration of MRI, used below, is absent from this
     // listing.
8831 // XDst = vector_insert XSrc, Elt, Idx
8832 Register XDst = MI.getOperand(0).getReg();
8833 Register XSrc = MI.getOperand(1).getReg();
8834 Register Elt = MI.getOperand(2).getReg();
8835 unsigned Idx = MI.getOperand(3).getImm();
8836
     // Fast path: the source is an IMPLICIT_DEF virtual register and the index
     // falls in the low half. Insert into the low 128-bit subregister with the
     // 128-bit VINSGR2VR and wrap the result back up with SUBREG_TO_REG.
8837 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8838 Idx < HalfSize) {
8839 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8840 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8841
8842 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8843 .addReg(XSrc, {}, LoongArch::sub_128);
8844 BuildMI(*BB, MI, DL,
8845 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8846 : LoongArch::VINSGR2VR_B),
8847 ScratchSubReg2)
8848 .addReg(ScratchSubReg1)
8849 .addReg(Elt)
8850 .addImm(Idx);
8851
8852 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8853 .addReg(ScratchSubReg2)
8854 .addImm(LoongArch::sub_128);
8855 } else {
     // General path: broadcast Elt to every element, combine the broadcast with
     // XSrc through XVPERMI_Q, then XVEXTRINS the element into XSrc.
8856 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8857 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8858
8859 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8860
     // The immediate (48 or 18) depends on which half Idx targets.
     // NOTE(review): presumably it selects which 128-bit lanes are taken from
     // each source -- confirm against the XVPERMI.Q encoding.
8861 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8862 .addReg(ScratchReg1)
8863 .addReg(XSrc)
8864 .addImm(Idx >= HalfSize ? 48 : 18);
8865
     // Multiplying the in-half index by 17 (0x11) puts the same index in both
     // nibbles of the immediate.
     // NOTE(review): presumably destination and source element selectors --
     // confirm against the XVEXTRINS encoding.
8866 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8867 .addReg(XSrc)
8868 .addReg(ScratchReg2)
8869 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8870 }
8871
8872 MI.eraseFromParent();
8873 return BB;
8874}
8875
8878 const LoongArchSubtarget &Subtarget) {
     // Expands a scalar PseudoCTPOP_* through the LSX unit: broadcast the source
     // into a 128-bit vector, run the per-element VPCNT on it, and read element
     // 0 back into the destination. The sequence is emitted before MI, MI is
     // erased and BB is returned.
     // NOTE(review): the opening lines of this signature are absent from this
     // listing; the body uses MI and BB.
8879 assert(Subtarget.hasExtLSX());
8880 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8881 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8882 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declaration of MRI, used below, is absent from this
     // listing.
8884 Register Dst = MI.getOperand(0).getReg();
8885 Register Src = MI.getOperand(1).getReg();
8886
     // Choose the broadcast / count / extract opcodes for the element width.
     // The *_LA32 pseudos share the opcodes of their non-LA32 counterparts.
8887 unsigned BroadcastOp, CTOp, PickOp;
8888 switch (MI.getOpcode()) {
8889 default:
8890 llvm_unreachable("Unexpected opcode");
8891 case LoongArch::PseudoCTPOP_B:
8892 BroadcastOp = LoongArch::VREPLGR2VR_B;
8893 CTOp = LoongArch::VPCNT_B;
8894 PickOp = LoongArch::VPICKVE2GR_B;
8895 break;
8896 case LoongArch::PseudoCTPOP_H:
8897 case LoongArch::PseudoCTPOP_H_LA32:
8898 BroadcastOp = LoongArch::VREPLGR2VR_H;
8899 CTOp = LoongArch::VPCNT_H;
8900 PickOp = LoongArch::VPICKVE2GR_H;
8901 break;
8902 case LoongArch::PseudoCTPOP_W:
8903 case LoongArch::PseudoCTPOP_W_LA32:
8904 BroadcastOp = LoongArch::VREPLGR2VR_W;
8905 CTOp = LoongArch::VPCNT_W;
8906 PickOp = LoongArch::VPICKVE2GR_W;
8907 break;
8908 case LoongArch::PseudoCTPOP_D:
8909 BroadcastOp = LoongArch::VREPLGR2VR_D;
8910 CTOp = LoongArch::VPCNT_D;
8911 PickOp = LoongArch::VPICKVE2GR_D;
8912 break;
8913 }
8914
8915 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8916 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8917 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8918 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8919 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8920
8921 MI.eraseFromParent();
8922 return BB;
8923}
8924
// Expands the PseudoVMSK*/PseudoXVMSK* pseudos: emits the matching vector mask
// instruction on operand 1 (followed by a NOR for the EQZ forms) and moves the
// resulting mask into the GPR destination, operand 0. The sequence is emitted
// before MI, MI is erased and BB is returned.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
8925static MachineBasicBlock *
8927 const LoongArchSubtarget &Subtarget) {
8928 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8929 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8930 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
     // NOTE(review): the declaration of MRI, used below, is absent from this
     // listing.
8932 Register Dst = MI.getOperand(0).getReg();
8933 Register Src = MI.getOperand(1).getReg();
8934 DebugLoc DL = MI.getDebugLoc();
     // Defaults describe the 128-bit byte-element forms; the switch overrides
     // them. NotOpc stays 0 unless the mask has to be inverted.
8935 unsigned EleBits = 8;
8936 unsigned NotOpc = 0;
8937 unsigned MskOpc;
8938
8939 switch (MI.getOpcode()) {
8940 default:
8941 llvm_unreachable("Unexpected opcode");
8942 case LoongArch::PseudoVMSKLTZ_B:
8943 MskOpc = LoongArch::VMSKLTZ_B;
8944 break;
8945 case LoongArch::PseudoVMSKLTZ_H:
8946 MskOpc = LoongArch::VMSKLTZ_H;
8947 EleBits = 16;
8948 break;
8949 case LoongArch::PseudoVMSKLTZ_W:
8950 MskOpc = LoongArch::VMSKLTZ_W;
8951 EleBits = 32;
8952 break;
8953 case LoongArch::PseudoVMSKLTZ_D:
8954 MskOpc = LoongArch::VMSKLTZ_D;
8955 EleBits = 64;
8956 break;
8957 case LoongArch::PseudoVMSKGEZ_B:
8958 MskOpc = LoongArch::VMSKGEZ_B;
8959 break;
     // "Equal to zero" is built as the complement of the "not zero" mask.
8960 case LoongArch::PseudoVMSKEQZ_B:
8961 MskOpc = LoongArch::VMSKNZ_B;
8962 NotOpc = LoongArch::VNOR_V;
8963 break;
8964 case LoongArch::PseudoVMSKNEZ_B:
8965 MskOpc = LoongArch::VMSKNZ_B;
8966 break;
     // 256-bit (LASX) forms: same scheme, with the wider register class.
8967 case LoongArch::PseudoXVMSKLTZ_B:
8968 MskOpc = LoongArch::XVMSKLTZ_B;
8969 RC = &LoongArch::LASX256RegClass;
8970 break;
8971 case LoongArch::PseudoXVMSKLTZ_H:
8972 MskOpc = LoongArch::XVMSKLTZ_H;
8973 RC = &LoongArch::LASX256RegClass;
8974 EleBits = 16;
8975 break;
8976 case LoongArch::PseudoXVMSKLTZ_W:
8977 MskOpc = LoongArch::XVMSKLTZ_W;
8978 RC = &LoongArch::LASX256RegClass;
8979 EleBits = 32;
8980 break;
8981 case LoongArch::PseudoXVMSKLTZ_D:
8982 MskOpc = LoongArch::XVMSKLTZ_D;
8983 RC = &LoongArch::LASX256RegClass;
8984 EleBits = 64;
8985 break;
8986 case LoongArch::PseudoXVMSKGEZ_B:
8987 MskOpc = LoongArch::XVMSKGEZ_B;
8988 RC = &LoongArch::LASX256RegClass;
8989 break;
8990 case LoongArch::PseudoXVMSKEQZ_B:
8991 MskOpc = LoongArch::XVMSKNZ_B;
8992 NotOpc = LoongArch::XVNOR_V;
8993 RC = &LoongArch::LASX256RegClass;
8994 break;
8995 case LoongArch::PseudoXVMSKNEZ_B:
8996 MskOpc = LoongArch::XVMSKNZ_B;
8997 RC = &LoongArch::LASX256RegClass;
8998 break;
8999 }
9000
9001 Register Msk = MRI.createVirtualRegister(RC);
9002 if (NotOpc) {
     // NOR of Tmp with itself yields the bitwise complement of Tmp.
9003 Register Tmp = MRI.createVirtualRegister(RC);
9004 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
9005 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
9006 .addReg(Tmp, RegState::Kill)
9007 .addReg(Tmp, RegState::Kill);
9008 } else {
9009 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
9010 }
9011
9012 if (TRI->getRegSizeInBits(*RC) > 128) {
     // 256-bit case: read word elements 0 and 4 of the mask register (the first
     // word of each 128-bit half) into two GPRs and merge them with a
     // bit-string insert over bits [128/EleBits, 256/EleBits - 1].
9013 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
9014 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
9015 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
9016 .addReg(Msk)
9017 .addImm(0);
9018 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
9019 .addReg(Msk, RegState::Kill)
9020 .addImm(4);
9021 BuildMI(*BB, MI, DL,
9022 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
9023 : LoongArch::BSTRINS_W),
9024 Dst)
     // NOTE(review): two operand lines are absent from this listing at this
     // point (presumably the Lo and Hi register operands).
9027 .addImm(256 / EleBits - 1)
9028 .addImm(128 / EleBits);
9029 } else {
     // 128-bit case: the mask is read from halfword element 0.
9030 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
9031 .addReg(Msk, RegState::Kill)
9032 .addImm(0);
9033 }
9034
9035 MI.eraseFromParent();
9036 return BB;
9037}
9038
// Expands SplitPairF64Pseudo: copies the source FP register (operand 2) into two
// GPRs, operand 0 via MOVFR2GR_S_64 and operand 1 via MOVFRH2GR_S. The pseudo
// is erased and BB is returned.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
9039static MachineBasicBlock *
9041 const LoongArchSubtarget &Subtarget) {
9042 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
9043 "Unexpected instruction");
9044
9045 MachineFunction &MF = *BB->getParent();
9046 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declaration of TII, used below, is absent from this
     // listing.
9048 Register LoReg = MI.getOperand(0).getReg();
9049 Register HiReg = MI.getOperand(1).getReg();
9050 Register SrcReg = MI.getOperand(2).getReg();
9051
     // The source is read twice; only the second (last) read inherits the
     // pseudo's kill flag.
9052 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
9053 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
9054 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
9055 MI.eraseFromParent(); // The pseudo instruction is gone now.
9056 return BB;
9057}
9058
// Expands BuildPairF64Pseudo: builds the FP destination (operand 0) from two
// GPRs. Operand 1 is moved into a temporary FPR64 with MOVGR2FR_W_64, then
// MOVGR2FRH_W combines that temporary with operand 2 to define the destination.
// The pseudo is erased and BB is returned.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
9059static MachineBasicBlock *
9061 const LoongArchSubtarget &Subtarget) {
9062 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
9063 "Unexpected instruction");
9064
9065 MachineFunction &MF = *BB->getParent();
9066 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declarations of TII and MRI, used below, are absent
     // from this listing.
9069 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
9070 Register DstReg = MI.getOperand(0).getReg();
9071 Register LoReg = MI.getOperand(1).getReg();
9072 Register HiReg = MI.getOperand(2).getReg();
9073
     // Kill flags of the pseudo's inputs are carried over to the new
     // instructions; the temporary is killed by its single use.
9074 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
9075 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
9076 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
9077 .addReg(TmpReg, RegState::Kill)
9078 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
9079 MI.eraseFromParent(); // The pseudo instruction is gone now.
9080 return BB;
9081}
9082
     // Returns true iff MI is a select pseudo-instruction; the only opcode
     // recognized is Select_GPR_Using_CC_GPR.
     // NOTE(review): the signature line of this function is absent from this
     // listing.
9084 switch (MI.getOpcode()) {
9085 default:
9086 return false;
9087 case LoongArch::Select_GPR_Using_CC_GPR:
9088 return true;
9089 }
9090}
9091
// Expands a select pseudo (and any compatible selects that follow it in the
// same block) into a conditional branch plus PHIs, and returns the block that
// holds the PHIs and the rest of the original code.
//
// Operand layout as used below: 0 = result, 1 = LHS, 2 = RHS (register or
// immediate), 3 = branch opcode passed to TII.get(), 4 = value when the branch
// is taken, 5 = value on the fall-through path.
// NOTE(review): the line carrying this function's name and leading parameters
// is absent from this listing; the body uses MI and BB.
9092static MachineBasicBlock *
9094 const LoongArchSubtarget &Subtarget) {
9095 // To "insert" Select_* instructions, we actually have to insert the triangle
9096 // control-flow pattern. The incoming instructions know the destination vreg
9097 // to set, the condition code register to branch on, the true/false values to
9098 // select between, and the condcode to use to select the appropriate branch.
9099 //
9100 // We produce the following control flow:
9101 // HeadMBB
9102 // | \
9103 // | IfFalseMBB
9104 // | /
9105 // TailMBB
9106 //
9107 // When we find a sequence of selects we attempt to optimize their emission
9108 // by sharing the control flow. Currently we only handle cases where we have
9109 // multiple selects with the exact same condition (same LHS, RHS and CC).
9110 // The selects may be interleaved with other instructions if the other
9111 // instructions meet some requirements we deem safe:
9112 // - They are not pseudo instructions.
9113 // - They are debug instructions. Otherwise,
9114 // - They do not have side-effects, do not access memory and their inputs do
9115 // not depend on the results of the select pseudo-instructions.
9116 // The TrueV/FalseV operands of the selects cannot depend on the result of
9117 // previous selects in the sequence.
9118 // These conditions could be further relaxed. See the X86 target for a
9119 // related approach and more information.
9120
9121 Register LHS = MI.getOperand(1).getReg();
     // RHS is left default-constructed when operand 2 is an immediate.
9122 Register RHS;
9123 if (MI.getOperand(2).isReg())
9124 RHS = MI.getOperand(2).getReg();
9125 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
9126
9127 SmallVector<MachineInstr *, 4> SelectDebugValues;
     // Results of every select accepted into the sequence so far.
9128 SmallSet<Register, 4> SelectDests;
9129 SelectDests.insert(MI.getOperand(0).getReg());
9130
     // Scan forward from MI to find the last select that can share the branch.
9131 MachineInstr *LastSelectPseudo = &MI;
9132 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
9133 SequenceMBBI != E; ++SequenceMBBI) {
9134 if (SequenceMBBI->isDebugInstr())
9135 continue;
9136 if (isSelectPseudo(*SequenceMBBI)) {
     // A select joins the sequence only if its condition is identical (with a
     // register RHS) and neither of its value operands is the result of an
     // earlier select in the sequence.
9137 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
9138 !SequenceMBBI->getOperand(2).isReg() ||
9139 SequenceMBBI->getOperand(2).getReg() != RHS ||
9140 SequenceMBBI->getOperand(3).getImm() != CC ||
9141 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
9142 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
9143 break;
9144 LastSelectPseudo = &*SequenceMBBI;
9145 SequenceMBBI->collectDebugValues(SelectDebugValues);
9146 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
9147 continue;
9148 }
     // Any other instruction ends the scan if it has unmodeled side effects,
     // may touch memory, needs its own custom insertion, ...
9149 if (SequenceMBBI->hasUnmodeledSideEffects() ||
9150 SequenceMBBI->mayLoadOrStore() ||
9151 SequenceMBBI->usesCustomInsertionHook())
9152 break;
     // ... or reads the result of one of the selects in the sequence.
9153 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
9154 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
9155 }))
9156 break;
9157 }
9158
9159 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
9160 const BasicBlock *LLVM_BB = BB->getBasicBlock();
9161 DebugLoc DL = MI.getDebugLoc();
     // NOTE(review): the declaration of the insertion iterator I, used below,
     // is absent from this listing.
9163
9164 MachineBasicBlock *HeadMBB = BB;
9165 MachineFunction *F = BB->getParent();
9166 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
9167 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
9168
9169 F->insert(I, IfFalseMBB);
9170 F->insert(I, TailMBB);
9171
9172 // Set the call frame size on entry to the new basic blocks.
9173 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
9174 IfFalseMBB->setCallFrameSize(CallFrameSize);
9175 TailMBB->setCallFrameSize(CallFrameSize);
9176
9177 // Transfer debug instructions associated with the selects to TailMBB.
9178 for (MachineInstr *DebugInstr : SelectDebugValues) {
9179 TailMBB->push_back(DebugInstr->removeFromParent());
9180 }
9181
9182 // Move all instructions after the sequence to TailMBB.
9183 TailMBB->splice(TailMBB->end(), HeadMBB,
9184 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
9185 // Update machine-CFG edges by transferring all successors of the current
9186 // block to the new block which will contain the Phi nodes for the selects.
9187 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
9188 // Set the successors for HeadMBB.
9189 HeadMBB->addSuccessor(IfFalseMBB);
9190 HeadMBB->addSuccessor(TailMBB);
9191
9192 // Insert appropriate branch.
     // CC is used directly as the branch opcode; the branch targets TailMBB, so
     // the taken path skips IfFalseMBB.
9193 if (MI.getOperand(2).isImm())
9194 BuildMI(HeadMBB, DL, TII.get(CC))
9195 .addReg(LHS)
9196 .addImm(MI.getOperand(2).getImm())
9197 .addMBB(TailMBB);
9198 else
9199 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
9200
9201 // IfFalseMBB just falls through to TailMBB.
9202 IfFalseMBB->addSuccessor(TailMBB);
9203
9204 // Create PHIs for all of the select pseudo-instructions.
9205 auto SelectMBBI = MI.getIterator();
9206 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
9207 auto InsertionPoint = TailMBB->begin();
9208 while (SelectMBBI != SelectEnd) {
     // Take the successor first: the current instruction may be erased below.
9209 auto Next = std::next(SelectMBBI);
9210 if (isSelectPseudo(*SelectMBBI)) {
9211 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
9212 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
9213 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
9214 .addReg(SelectMBBI->getOperand(4).getReg())
9215 .addMBB(HeadMBB)
9216 .addReg(SelectMBBI->getOperand(5).getReg())
9217 .addMBB(IfFalseMBB);
9218 SelectMBBI->eraseFromParent();
9219 }
9220 SelectMBBI = Next;
9221 }
9222
     // PHIs were just created, so the function can no longer claim the NoPHIs
     // property.
9223 F->getProperties().resetNoPHIs();
9224 return TailMBB;
9225}
9226
// Custom-inserter hook: expands MI (an instruction flagged for custom insertion)
// inside BB and returns the block in which instruction emission should continue.
// Most opcodes are forwarded to a dedicated emit* helper; WRFCSR and RDFCSR are
// expanded inline. An opcode not listed here hits llvm_unreachable.
9227MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
9228 MachineInstr &MI, MachineBasicBlock *BB) const {
9229 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
9230 DebugLoc DL = MI.getDebugLoc();
9231
9232 switch (MI.getOpcode()) {
9233 default:
9234 llvm_unreachable("Unexpected instr type to insert");
     // Integer division/remainder: optionally followed by a divide-by-zero
     // trap. (The break after the return is never reached.)
9235 case LoongArch::DIV_W:
9236 case LoongArch::DIV_WU:
9237 case LoongArch::MOD_W:
9238 case LoongArch::MOD_WU:
9239 case LoongArch::DIV_D:
9240 case LoongArch::DIV_DU:
9241 case LoongArch::MOD_D:
9242 case LoongArch::MOD_DU:
9243 return insertDivByZeroTrap(MI, BB);
9244 break;
     // WRFCSR: the immediate in operand 0 selects the FCSR register (offset
     // from FCSR0) that MOVGR2FCSR defines from the GPR in operand 1.
9245 case LoongArch::WRFCSR: {
9246 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
9247 LoongArch::FCSR0 + MI.getOperand(0).getImm())
9248 .addReg(MI.getOperand(1).getReg());
9249 MI.eraseFromParent();
9250 return BB;
9251 }
     // RDFCSR: the immediate in operand 1 selects the FCSR register read by
     // MOVFCSR2GR into operand 0. The FCSR use is flagged undef.
     // NOTE(review): presumably so the read does not require a reaching
     // definition of the FCSR register -- confirm.
9252 case LoongArch::RDFCSR: {
9253 MachineInstr *ReadFCSR =
9254 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
9255 MI.getOperand(0).getReg())
9256 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
9257 ReadFCSR->getOperand(1).setIsUndef();
9258 MI.eraseFromParent();
9259 return BB;
9260 }
9261 case LoongArch::Select_GPR_Using_CC_GPR:
9262 return emitSelectPseudo(MI, BB, Subtarget);
9263 case LoongArch::BuildPairF64Pseudo:
9264 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
9265 case LoongArch::SplitPairF64Pseudo:
9266 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
9267 case LoongArch::PseudoVBZ:
9268 case LoongArch::PseudoVBZ_B:
9269 case LoongArch::PseudoVBZ_H:
9270 case LoongArch::PseudoVBZ_W:
9271 case LoongArch::PseudoVBZ_D:
9272 case LoongArch::PseudoVBNZ:
9273 case LoongArch::PseudoVBNZ_B:
9274 case LoongArch::PseudoVBNZ_H:
9275 case LoongArch::PseudoVBNZ_W:
9276 case LoongArch::PseudoVBNZ_D:
9277 case LoongArch::PseudoXVBZ:
9278 case LoongArch::PseudoXVBZ_B:
9279 case LoongArch::PseudoXVBZ_H:
9280 case LoongArch::PseudoXVBZ_W:
9281 case LoongArch::PseudoXVBZ_D:
9282 case LoongArch::PseudoXVBNZ:
9283 case LoongArch::PseudoXVBNZ_B:
9284 case LoongArch::PseudoXVBNZ_H:
9285 case LoongArch::PseudoXVBNZ_W:
9286 case LoongArch::PseudoXVBNZ_D:
9287 return emitVecCondBranchPseudo(MI, BB, Subtarget);
9288 case LoongArch::PseudoXVINSGR2VR_B:
9289 case LoongArch::PseudoXVINSGR2VR_H:
9290 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
9291 case LoongArch::PseudoCTPOP_B:
9292 case LoongArch::PseudoCTPOP_H:
9293 case LoongArch::PseudoCTPOP_W:
9294 case LoongArch::PseudoCTPOP_D:
9295 case LoongArch::PseudoCTPOP_H_LA32:
9296 case LoongArch::PseudoCTPOP_W_LA32:
9297 return emitPseudoCTPOP(MI, BB, Subtarget);
9298 case LoongArch::PseudoVMSKLTZ_B:
9299 case LoongArch::PseudoVMSKLTZ_H:
9300 case LoongArch::PseudoVMSKLTZ_W:
9301 case LoongArch::PseudoVMSKLTZ_D:
9302 case LoongArch::PseudoVMSKGEZ_B:
9303 case LoongArch::PseudoVMSKEQZ_B:
9304 case LoongArch::PseudoVMSKNEZ_B:
9305 case LoongArch::PseudoXVMSKLTZ_B:
9306 case LoongArch::PseudoXVMSKLTZ_H:
9307 case LoongArch::PseudoXVMSKLTZ_W:
9308 case LoongArch::PseudoXVMSKLTZ_D:
9309 case LoongArch::PseudoXVMSKGEZ_B:
9310 case LoongArch::PseudoXVMSKEQZ_B:
9311 case LoongArch::PseudoXVMSKNEZ_B:
9312 return emitPseudoVMSKCOND(MI, BB, Subtarget);
9313 case TargetOpcode::STATEPOINT:
9314 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
9315 // while bl call instruction (where statepoint will be lowered at the
9316 // end) has implicit def. This def is early-clobber as it will be set at
9317 // the moment of the call and earlier than any use is read.
9318 // Add this implicit dead def here as a workaround.
     // NOTE(review): the line that opens the operand-construction call whose
     // arguments follow is absent from this listing.
9319 MI.addOperand(*MI.getMF(),
9321 LoongArch::R1, /*isDef*/ true,
9322 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
9323 /*isUndef*/ false, /*isEarlyClobber*/ true));
     // The 64-bit check runs after the operand has already been added; on a
     // non-64-bit subtarget compilation aborts here.
9324 if (!Subtarget.is64Bit())
9325 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
9326 return emitPatchPoint(MI, BB);
9327 case LoongArch::PROBED_STACKALLOC_DYN:
9328 return emitDynamicProbedAlloc(MI, BB);
9329 }
9330}
9331
9333 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
9334 unsigned *Fast) const {
     // Reports whether misaligned memory accesses are allowed. The answer
     // depends only on the subtarget's UAL feature; VT, AddrSpace, Alignment
     // and Flags are not consulted.
     // NOTE(review): the first line of this signature is absent from this
     // listing.
9335 if (!Subtarget.hasUAL())
9336 return false;
9337
     // Fast is an optional out-parameter; it is written only when non-null and
     // only when the access is allowed.
9338 // TODO: set reasonable speed number.
9339 if (Fast)
9340 *Fast = 1;
9341 return true;
9342}
9343
9344//===----------------------------------------------------------------------===//
9345// Calling Convention Implementation
9346//===----------------------------------------------------------------------===//
9347
9348// Eight general-purpose registers a0-a7 used for passing integer arguments,
9349// with a0-a1 reused to return values. Generally, the GPRs are used to pass
9350// fixed-point arguments, and floating-point arguments when no FPR is available
9351// or with soft float ABI.
// In the list below, a0-a7 are spelled R4-R11.
9352const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
9353 LoongArch::R7, LoongArch::R8, LoongArch::R9,
9354 LoongArch::R10, LoongArch::R11};
9355
9356// PreserveNone calling convention:
9357// Arguments may be passed in any general-purpose registers except:
9358// - R1 : return address register
9359// - R22 : frame pointer
9360// - R31 : base pointer
9361//
9362// All general-purpose registers are treated as caller-saved,
9363// except R1 (RA) and R22 (FP).
9364//
9365// Non-volatile registers are allocated first so that a function
9366// can call normal functions without having to spill and reload
9367// argument registers.
9369 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
9370 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
9371 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
9372 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
9373 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
9374 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
9375 LoongArch::R20};
9376
9377// Eight floating-point registers fa0-fa7 used for passing floating-point
9378// arguments, and fa0-fa1 are also used to return values.
9379const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
9380 LoongArch::F3, LoongArch::F4, LoongArch::F5,
9381 LoongArch::F6, LoongArch::F7};
9382// FPR32 and FPR64 alias each other.
9384 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
9385 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
9386
9387const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
9388 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
9389 LoongArch::VR6, LoongArch::VR7};
9390
9391const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
9392 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
9393 LoongArch::XR6, LoongArch::XR7};
9394
9396 switch (State.getCallingConv()) {
9398 if (!State.isVarArg())
9399 return State.AllocateReg(PreserveNoneArgGPRs);
9400 [[fallthrough]];
9401 default:
9402 return State.AllocateReg(ArgGPRs);
9403 }
9404}
9405
9406// Pass a 2*GRLen argument that has been split into two GRLen values through
9407// registers or the stack as necessary.
9408static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
9409 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
9410 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
9411 ISD::ArgFlagsTy ArgFlags2) {
9412 unsigned GRLenInBytes = GRLen / 8;
9413 if (Register Reg = allocateArgGPR(State)) {
9414 // At least one half can be passed via register.
9415 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
9416 VA1.getLocVT(), CCValAssign::Full));
9417 } else {
9418 // Both halves must be passed on the stack, with proper alignment.
9419 Align StackAlign =
9420 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
9421 State.addLoc(
9423 State.AllocateStack(GRLenInBytes, StackAlign),
9424 VA1.getLocVT(), CCValAssign::Full));
9425 State.addLoc(CCValAssign::getMem(
9426 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9427 LocVT2, CCValAssign::Full));
9428 return false;
9429 }
9430 if (Register Reg = allocateArgGPR(State)) {
9431 // The second half can also be passed via register.
9432 State.addLoc(
9433 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
9434 } else {
9435 // The second half is passed via the stack, without additional alignment.
9436 State.addLoc(CCValAssign::getMem(
9437 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
9438 LocVT2, CCValAssign::Full));
9439 }
9440 return false;
9441}
9442
9443// Implements the LoongArch calling convention. Returns true upon failure.
9445 unsigned ValNo, MVT ValVT,
9446 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
9447 CCState &State, bool IsRet, Type *OrigTy) {
9448 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
9449 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
9450 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
9451 MVT LocVT = ValVT;
9452
9453 // Any return value split into more than two values can't be returned
9454 // directly.
9455 if (IsRet && ValNo > 1)
9456 return true;
9457
9458 // If passing a variadic argument, or if no FPR is available.
9459 bool UseGPRForFloat = true;
9460
9461 switch (ABI) {
9462 default:
9463 llvm_unreachable("Unexpected ABI");
9464 break;
9469 UseGPRForFloat = ArgFlags.isVarArg();
9470 break;
9473 break;
9474 }
9475
9476 // If this is a variadic argument, the LoongArch calling convention requires
9477 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
9478 // byte alignment. An aligned register should be used regardless of whether
9479 // the original argument was split during legalisation or not. The argument
9480 // will not be passed by registers if the original type is larger than
9481 // 2*GRLen, so the register alignment rule does not apply.
9482 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
9483 if (ArgFlags.isVarArg() &&
9484 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
9485 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
9486 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
9487 // Skip 'odd' register if necessary.
9488 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
9489 State.AllocateReg(ArgGPRs);
9490 }
9491
9492 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
9493 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
9494 State.getPendingArgFlags();
9495
9496 assert(PendingLocs.size() == PendingArgFlags.size() &&
9497 "PendingLocs and PendingArgFlags out of sync");
9498
9499 // FPR32 and FPR64 alias each other.
9500 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
9501 UseGPRForFloat = true;
9502
9503 if (UseGPRForFloat && ValVT == MVT::f32) {
9504 LocVT = GRLenVT;
9505 LocInfo = CCValAssign::BCvt;
9506 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
9507 LocVT = MVT::i64;
9508 LocInfo = CCValAssign::BCvt;
9509 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
9510 // Handle passing f64 on LA32D with a soft float ABI or when floating point
9511 // registers are exhausted.
9512 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
9513 // Depending on available argument GPRS, f64 may be passed in a pair of
9514 // GPRs, split between a GPR and the stack, or passed completely on the
9515 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
9516 // cases.
9517 MCRegister Reg = allocateArgGPR(State);
9518 if (!Reg) {
9519 int64_t StackOffset = State.AllocateStack(8, Align(8));
9520 State.addLoc(
9521 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9522 return false;
9523 }
9524 LocVT = MVT::i32;
9525 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9526 MCRegister HiReg = allocateArgGPR(State);
9527 if (HiReg) {
9528 State.addLoc(
9529 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
9530 } else {
9531 int64_t StackOffset = State.AllocateStack(4, Align(4));
9532 State.addLoc(
9533 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9534 }
9535 return false;
9536 }
9537
9538 // Split arguments might be passed indirectly, so keep track of the pending
9539 // values.
9540 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
9541 LocVT = GRLenVT;
9542 LocInfo = CCValAssign::Indirect;
9543 PendingLocs.push_back(
9544 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
9545 PendingArgFlags.push_back(ArgFlags);
9546 if (!ArgFlags.isSplitEnd()) {
9547 return false;
9548 }
9549 }
9550
9551 // If the split argument only had two elements, it should be passed directly
9552 // in registers or on the stack.
9553 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
9554 PendingLocs.size() <= 2) {
9555 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
9556 // Apply the normal calling convention rules to the first half of the
9557 // split argument.
9558 CCValAssign VA = PendingLocs[0];
9559 ISD::ArgFlagsTy AF = PendingArgFlags[0];
9560 PendingLocs.clear();
9561 PendingArgFlags.clear();
9562 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
9563 ArgFlags);
9564 }
9565
9566 // Allocate to a register if possible, or else a stack slot.
9567 Register Reg;
9568 unsigned StoreSizeBytes = GRLen / 8;
9569 Align StackAlign = Align(GRLen / 8);
9570
9571 if (ValVT == MVT::f32 && !UseGPRForFloat) {
9572 Reg = State.AllocateReg(ArgFPR32s);
9573 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
9574 Reg = State.AllocateReg(ArgFPR64s);
9575 } else if (ValVT.is128BitVector()) {
9576 Reg = State.AllocateReg(ArgVRs);
9577 UseGPRForFloat = false;
9578 StoreSizeBytes = 16;
9579 StackAlign = Align(16);
9580 } else if (ValVT.is256BitVector()) {
9581 Reg = State.AllocateReg(ArgXRs);
9582 UseGPRForFloat = false;
9583 StoreSizeBytes = 32;
9584 StackAlign = Align(32);
9585 } else {
9586 Reg = allocateArgGPR(State);
9587 }
9588
9589 unsigned StackOffset =
9590 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
9591
9592 // If we reach this point and PendingLocs is non-empty, we must be at the
9593 // end of a split argument that must be passed indirectly.
9594 if (!PendingLocs.empty()) {
9595 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
9596 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
9597 for (auto &It : PendingLocs) {
9598 if (Reg)
9599 It.convertToReg(Reg);
9600 else
9601 It.convertToMem(StackOffset);
9602 State.addLoc(It);
9603 }
9604 PendingLocs.clear();
9605 PendingArgFlags.clear();
9606 return false;
9607 }
9608 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
9609 "Expected an GRLenVT at this stage");
9610
9611 if (Reg) {
9612 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9613 return false;
9614 }
9615
9616 // When a floating-point value is passed on the stack, no bit-cast is needed.
9617 if (ValVT.isFloatingPoint()) {
9618 LocVT = ValVT;
9619 LocInfo = CCValAssign::Full;
9620 }
9621
9622 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
9623 return false;
9624}
9625
9626void LoongArchTargetLowering::analyzeInputArgs(
9627 MachineFunction &MF, CCState &CCInfo,
9628 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
9629 LoongArchCCAssignFn Fn) const {
9630 FunctionType *FType = MF.getFunction().getFunctionType();
9631 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
9632 MVT ArgVT = Ins[i].VT;
9633 Type *ArgTy = nullptr;
9634 if (IsRet)
9635 ArgTy = FType->getReturnType();
9636 else if (Ins[i].isOrigArg())
9637 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9639 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9640 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9641 CCInfo, IsRet, ArgTy)) {
9642 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9643 << '\n');
9644 llvm_unreachable("");
9645 }
9646 }
9647}
9648
9649void LoongArchTargetLowering::analyzeOutputArgs(
9650 MachineFunction &MF, CCState &CCInfo,
9651 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9652 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9653 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9654 MVT ArgVT = Outs[i].VT;
9655 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9657 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9658 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9659 CCInfo, IsRet, OrigTy)) {
9660 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9661 << "\n");
9662 llvm_unreachable("");
9663 }
9664 }
9665}
9666
9667// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9668// values.
9670 const CCValAssign &VA, const SDLoc &DL) {
9671 switch (VA.getLocInfo()) {
9672 default:
9673 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9674 case CCValAssign::Full:
9676 break;
9677 case CCValAssign::BCvt:
9678 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9679 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9680 else
9681 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9682 break;
9683 }
9684 return Val;
9685}
9686
9688 const CCValAssign &VA, const SDLoc &DL,
9689 const ISD::InputArg &In,
9690 const LoongArchTargetLowering &TLI) {
9693 EVT LocVT = VA.getLocVT();
9694 SDValue Val;
9695 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9696 Register VReg = RegInfo.createVirtualRegister(RC);
9697 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9698 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9699
9700 // If input is sign extended from 32 bits, note it for the OptW pass.
9701 if (In.isOrigArg()) {
9702 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9703 if (OrigArg->getType()->isIntegerTy()) {
9704 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9705 // An input zero extended from i31 can also be considered sign extended.
9706 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9707 (BitWidth < 32 && In.Flags.isZExt())) {
9710 LAFI->addSExt32Register(VReg);
9711 }
9712 }
9713 }
9714
9715 return convertLocVTToValVT(DAG, Val, VA, DL);
9716}
9717
9718// The caller is responsible for loading the full value if the argument is
9719// passed with CCValAssign::Indirect.
9721 const CCValAssign &VA, const SDLoc &DL) {
9723 MachineFrameInfo &MFI = MF.getFrameInfo();
9724 EVT ValVT = VA.getValVT();
9725 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9726 /*IsImmutable=*/true);
9727 SDValue FIN = DAG.getFrameIndex(
9729
9730 ISD::LoadExtType ExtType;
9731 switch (VA.getLocInfo()) {
9732 default:
9733 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9734 case CCValAssign::Full:
9736 case CCValAssign::BCvt:
9737 ExtType = ISD::NON_EXTLOAD;
9738 break;
9739 }
9740 return DAG.getExtLoad(
9741 ExtType, DL, VA.getLocVT(), Chain, FIN,
9743}
9744
9746 const CCValAssign &VA,
9747 const CCValAssign &HiVA,
9748 const SDLoc &DL) {
9749 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9750 "Unexpected VA");
9752 MachineFrameInfo &MFI = MF.getFrameInfo();
9754
9755 assert(VA.isRegLoc() && "Expected register VA assignment");
9756
9757 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9758 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9759 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9760 SDValue Hi;
9761 if (HiVA.isMemLoc()) {
9762 // Second half of f64 is passed on the stack.
9763 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9764 /*IsImmutable=*/true);
9765 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9766 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9768 } else {
9769 // Second half of f64 is passed in another GPR.
9770 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9771 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9772 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9773 }
9774 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9775}
9776
9778 const CCValAssign &VA, const SDLoc &DL) {
9779 EVT LocVT = VA.getLocVT();
9780
9781 switch (VA.getLocInfo()) {
9782 default:
9783 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9784 case CCValAssign::Full:
9785 break;
9786 case CCValAssign::BCvt:
9787 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9788 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9789 else
9790 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9791 break;
9792 }
9793 return Val;
9794}
9795
9796static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9797 CCValAssign::LocInfo LocInfo,
9798 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9799 CCState &State) {
9800 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9801 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9802 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9803 static const MCPhysReg GPRList[] = {
9804 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9805 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9806 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9807 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9808 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9809 return false;
9810 }
9811 }
9812
9813 if (LocVT == MVT::f32) {
9814 // Pass in STG registers: F1, F2, F3, F4
9815 // fs0,fs1,fs2,fs3
9816 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9817 LoongArch::F26, LoongArch::F27};
9818 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9819 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9820 return false;
9821 }
9822 }
9823
9824 if (LocVT == MVT::f64) {
9825 // Pass in STG registers: D1, D2, D3, D4
9826 // fs4,fs5,fs6,fs7
9827 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9828 LoongArch::F30_64, LoongArch::F31_64};
9829 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9830 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9831 return false;
9832 }
9833 }
9834
9835 report_fatal_error("No registers left in GHC calling convention");
9836 return true;
9837}
9838
9839// Transform physical registers into virtual registers.
9841 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9842 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9843 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9844
9846
9847 switch (CallConv) {
9848 default:
9849 llvm_unreachable("Unsupported calling convention");
9850 case CallingConv::C:
9851 case CallingConv::Fast:
9854 break;
9855 case CallingConv::GHC:
9856 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9857 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9859 "GHC calling convention requires the F and D extensions");
9860 }
9861
9862 const Function &Func = MF.getFunction();
9863 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9864 MVT GRLenVT = Subtarget.getGRLenVT();
9865 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9866
9867 // Check if this function has any musttail calls. If so, incoming indirect
9868 // arg pointers must be saved in virtual registers so they survive across
9869 // basic blocks (the SelectionDAG is cleared between BBs). Only do this
9870 // when needed to avoid adding register pressure to non-musttail functions.
9871 bool HasMusttail = llvm::any_of(Func, [](const BasicBlock &BB) {
9872 return llvm::any_of(BB, [](const Instruction &I) {
9873 if (const auto *CI = dyn_cast<CallInst>(&I))
9874 return CI->isMustTailCall();
9875 return false;
9876 });
9877 });
9878 // Used with varargs to accumulate store chains.
9879 std::vector<SDValue> OutChains;
9880
9881 // Assign locations to all of the incoming arguments.
9883 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9884
9885 if (CallConv == CallingConv::GHC)
9887 else
9888 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9889
9890 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9891 CCValAssign &VA = ArgLocs[i];
9892 SDValue ArgValue;
9893 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9894 // case.
9895 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9896 assert(VA.needsCustom());
9897 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9898 } else if (VA.isRegLoc())
9899 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9900 else
9901 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9902 if (VA.getLocInfo() == CCValAssign::Indirect) {
9903 // If the original argument was split and passed by reference, we need to
9904 // load all parts of it here (using the same address).
9905 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9907 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
9908 if (HasMusttail) {
9911 Register VReg =
9912 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
9913 Chain = DAG.getCopyToReg(Chain, DL, VReg, ArgValue);
9914 LAFI->setIncomingIndirectArg(ArgIndex, VReg);
9915 }
9916 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9917 assert(ArgPartOffset == 0);
9918 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9919 CCValAssign &PartVA = ArgLocs[i + 1];
9920 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9921 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9922 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9923 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9925 ++i;
9926 ++InsIdx;
9927 }
9928 continue;
9929 }
9930 InVals.push_back(ArgValue);
9931 }
9932
9933 if (IsVarArg) {
9935 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9936 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9937 MachineFrameInfo &MFI = MF.getFrameInfo();
9938 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9939 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9940
9941 // Offset of the first variable argument from stack pointer, and size of
9942 // the vararg save area. For now, the varargs save area is either zero or
9943 // large enough to hold a0-a7.
9944 int VaArgOffset, VarArgsSaveSize;
9945
9946 // If all registers are allocated, then all varargs must be passed on the
9947 // stack and we don't need to save any argregs.
9948 if (ArgRegs.size() == Idx) {
9949 VaArgOffset = CCInfo.getStackSize();
9950 VarArgsSaveSize = 0;
9951 } else {
9952 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9953 VaArgOffset = -VarArgsSaveSize;
9954 }
9955
9956 // Record the frame index of the first variable argument
9957 // which is a value necessary to VASTART.
9958 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9959 LoongArchFI->setVarArgsFrameIndex(FI);
9960
9961 // If saving an odd number of registers then create an extra stack slot to
9962 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9963 // offsets to even-numbered registers remain 2*GRLen-aligned.
9964 if (Idx % 2) {
9965 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9966 true);
9967 VarArgsSaveSize += GRLenInBytes;
9968 }
9969
9970 // Copy the integer registers that may have been used for passing varargs
9971 // to the vararg save area.
9972 for (unsigned I = Idx; I < ArgRegs.size();
9973 ++I, VaArgOffset += GRLenInBytes) {
9974 const Register Reg = RegInfo.createVirtualRegister(RC);
9975 RegInfo.addLiveIn(ArgRegs[I], Reg);
9976 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9977 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9978 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9979 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9981 cast<StoreSDNode>(Store.getNode())
9982 ->getMemOperand()
9983 ->setValue((Value *)nullptr);
9984 OutChains.push_back(Store);
9985 }
9986 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9987 }
9988
9989 // All stores are grouped in one node to allow the matching between
9990 // the size of Ins and InVals. This only happens for vararg functions.
9991 if (!OutChains.empty()) {
9992 OutChains.push_back(Chain);
9993 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9994 }
9995
9996 return Chain;
9997}
9998
10000 return CI->isTailCall();
10001}
10002
10003// Check if the return value is used as only a return value, as otherwise
10004// we can't perform a tail-call.
10006 SDValue &Chain) const {
10007 if (N->getNumValues() != 1)
10008 return false;
10009 if (!N->hasNUsesOfValue(1, 0))
10010 return false;
10011
10012 SDNode *Copy = *N->user_begin();
10013 if (Copy->getOpcode() != ISD::CopyToReg)
10014 return false;
10015
10016 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
10017 // isn't safe to perform a tail call.
10018 if (Copy->getGluedNode())
10019 return false;
10020
10021 // The copy must be used by a LoongArchISD::RET, and nothing else.
10022 bool HasRet = false;
10023 for (SDNode *Node : Copy->users()) {
10024 if (Node->getOpcode() != LoongArchISD::RET)
10025 return false;
10026 HasRet = true;
10027 }
10028
10029 if (!HasRet)
10030 return false;
10031
10032 Chain = Copy->getOperand(0);
10033 return true;
10034}
10035
10036// Check whether the call is eligible for tail call optimization.
10037bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
10038 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
10039 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
10040
10041 auto CalleeCC = CLI.CallConv;
10042 auto &Outs = CLI.Outs;
10043 auto &Caller = MF.getFunction();
10044 auto CallerCC = Caller.getCallingConv();
10045
10046 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
10047
10048 // Byval parameters hand the function a pointer directly into the stack area
10049 // we want to reuse during a tail call. Working around this *is* possible
10050 // but less efficient and uglier in LowerCall. For musttail, there is no
10051 // workaround today: a byval arg requires a local copy that becomes invalid
10052 // after the tail call deallocates the caller's frame, so rejecting here
10053 // (and triggering reportFatalInternalError in LowerCall) is safer than
10054 // miscompiling.
10055 for (auto &Arg : Outs)
10056 if (Arg.Flags.isByVal())
10057 return false;
10058
10059 // musttail bypasses the remaining checks: the checks either reject cases
10060 // we handle specially (indirect args are forwarded via incoming pointers,
10061 // stack-passed args reuse the matching incoming layout, sret is forwarded
10062 // like any other pointer arg) or are optimizations not applicable to
10063 // mandatory tail calls.
10064 if (IsMustTail)
10065 return true;
10066
10067 // Do not tail call opt if the stack is used to pass parameters.
10068 if (CCInfo.getStackSize() != 0)
10069 return false;
10070
10071 // Do not tail call opt if any parameters need to be passed indirectly.
10072 for (auto &VA : ArgLocs)
10073 if (VA.getLocInfo() == CCValAssign::Indirect)
10074 return false;
10075
10076 // Do not tail call opt if either caller or callee uses struct return
10077 // semantics.
10078 auto IsCallerStructRet = Caller.hasStructRetAttr();
10079 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
10080 if (IsCallerStructRet || IsCalleeStructRet)
10081 return false;
10082
10083 // The callee has to preserve all registers the caller needs to preserve.
10084 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
10085 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
10086 if (CalleeCC != CallerCC) {
10087 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
10088 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
10089 return false;
10090 }
10091 return true;
10092}
10093
10095 return DAG.getDataLayout().getPrefTypeAlign(
10096 VT.getTypeForEVT(*DAG.getContext()));
10097}
10098
10099// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
10100// and output parameter nodes.
10101SDValue
10103 SmallVectorImpl<SDValue> &InVals) const {
10104 SelectionDAG &DAG = CLI.DAG;
10105 SDLoc &DL = CLI.DL;
10107 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
10109 SDValue Chain = CLI.Chain;
10110 SDValue Callee = CLI.Callee;
10111 CallingConv::ID CallConv = CLI.CallConv;
10112 bool IsVarArg = CLI.IsVarArg;
10113 EVT PtrVT = getPointerTy(DAG.getDataLayout());
10114 MVT GRLenVT = Subtarget.getGRLenVT();
10115 bool &IsTailCall = CLI.IsTailCall;
10116
10118
10119 // Analyze the operands of the call, assigning locations to each operand.
10121 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
10122
10123 if (CallConv == CallingConv::GHC)
10124 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
10125 else
10126 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
10127
10128 // Check if it's really possible to do a tail call.
10129 if (IsTailCall)
10130 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
10131
10132 if (IsTailCall)
10133 ++NumTailCalls;
10134 else if (CLI.CB && CLI.CB->isMustTailCall())
10135 report_fatal_error("failed to perform tail call elimination on a call "
10136 "site marked musttail");
10137
10138 // Get a count of how many bytes are to be pushed on the stack.
10139 unsigned NumBytes = ArgCCInfo.getStackSize();
10140
10141 // Create local copies for byval args.
10142 SmallVector<SDValue> ByValArgs;
10143 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
10144 ISD::ArgFlagsTy Flags = Outs[i].Flags;
10145 if (!Flags.isByVal())
10146 continue;
10147
10148 SDValue Arg = OutVals[i];
10149 unsigned Size = Flags.getByValSize();
10150 Align Alignment = Flags.getNonZeroByValAlign();
10151
10152 int FI =
10153 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
10154 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
10155 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
10156
10157 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment, Alignment,
10158 /*IsVolatile=*/false,
10159 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
10161 ByValArgs.push_back(FIPtr);
10162 }
10163
10164 if (!IsTailCall)
10165 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
10166
10167 // Copy argument values to their designated locations.
10169 SmallVector<SDValue> MemOpChains;
10170 SDValue StackPtr;
10171 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
10172 ++i, ++OutIdx) {
10173 CCValAssign &VA = ArgLocs[i];
10174 SDValue ArgValue = OutVals[OutIdx];
10175 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
10176
10177 // Handle passing f64 on LA32D with a soft float ABI as a special case.
10178 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10179 assert(VA.isRegLoc() && "Expected register VA assignment");
10180 assert(VA.needsCustom());
10181 SDValue SplitF64 =
10182 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10183 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
10184 SDValue Lo = SplitF64.getValue(0);
10185 SDValue Hi = SplitF64.getValue(1);
10186
10187 Register RegLo = VA.getLocReg();
10188 RegsToPass.push_back(std::make_pair(RegLo, Lo));
10189
10190 // Get the CCValAssign for the Hi part.
10191 CCValAssign &HiVA = ArgLocs[++i];
10192
10193 if (HiVA.isMemLoc()) {
10194 // Second half of f64 is passed on the stack.
10195 if (!StackPtr.getNode())
10196 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10198 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10199 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
10200 // Emit the store.
10201 MemOpChains.push_back(DAG.getStore(
10202 Chain, DL, Hi, Address,
10204 } else {
10205 // Second half of f64 is passed in another GPR.
10206 Register RegHigh = HiVA.getLocReg();
10207 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
10208 }
10209 continue;
10210 }
10211
10212 // Promote the value if needed.
10213 // For now, only handle fully promoted and indirect arguments.
10214 if (VA.getLocInfo() == CCValAssign::Indirect) {
10215 // For musttail calls, reuse incoming indirect pointers instead of
10216 // creating new stack temporaries. The incoming pointers point to the
10217 // caller's caller's frame, which remains valid after a tail call.
10218 if (IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
10221 unsigned CallArgIdx = Outs[OutIdx].OrigArgIndex;
10222
10223 // Resolve which formal parameter is being passed at this call
10224 // position.
10225 //
10226 // FIXME: Ins[].OrigArgIndex is Argument::getArgNo() (unfiltered),
10227 // but Outs[].OrigArgIndex is an index into a filtered arg list
10228 // (empty types removed, via CallLoweringInfo in the target-
10229 // independent layer). IncomingIndirectArgs is keyed by the
10230 // caller's unfiltered Argument::getArgNo(), so we have to walk
10231 // the caller's formals (same filter) to translate the index.
10232 // This target-independent asymmetry should be normalized so
10233 // backends do not need to re-derive the mapping.
10234 //
10235 // Steps:
10236 // 1. Find the call operand at filtered position CallArgIdx.
10237 // 2. If it is an Argument, use getArgNo() directly (same filter
10238 // for caller formals and call operands).
10239 // 3. Otherwise (computed value), walk the caller's formals and
10240 // skip empty types to map the filtered index to getArgNo().
10241 const Argument *FormalArg = nullptr;
10242 unsigned FilteredIdx = 0;
10243 for (const auto &CallArg : CLI.CB->args()) {
10244 if (CallArg->getType()->isEmptyTy())
10245 continue;
10246 if (FilteredIdx == CallArgIdx) {
10247 FormalArg = dyn_cast<Argument>(CallArg);
10248 break;
10249 }
10250 ++FilteredIdx;
10251 }
10252
10253 // For forwarded args, getArgNo() gives the unfiltered index directly.
10254 // For computed args, walk the caller's formals to resolve it.
10255 unsigned FormalArgIdx = CallArgIdx;
10256 if (FormalArg) {
10257 FormalArgIdx = FormalArg->getArgNo();
10258 } else {
10259 FilteredIdx = 0;
10260 for (const auto &Arg : MF.getFunction().args()) {
10261 if (Arg.getType()->isEmptyTy())
10262 continue;
10263 if (FilteredIdx == CallArgIdx) {
10264 FormalArgIdx = Arg.getArgNo();
10265 break;
10266 }
10267 ++FilteredIdx;
10268 }
10269 }
10270
10271 Register VReg = LAFI->getIncomingIndirectArg(FormalArgIdx);
10272 SDValue CopyOp = DAG.getCopyFromReg(Chain, DL, VReg, PtrVT);
10273 // Thread the CopyFromReg output chain through MemOpChains so the
10274 // TokenFactor below sequences the copy with any stores we emit
10275 // for this argument.
10276 MemOpChains.push_back(CopyOp.getValue(1));
10277 SDValue IncomingPtr = CopyOp;
10278
10279 if (!FormalArg) {
10280 // Computed value: store into the incoming indirect pointer for the
10281 // same-position formal parameter (musttail guarantees matching
10282 // prototypes, so types match). The pointer survives the tail call
10283 // since it points to the caller's caller's frame.
10284 //
10285 // The data-flow edge through IncomingPtr already prevents the
10286 // store from being scheduled before the CopyFromReg. Threading
10287 // CopyOp.getValue(1) (the copy's output chain) into the store
10288 // makes that ordering explicit on the chain edge as well, which
10289 // is the convention for memory ops chaining off their producers.
10290 MemOpChains.push_back(
10291 DAG.getStore(CopyOp.getValue(1), DL, ArgValue, IncomingPtr,
10293 // Store any split parts at their respective offsets.
10294 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10295 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10296 SDValue PartValue = OutVals[OutIdx + 1];
10297 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10298 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10299 SDValue Addr =
10300 DAG.getNode(ISD::ADD, DL, PtrVT, IncomingPtr, Offset);
10301 MemOpChains.push_back(
10302 DAG.getStore(CopyOp.getValue(1), DL, PartValue, Addr,
10304 ++i;
10305 ++OutIdx;
10306 }
10307 }
10308 ArgValue = IncomingPtr;
10309
10310 // Skip any remaining split parts (for forwarded args, they are
10311 // covered by the forwarded pointer).
10312 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == CallArgIdx) {
10313 ++i;
10314 ++OutIdx;
10315 }
10316 } else {
10317 // Store the argument in a stack slot and pass its address.
10318 Align StackAlign =
10319 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
10320 getPrefTypeAlign(ArgValue.getValueType(), DAG));
10321 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
10322 // If the original argument was split and passed by reference, we need
10323 // to store the required parts of it here (and pass just one address).
10324 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
10325 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
10326 assert(ArgPartOffset == 0);
10327 // Calculate the total size to store. We don't have access to what we're
10328 // actually storing other than performing the loop and collecting the
10329 // info.
10331 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
10332 SDValue PartValue = OutVals[OutIdx + 1];
10333 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
10334 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
10335 EVT PartVT = PartValue.getValueType();
10336 StoredSize += PartVT.getStoreSize();
10337 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
10338 Parts.push_back(std::make_pair(PartValue, Offset));
10339 ++i;
10340 ++OutIdx;
10341 }
10342 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
10343 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
10344 MemOpChains.push_back(
10345 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
10347 for (const auto &Part : Parts) {
10348 SDValue PartValue = Part.first;
10349 SDValue PartOffset = Part.second;
10351 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
10352 MemOpChains.push_back(
10353 DAG.getStore(Chain, DL, PartValue, Address,
10355 }
10356 ArgValue = SpillSlot;
10357 }
10358 } else {
10359 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
10360 }
10361
10362 // Use local copy if it is a byval arg.
10363 if (Flags.isByVal())
10364 ArgValue = ByValArgs[j++];
10365
10366 if (VA.isRegLoc()) {
10367 // Queue up the argument copies and emit them at the end.
10368 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
10369 } else {
10370 assert(VA.isMemLoc() && "Argument not register or memory");
10371 assert((!IsTailCall || (CLI.CB && CLI.CB->isMustTailCall())) &&
10372 "Tail call not allowed if stack is used for passing parameters");
10373
10374 // Work out the address of the stack slot.
10375 if (!StackPtr.getNode())
10376 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
10378 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
10380
10381 // Emit the store.
10382 MemOpChains.push_back(
10383 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
10384 }
10385 }
10386
10387 // Join the stores, which are independent of one another.
10388 if (!MemOpChains.empty())
10389 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
10390
10391 SDValue Glue;
10392
10393 // Build a sequence of copy-to-reg nodes, chained and glued together.
10394 for (auto &Reg : RegsToPass) {
10395 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
10396 Glue = Chain.getValue(1);
10397 }
10398
10399 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
10400 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
10401 // split it and then direct call can be matched by PseudoCALL_SMALL.
10403 const GlobalValue *GV = S->getGlobal();
10404 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
10407 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
10408 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
10409 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
10412 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
10413 }
10414
10415 // The first call operand is the chain and the second is the target address.
10417 Ops.push_back(Chain);
10418 Ops.push_back(Callee);
10419
10420 // Add argument registers to the end of the list so that they are
10421 // known live into the call.
10422 for (auto &Reg : RegsToPass)
10423 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
10424
10425 if (!IsTailCall) {
10426 // Add a register mask operand representing the call-preserved registers.
10427 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
10428 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
10429 assert(Mask && "Missing call preserved mask for calling convention");
10430 Ops.push_back(DAG.getRegisterMask(Mask));
10431 }
10432
10433 // Glue the call to the argument copies, if any.
10434 if (Glue.getNode())
10435 Ops.push_back(Glue);
10436
10437 // Emit the call.
10438 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
10439 unsigned Op;
10440 switch (DAG.getTarget().getCodeModel()) {
10441 default:
10442 report_fatal_error("Unsupported code model");
10443 case CodeModel::Small:
10444 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
10445 break;
10446 case CodeModel::Medium:
10447 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
10448 break;
10449 case CodeModel::Large:
10450 assert(Subtarget.is64Bit() && "Large code model requires LA64");
10451 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
10452 break;
10453 }
10454
10455 if (IsTailCall) {
10457 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
10458 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
10459 return Ret;
10460 }
10461
10462 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
10463 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
10464 Glue = Chain.getValue(1);
10465
10466 // Mark the end of the call, which is glued to the call itself.
10467 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
10468 Glue = Chain.getValue(1);
10469
10470 // Assign locations to each value returned by this call.
10472 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
10473 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
10474
10475 // Copy all of the result registers out of their specified physreg.
10476 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
10477 auto &VA = RVLocs[i];
10478 // Copy the value out.
10479 SDValue RetValue =
10480 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
10481 // Glue the RetValue to the end of the call sequence.
10482 Chain = RetValue.getValue(1);
10483 Glue = RetValue.getValue(2);
10484
10485 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10486 assert(VA.needsCustom());
10487 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
10488 MVT::i32, Glue);
10489 Chain = RetValue2.getValue(1);
10490 Glue = RetValue2.getValue(2);
10491 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
10492 RetValue, RetValue2);
10493 } else
10494 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
10495
10496 InVals.push_back(RetValue);
10497 }
10498
10499 return Chain;
10500}
10501
10503 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
10504 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
10505 const Type *RetTy) const {
// NOTE(review): the declarator line that precedes this parameter list and the
// declaration of RVLocs are not present in this listing.
//
// Runs CC_LoongArch (with IsRet=true) over every entry of Outs and returns
// false as soon as one call returns true; returns true when none does.
// RetTy is not referenced anywhere in the visible body.
10507 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
10508
10509 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
// The target ABI is re-read from the subtarget on every iteration.
10510 LoongArchABI::ABI ABI =
10511 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
// Presumably a true result from CC_LoongArch means the value could not be
// assigned a return location -- confirm against its definition.
10512 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
10513 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
10514 return false;
10515 }
10516 return true;
10517}
10518
10520 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
10522 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
10523 SelectionDAG &DAG) const {
// NOTE(review): the declarator line, the line declaring the Outs parameter and
// the declaration of RVLocs are not present in this listing.
//
// Builds the LoongArchISD::RET node for a function return: each value in
// OutVals is copied into the register chosen for it by CC_LoongArch, the
// copies are chained and glued together, and the registers are appended as
// operands of the RET node.
10524 // Stores the assignment of the return value to a location.
10526
10527 // Info about the registers and stack slot.
10528 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
10529 *DAG.getContext());
10530
10531 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
10532 nullptr, CC_LoongArch);
// A GHC-convention function with any return location is a fatal error.
10533 if (CallConv == CallingConv::GHC && !RVLocs.empty())
10534 report_fatal_error("GHC functions return void only");
10535 SDValue Glue;
// Operand 0 is reserved for the chain; it is overwritten after the loop.
10536 SmallVector<SDValue, 4> RetOps(1, Chain);
10537
10538 // Copy the result values into the output registers.
// i indexes RVLocs and OutIdx indexes OutVals. The f64 branch below consumes
// two RVLocs entries for one OutVals entry, so the two can diverge.
10539 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
10540 SDValue Val = OutVals[OutIdx];
10541 CCValAssign &VA = RVLocs[i];
10542 assert(VA.isRegLoc() && "Can only return in registers!");
10543
10544 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
10545 // Handle returning f64 on LA32D with a soft float ABI.
10546 assert(VA.isRegLoc() && "Expected return via registers");
10547 assert(VA.needsCustom());
// Split the f64 into two i32 halves; the high half goes in the register of
// the next RVLocs entry (note the ++i).
10548 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
10549 DAG.getVTList(MVT::i32, MVT::i32), Val);
10550 SDValue Lo = SplitF64.getValue(0);
10551 SDValue Hi = SplitF64.getValue(1);
10552 Register RegLo = VA.getLocReg();
10553 Register RegHi = RVLocs[++i].getLocReg();
10554
10555 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
10556 Glue = Chain.getValue(1);
10557 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
10558 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
10559 Glue = Chain.getValue(1);
10560 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
10561 } else {
10562 // Handle a 'normal' return.
10563 Val = convertValVTToLocVT(DAG, Val, VA, DL);
10564 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
10565
10566 // Guarantee that all emitted copies are stuck together.
10567 Glue = Chain.getValue(1);
10568 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
10569 }
10570 }
10571
10572 RetOps[0] = Chain; // Update chain.
10573
10574 // Add the glue node if we have it.
10575 if (Glue.getNode())
10576 RetOps.push_back(Glue);
10577
10578 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
10579}
10580
10581// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
10582// Note: The following prefixes are excluded:
10583// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
10584// as they can be represented using [x]vrepli.[whb]
//
// Returns {true, Imm} when SplatValue matches one of the patterns below, where
// Imm is a 5-bit prefix (top bit set) shifted left by 8 and OR'ed with an
// 8-bit payload extracted from the value; returns {false, 0} otherwise.
// SplatBitSize selects which patterns are tried (16, 32 or 64); any other
// size never matches.
// NOTE(review): the declarator line of this function is not present in this
// listing.
10586 const APInt &SplatValue, const unsigned SplatBitSize) const {
10587 uint64_t RequiredImm = 0;
10588 uint64_t V = SplatValue.getZExtValue();
// 16-bit splat: only the high byte may be non-zero.
10589 if (SplatBitSize == 16 && !(V & 0x00FF)) {
10590 // 4'b0101
10591 RequiredImm = (0b10101 << 8) | (V >> 8);
10592 return {true, RequiredImm};
10593 } else if (SplatBitSize == 32) {
// 32-bit splat: a single non-zero byte at byte 1, 2 or 3 ...
10594 // 4'b0001
10595 if (!(V & 0xFFFF00FF)) {
10596 RequiredImm = (0b10001 << 8) | (V >> 8);
10597 return {true, RequiredImm};
10598 }
10599 // 4'b0010
10600 if (!(V & 0xFF00FFFF)) {
10601 RequiredImm = (0b10010 << 8) | (V >> 16);
10602 return {true, RequiredImm};
10603 }
10604 // 4'b0011
10605 if (!(V & 0x00FFFFFF)) {
10606 RequiredImm = (0b10011 << 8) | (V >> 24);
10607 return {true, RequiredImm};
10608 }
// ... or a payload byte sitting above a run of all-ones low bytes.
10609 // 4'b0110
10610 if ((V & 0xFFFF00FF) == 0xFF) {
10611 RequiredImm = (0b10110 << 8) | (V >> 8);
10612 return {true, RequiredImm};
10613 }
10614 // 4'b0111
10615 if ((V & 0xFF00FFFF) == 0xFFFF) {
10616 RequiredImm = (0b10111 << 8) | (V >> 16);
10617 return {true, RequiredImm};
10618 }
// Bits 30..25 must be 011111 or 100000 and bits 18..0 must be zero. The
// payload is bit 31, the inverse of bit 30, and bits 24..19.
10619 // 4'b1010
10620 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
10621 RequiredImm =
10622 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10623 return {true, RequiredImm};
10624 }
10625 } else if (SplatBitSize == 64) {
// Same 32-bit pattern as 4'b1010, with the upper 32 bits required to be zero.
10626 // 4'b1011
10627 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
10628 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
10629 RequiredImm =
10630 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
10631 return {true, RequiredImm};
10632 }
// Bits 62..54 must be 100000000 or 011111111 and bits 47..0 must be zero. The
// payload is bit 63, the inverse of bit 62, and bits 53..48.
10633 // 4'b1100
10634 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
10635 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
10636 RequiredImm =
10637 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
10638 return {true, RequiredImm};
10639 }
10640 // 4'b1001
// Succeeds only if each of the eight bytes is 0x00 or 0xFF; bit i of the
// result is the low bit of byte i (byte 0 is the least significant).
10641 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
10642 uint8_t res = 0;
10643 for (int i = 0; i < 8; ++i) {
10644 uint8_t byte = x & 0xFF;
10645 if (byte == 0 || byte == 0xFF)
10646 res |= ((byte & 1) << i);
10647 else
10648 return {false, 0};
10649 x >>= 8;
10650 }
10651 return {true, res};
10652 };
10653 auto [IsSame, Suffix] = sameBitsPreByte(V);
10654 if (IsSame) {
10655 RequiredImm = (0b11001 << 8) | Suffix;
10656 return {true, RequiredImm};
10657 }
10658 }
// No pattern matched; RequiredImm is still 0 here.
10659 return {false, RequiredImm};
10660}
10661
10663 EVT VT) const {
// NOTE(review): the declarator line (and the Imm parameter it declares) is not
// present in this listing.
//
// Returns true when the subtarget has LSX and the bit pattern of Imm matches
// one of two masks for its type (f32 or f64); false for every other type.
10664 if (!Subtarget.hasExtLSX())
10665 return false;
10666
10667 if (VT == MVT::f32) {
// Bits 30..25 must be 011111 or 100000 and bits 18..0 must be zero.
10668 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
10669 return (masked == 0x3e000000 || masked == 0x40000000);
10670 }
10671
10672 if (VT == MVT::f64) {
// Bits 62..54 must be 011111111 or 100000000 and bits 47..0 must be zero.
10673 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
10674 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
10675 }
10676
10677 return false;
10678}
10679
10680bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
10681 bool ForCodeSize) const {
10682 // TODO: Maybe need more checks here after vector extension is supported.
10683 if (VT == MVT::f32 && !Subtarget.hasBasicF())
10684 return false;
10685 if (VT == MVT::f64 && !Subtarget.hasBasicD())
10686 return false;
10687 return (Imm.isZero() || Imm.isOne() || isFPImmVLDILegal(Imm, VT));
10688}
10689
// NOTE(review): the signature line of this function is not present in this
// listing; the visible body unconditionally returns true.
10691 return true;
10692}
10693
// NOTE(review): the signature line of this function is not present in this
// listing; the visible body unconditionally returns true.
10695 return true;
10696}
10697
10698bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
10699 const Instruction *I) const {
10700 if (!Subtarget.is64Bit())
10701 return isa<LoadInst>(I) || isa<StoreInst>(I);
10702
10703 if (isa<LoadInst>(I))
10704 return true;
10705
10706 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
10707 // require fences beacuse we can use amswap_db.[w/d].
10708 Type *Ty = I->getOperand(0)->getType();
10709 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
10710 unsigned Size = Ty->getIntegerBitWidth();
10711 return (Size == 8 || Size == 16);
10712 }
10713
10714 return false;
10715}
10716
10718 LLVMContext &Context,
10719 EVT VT) const {
// NOTE(review): the declarator line (which declares DL) and the statement that
// handles vector types are not present in this listing.
// For a non-vector VT the result type is the pointer type for DL.
10720 if (!VT.isVector())
10721 return getPointerTy(DL);
10723}
10724
10726 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
// NOTE(review): the declarator line is not present in this listing.
//
// Returns true when MemVT is no wider than the largest width allowed here:
// GRLen-sized (64 or 32 bits) if the function carries NoImplicitFloat,
// otherwise 256 with LASX, 128 with LSX, or the GRLen-sized limit.
// AddressSpace is not consulted.
10727 // Do not merge to float value size (128 or 256 bits) if no implicit
10728 // float attribute is set.
10729 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
10730 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
10731 if (NoFloat)
10732 return MemVT.getSizeInBits() <= MaxIntSize;
10733
10734 // Make sure we don't merge greater than our maximum supported vector width.
10735 if (Subtarget.hasExtLASX())
10736 MaxIntSize = 256;
10737 else if (Subtarget.hasExtLSX())
10738 MaxIntSize = 128;
10739
10740 return MemVT.getSizeInBits() <= MaxIntSize;
10741}
10742
// NOTE(review): the signature line (which declares Y) is not present in this
// listing.
// Vector Y: true only for integer vectors when the subtarget has LSX.
// Scalar Y: true only for scalar integers that are not a ConstantSDNode.
10744 EVT VT = Y.getValueType();
10745
10746 if (VT.isVector())
10747 return Subtarget.hasExtLSX() && VT.isInteger();
10748
10749 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
10750}
10751
10754 MachineFunction &MF, unsigned Intrinsic) const {
// NOTE(review): the declarator lines (which declare Infos and I) and some
// assignments to Info are not present in this listing.
//
// For the four 32-bit masked atomicrmw intrinsics listed below, appends one
// IntrinsicInfo to Infos describing an i32 access through argument 0 with
// offset 0 and 4-byte alignment. Every other intrinsic appends nothing.
10755 switch (Intrinsic) {
10756 default:
10757 return;
10758 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
10759 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
10760 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
10761 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
10762 IntrinsicInfo Info;
10764 Info.memVT = MVT::i32;
10765 Info.ptrVal = I.getArgOperand(0);
10766 Info.offset = 0;
10767 Info.align = Align(4);
10770 Infos.push_back(Info);
10771 return;
10772 // TODO: Add more Intrinsics later.
10773 }
10774 }
10775}
10776
10777// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
10778// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
10779// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
10780// regression, we need to implement it manually.
//
// The visible body rewrites AI as a 32-bit atomicrmw on the enclosing
// 4-byte-aligned word, then extracts the original-width old value from the
// result and replaces AI with it.
// NOTE(review): the declarator, the definition of Op and the start of the
// assert below are not present in this listing.
10783
10785 Op == AtomicRMWInst::And) &&
10786 "Unable to expand");
10787 unsigned MinWordSize = 4;
10788
10789 IRBuilder<> Builder(AI);
10790 LLVMContext &Ctx = Builder.getContext();
10791 const DataLayout &DL = AI->getDataLayout();
10792 Type *ValueType = AI->getType();
10793 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10794
10795 Value *Addr = AI->getPointerOperand();
10796 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10797 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10798
// Clear the low two address bits to obtain the containing word's address.
10799 Value *AlignedAddr = Builder.CreateIntrinsic(
10800 Intrinsic::ptrmask, {PtrTy, IntTy},
10801 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10802 "AlignedAddr");
10803
// ShiftAmt = (Addr & 3) * 8: bit offset of the value inside the word.
10804 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10805 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10806 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10807 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
// Mask covers the value's bits inside the word; Inv_Mask covers the rest.
10808 Value *Mask = Builder.CreateShl(
10809 ConstantInt::get(WordType,
10810 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10811 ShiftAmt, "Mask");
10812 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10813 Value *ValOperand_Shifted =
10814 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10815 ShiftAmt, "ValOperand_Shifted");
// For And, the bits outside the value are set to 1 so the neighbouring bytes
// are left unchanged. For other operations the zero-extended operand is used
// as is.
10816 Value *NewOperand;
10817 if (Op == AtomicRMWInst::And)
10818 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10819 else
10820 NewOperand = ValOperand_Shifted;
10821
10822 AtomicRMWInst *NewAI =
10823 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10824 AI->getOrdering(), AI->getSyncScopeID());
10825
// Shift the old word back down and truncate to the original value type.
10826 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10827 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10828 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10829 AI->replaceAllUsesWith(FinalOldResult);
10830 AI->eraseFromParent();
10831}
10832
10835 const AtomicRMWInst *AI) const {
// NOTE(review): the declarator, several condition continuations and every
// return statement of this function are not present in this listing, so only
// the visible conditions are described here.
10836 // TODO: Add more AtomicRMWInst that needs to be extended.
10837
10838 // Since floating-point operation requires a non-trivial set of data
10839 // operations, use CmpXChg to expand.
10840 if (AI->isFloatingPointOperation() ||
10846
// Condition on LAM_BH, a 64-bit subtarget and the operation kind.
10847 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10850 AI->getOperation() == AtomicRMWInst::Sub)) {
10852 }
10853
10854 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
// With LAMCAS, sub-32-bit operations and Nand are tested separately.
10855 if (Subtarget.hasLAMCAS()) {
10856 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10860 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10862 }
10863
// Remaining 8- and 16-bit operations are tested last.
10864 if (Size == 8 || Size == 16)
10867}
10868
// Maps an atomicrmw binary operation to the LoongArch masked-atomicrmw
// intrinsic ID for the given GRLen (64 selects the _i64 variants, 32 the _i32
// variants). Any other GRLen, or an operation without a case, hits
// llvm_unreachable.
// NOTE(review): the line with the function name and several case labels are
// not present in this listing.
10869static Intrinsic::ID
10871 AtomicRMWInst::BinOp BinOp) {
10872 if (GRLen == 64) {
10873 switch (BinOp) {
10874 default:
10875 llvm_unreachable("Unexpected AtomicRMW BinOp");
10877 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10878 case AtomicRMWInst::Add:
10879 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10880 case AtomicRMWInst::Sub:
10881 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10883 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10885 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10887 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10888 case AtomicRMWInst::Max:
10889 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10890 case AtomicRMWInst::Min:
10891 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10892 // TODO: support other AtomicRMWInst.
10893 }
10894 }
10895
10896 if (GRLen == 32) {
10897 switch (BinOp) {
10898 default:
10899 llvm_unreachable("Unexpected AtomicRMW BinOp");
10901 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10902 case AtomicRMWInst::Add:
10903 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10904 case AtomicRMWInst::Sub:
10905 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10907 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10909 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10911 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10912 case AtomicRMWInst::Max:
10913 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10914 case AtomicRMWInst::Min:
10915 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10916 // TODO: support other AtomicRMWInst.
10917 }
10918 }
10919
10920 llvm_unreachable("Unexpected GRLen\n");
10921}
10922
10925 const AtomicCmpXchgInst *CI) const {
// NOTE(review): the declarator, the definition of Size and every return
// statement of this function are not present in this listing.
// The visible conditions test for LAMCAS first and then for an 8- or 16-bit
// Size.
10926
10927 if (Subtarget.hasLAMCAS())
10929
10931 if (Size == 8 || Size == 16)
10934}
10935
10937 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10938 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
// NOTE(review): the declarator line is not present in this listing.
//
// Emits a call to the loongarch_masked_cmpxchg intrinsic (i32 or i64 variant
// depending on GRLen) and returns its result. When GRLen is 64 the operands
// are sign-extended to i64 and the result is truncated back to i32.
// The failure ordering of CI is passed as the last argument; Ord is not
// referenced in the visible body.
10939 unsigned GRLen = Subtarget.getGRLen();
10940 AtomicOrdering FailOrd = CI->getFailureOrdering();
10941 Value *FailureOrdering =
10942 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10943 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10944 if (GRLen == 64) {
10945 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10946 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10947 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10948 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10949 }
// The intrinsic is overloaded on the pointer type of AlignedAddr.
10950 Type *Tys[] = {AlignedAddr->getType()};
10951 Value *Result = Builder.CreateIntrinsic(
10952 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10953 if (GRLen == 64)
10954 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10955 return Result;
10956}
10957
10959 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10960 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
// NOTE(review): the declarator, the lines that bind CVal, the definition of
// LlwOpScwLoop and part of the min/max condition are not present in this
// listing.
//
// Emits the masked atomicrmw for AI on the aligned word at AlignedAddr and
// returns the resulting value (truncated to i32 when GRLen is 64).
10961 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10962 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10963 // mask, as this produces better code than the LL/SC loop emitted by
10964 // int_loongarch_masked_atomicrmw_xchg.
10965 if (AI->getOperation() == AtomicRMWInst::Xchg &&
// xchg 0 becomes "and ~Mask" (clears the field).
10968 if (CVal->isZero())
10969 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10970 Builder.CreateNot(Mask, "Inv_Mask"),
10971 AI->getAlign(), Ord);
// xchg -1 becomes "or Mask" (sets the field).
10972 if (CVal->isMinusOne())
10973 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10974 AI->getAlign(), Ord);
10975 }
10976
10977 unsigned GRLen = Subtarget.getGRLen();
// The ordering passed to the intrinsic comes from AI->getOrdering(), not from
// the Ord parameter.
10978 Value *Ordering =
10979 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10980 Type *Tys[] = {AlignedAddr->getType()};
10982 AI->getModule(),
10984
// With GRLen 64 the intrinsic operands are sign-extended to i64.
10985 if (GRLen == 64) {
10986 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10987 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10988 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10989 }
10990
10991 Value *Result;
10992
10993 // Must pass the shift amount needed to sign extend the loaded value prior
10994 // to performing a signed comparison for min/max. ShiftAmt is the number of
10995 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10996 // is the number of bits to left+right shift the value in order to
10997 // sign-extend.
10998 if (AI->getOperation() == AtomicRMWInst::Min ||
11000 const DataLayout &DL = AI->getDataLayout();
11001 unsigned ValWidth =
11002 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
11003 Value *SextShamt =
11004 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
11005 Result = Builder.CreateCall(LlwOpScwLoop,
11006 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
11007 } else {
11008 Result =
11009 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
11010 }
11011
11012 if (GRLen == 64)
11013 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
11014 return Result;
11015}
11016
11018 const MachineFunction &MF, EVT VT) const {
// NOTE(review): the declarator line is not present in this listing.
//
// Returns true when the scalar element type of VT is f32 or f64, and false
// for every other (or non-simple) type. MF is not consulted.
11019 VT = VT.getScalarType();
11020
11021 if (!VT.isSimple())
11022 return false;
11023
11024 switch (VT.getSimpleVT().SimpleTy) {
11025 case MVT::f32:
11026 case MVT::f64:
11027 return true;
11028 default:
11029 break;
11030 }
11031
11032 return false;
11033}
11034
11036 const Constant *PersonalityFn) const {
// NOTE(review): the declarator line is not present in this listing.
// Always returns LoongArch::R4; PersonalityFn is not consulted.
11037 return LoongArch::R4;
11038}
11039
11041 const Constant *PersonalityFn) const {
// NOTE(review): the declarator line is not present in this listing.
// Always returns LoongArch::R5; PersonalityFn is not consulted.
11042 return LoongArch::R5;
11043}
11044
11045//===----------------------------------------------------------------------===//
11046// Target Optimization Hooks
11047//===----------------------------------------------------------------------===//
11048
11050 const LoongArchSubtarget &Subtarget) {
// NOTE(review): the declarator line (which declares VT) is not present in this
// listing.
// Returns the default number of refinement steps: 2 when the scalar type of
// VT is f64, otherwise 1. Subtarget is not consulted.
11051 // Feature FRECIPE instructions relative accuracy is 2^-14.
11052 // IEEE float has 23 digits and double has 52 digits.
11053 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
11054 return RefinementSteps;
11055}
11056
// Returns true when VT is a type for which a reciprocal estimate may be
// emitted on Subtarget: f32 always, f64 with basic D, v4f32/v2f64 with LSX,
// v8f32/v4f64 with LASX. Asserts that the subtarget has FRECIPE.
// NOTE(review): the line with the function name and parameters is not present
// in this listing.
11057static bool
11059 assert(Subtarget.hasFrecipe() &&
11060 "Reciprocal estimate queried on unsupported target");
11061
11062 if (!VT.isSimple())
11063 return false;
11064
11065 switch (VT.getSimpleVT().SimpleTy) {
11066 case MVT::f32:
11067 // f32 is the base type for reciprocal estimate instructions.
11068 return true;
11069
11070 case MVT::f64:
11071 return Subtarget.hasBasicD();
11072
11073 case MVT::v4f32:
11074 case MVT::v2f64:
11075 return Subtarget.hasExtLSX();
11076
11077 case MVT::v8f32:
11078 case MVT::v4f64:
11079 return Subtarget.hasExtLASX();
11080
11081 default:
11082 return false;
11083 }
11084}
11085
11087 SelectionDAG &DAG, int Enabled,
11088 int &RefinementSteps,
11089 bool &UseOneConstNR,
11090 bool Reciprocal) const {
// NOTE(review): the declarator line (which declares Operand) and the start of
// the assert below are not present in this listing.
//
// Returns an FRSQRTE-based estimate for Operand, or an empty SDValue when the
// subtarget lacks FRECIPE or the type is unsupported.
// Outputs: RefinementSteps is filled in with the default when it arrives as
// Unspecified; UseOneConstNR is set to false on the success path.
11092 "Enabled should never be Disabled here");
11093
11094 if (!Subtarget.hasFrecipe())
11095 return SDValue();
11096
11097 SDLoc DL(Operand);
11098 EVT VT = Operand.getValueType();
11099
11100 // Check supported types.
11101 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
11102 return SDValue();
11103
11104 // Handle refinement steps.
11105 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11106 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11107
11108 // LoongArch only has FRSQRTE which is 1.0 / sqrt(x).
11109 UseOneConstNR = false;
11110 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
11111
11112 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
11113 // are needed (which rely on the reciprocal form), return the raw reciprocal
11114 // estimate.
11115 if (Reciprocal || RefinementSteps > 0)
11116 return Rsqrt;
11117
11118 // Otherwise, return sqrt(x) by multiplying with the operand.
11119 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
11120}
11121
11123 SelectionDAG &DAG,
11124 int Enabled,
11125 int &RefinementSteps) const {
// NOTE(review): the declarator line (which declares Operand) and the start of
// the assert below are not present in this listing.
//
// Returns a FRECIPE node for Operand, or an empty SDValue when the subtarget
// lacks FRECIPE or the type is unsupported. RefinementSteps is filled in with
// the default when it arrives as Unspecified.
11127 "Enabled should never be Disabled here");
11128
11129 if (!Subtarget.hasFrecipe())
11130 return SDValue();
11131
11132 SDLoc DL(Operand);
11133 EVT VT = Operand.getValueType();
11134
11135 // Check supported types.
11136 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
11137 return SDValue();
11138
11139 if (RefinementSteps == ReciprocalEstimate::Unspecified)
11140 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
11141
11142 // FRECIPE computes 1.0 / x.
11143 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
11144}
11145
11146//===----------------------------------------------------------------------===//
11147// LoongArch Inline Assembly Support
11148//===----------------------------------------------------------------------===//
11149
// Classifies an inline-asm constraint string: 'f' and 'q' are register
// classes, 'l', 'I', 'J' and 'K' are immediates, 'k', "ZB" and "ZC" are
// memory; everything else is deferred to TargetLowering::getConstraintType.
// NOTE(review): the return-type line that precedes this declarator is not
// present in this listing.
11151LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
11152 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
11153 //
11154 // 'f': A floating-point register (if available).
11155 // 'k': A memory operand whose address is formed by a base register and
11156 // (optionally scaled) index register.
11157 // 'l': A signed 16-bit constant.
11158 // 'm': A memory operand whose address is formed by a base register and
11159 // offset that is suitable for use in instructions with the same
11160 // addressing mode as st.w and ld.w.
11161 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
11162 // instruction)
11163 // 'I': A signed 12-bit constant (for arithmetic instructions).
11164 // 'J': Integer zero.
11165 // 'K': An unsigned 12-bit constant (for logic instructions).
11166 // "ZB": An address that is held in a general-purpose register. The offset is
11167 // zero.
11168 // "ZC": A memory operand whose address is formed by a base register and
11169 // offset that is suitable for use in instructions with the same
11170 // addressing mode as ll.w and sc.w.
11171 if (Constraint.size() == 1) {
11172 switch (Constraint[0]) {
11173 default:
11174 break;
11175 case 'f':
11176 case 'q':
11177 return C_RegisterClass;
11178 case 'l':
11179 case 'I':
11180 case 'J':
11181 case 'K':
11182 return C_Immediate;
11183 case 'k':
11184 return C_Memory;
11185 }
11186 }
11187
// Two-character memory constraints.
11188 if (Constraint == "ZC" || Constraint == "ZB")
11189 return C_Memory;
11190
11191 // 'm' is handled here.
11192 return TargetLowering::getConstraintType(Constraint);
11193}
11194
// Translates a memory-constraint string into its InlineAsm::ConstraintCode.
// A string that is not matched by the StringSwitch falls back to the generic
// TargetLowering::getInlineAsmMemConstraint mapping.
11195InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
11196 StringRef ConstraintCode) const {
11197 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
11201 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
11202}
11203
// Resolves an inline-asm register constraint to a (register, register class)
// pair.
//   - Single-letter constraints 'r', 'q' and 'f' select a register class
//     (first element 0U) based on VT and the subtarget features.
//   - Explicit names written as {$rN}, {$fN}, {$vrN} or {$xrN} have the '$'
//     removed before being looked up by the generic implementation.
//   - Anything else is forwarded unchanged to the generic implementation.
11204std::pair<unsigned, const TargetRegisterClass *>
11205LoongArchTargetLowering::getRegForInlineAsmConstraint(
11206 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
11207 // First, see if this is a constraint that directly corresponds to a LoongArch
11208 // register class.
11209 if (Constraint.size() == 1) {
11210 switch (Constraint[0]) {
11211 case 'r':
11212 // TODO: Support fixed vectors up to GRLen?
11213 if (VT.isVector())
11214 break;
11215 return std::make_pair(0U, &LoongArch::GPRRegClass);
11216 case 'q':
11217 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
11218 case 'f':
// Pick the FP/vector class by type; each choice requires the matching
// subtarget feature. If none applies, fall out of the switch.
11219 if (Subtarget.hasBasicF() && VT == MVT::f32)
11220 return std::make_pair(0U, &LoongArch::FPR32RegClass);
11221 if (Subtarget.hasBasicD() && VT == MVT::f64)
11222 return std::make_pair(0U, &LoongArch::FPR64RegClass);
11223 if (Subtarget.hasExtLSX() &&
11224 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
11225 return std::make_pair(0U, &LoongArch::LSX128RegClass);
11226 if (Subtarget.hasExtLASX() &&
11227 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
11228 return std::make_pair(0U, &LoongArch::LASX256RegClass);
11229 break;
11230 default:
11231 break;
11232 }
11233 }
11234
11235 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
11236 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
11237 // constraints while the official register name is prefixed with a '$'. So we
11238 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
11239 // before it is parsed. TargetLowering::getRegForInlineAsmConstraint is
11240 // case insensitive, so no need to convert the constraint to upper case here.
11241 //
11242 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
11243 // decode the usage of register name aliases into their official names. And
11244 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
11245 // official register names.
11246 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
11247 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
// Index 2 is the first character after "{$"; 'f' marks a floating-point name.
11248 bool IsFP = Constraint[2] == 'f';
// Split around the first '$' so the two halves can be re-joined without it.
11249 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
11250 std::pair<unsigned, const TargetRegisterClass *> R;
11252 TRI, join_items("", Temp.first, Temp.second), VT);
11253 // Match those names to the widest floating point register type available.
11254 if (IsFP) {
11255 unsigned RegNo = R.first;
11256 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
11257 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
// Rebase the index from F0 onto F0_64 (presumably the 64-bit register with
// the same number; relies on both enumerations being contiguous).
11258 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
11259 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
11260 }
11261 }
11262 }
11263 return R;
11264 }
11265
11266 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
11267}
11268
// Lowers an inline-asm operand for the immediate constraints 'l', 'I', 'J'
// and 'K'. When Op is a ConstantSDNode whose value satisfies the constraint,
// a target constant of type GRLenVT is appended to Ops; when it does not,
// nothing is appended and the function returns. Other constraints leave the
// switch without being handled here.
11269void LoongArchTargetLowering::LowerAsmOperandForConstraint(
11270 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
11271 SelectionDAG &DAG) const {
11272 // Only single-character constraints are handled here.
11273 if (Constraint.size() == 1) {
11274 switch (Constraint[0]) {
11275 case 'l':
11276 // Validate & create a 16-bit signed immediate operand.
11277 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11278 uint64_t CVal = C->getSExtValue();
11279 if (isInt<16>(CVal))
11280 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11281 Subtarget.getGRLenVT()));
11282 }
11283 return;
11284 case 'I':
11285 // Validate & create a 12-bit signed immediate operand.
11286 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11287 uint64_t CVal = C->getSExtValue();
11288 if (isInt<12>(CVal))
11289 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
11290 Subtarget.getGRLenVT()));
11291 }
11292 return;
11293 case 'J':
11294 // Validate & create an integer zero operand.
11295 if (auto *C = dyn_cast<ConstantSDNode>(Op))
11296 if (C->getZExtValue() == 0)
11297 Ops.push_back(
11298 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
11299 return;
11300 case 'K':
11301 // Validate & create a 12-bit unsigned immediate operand.
11302 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
11303 uint64_t CVal = C->getZExtValue();
11304 if (isUInt<12>(CVal))
11305 Ops.push_back(
11306 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
11307 }
11308 return;
11309 default:
11310 break;
11311 }
11312 }
11314}
11315
11316#define GET_REGISTER_MATCHER
11317#include "LoongArchGenAsmMatcher.inc"
11318
// Looks up a register by its textual name. The part of RegName after the
// first '$' is matched against the alternate-name table and then the primary
// name table. An unknown name yields an invalid Register; a known register
// that is not reserved in MF triggers a fatal error.
11321 const MachineFunction &MF) const {
// Keep only what follows the first '$' (empty if RegName contains no '$').
11322 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
11323 std::string NewRegName = Name.second.str();
11324 Register Reg = MatchRegisterAltName(NewRegName);
11325 if (!Reg)
11326 Reg = MatchRegisterName(NewRegName);
// No match in either table: hand the invalid Register back to the caller.
11327 if (!Reg)
11328 return Reg;
// Only registers reserved for this function may be obtained by name.
11329 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
11330 if (!ReservedRegs.test(Reg))
11331 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
11332 StringRef(RegName) + "\"."));
11333 return Reg;
11334}
11335
// Decides whether a multiplication by the constant C should be decomposed
// into shift/add/sub (or ALSL) sequences. Returns true for the immediate
// patterns recognised below and false otherwise, including for non-constant
// C, non-scalar-integer VT, and VT wider than GRLen.
11337 EVT VT, SDValue C) const {
11338 // TODO: Support vectors.
11339 if (!VT.isScalarInteger())
11340 return false;
11341
11342 // Omit the optimization if the data size exceeds GRLen.
11343 if (VT.getSizeInBits() > Subtarget.getGRLen())
11344 return false;
11345
11346 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
11347 const APInt &Imm = ConstNode->getAPIntValue();
11348 // Break MUL into (SLLI + ADD/SUB) or ALSL.
11349 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
11350 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
11351 return true;
11352 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
11353 if (ConstNode->hasOneUse() &&
11354 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
11355 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
11356 return true;
11357 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
11358 // in which the immediate has two set bits. Or break (MUL x, imm)
11359 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
11360 // equals (1 << s0) - (1 << s1).
// Only considered for single-use immediates outside [-2048, 4095].
11361 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
11362 unsigned Shifts = Imm.countr_zero();
11363 // Reject immediates which can be composed via a single LUI.
11364 if (Shifts >= 12)
11365 return false;
11366 // Reject multiplications that can be optimized to
11367 // (SLLI (ALSL x, x, 1/2/3/4), s).
11368 APInt ImmPop = Imm.ashr(Shifts);
11369 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
11370 return false;
11371 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
11372 // since it needs one more instruction than other 3 cases.
// ImmSmall is the lowest set bit of Imm (Shifts < 12 here, so the shift is
// well defined).
11373 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
11374 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
11375 (ImmSmall - Imm).isPowerOf2())
11376 return true;
11377 }
11378 }
11379
11380 return false;
11381}
11382
// Reports whether the addressing mode AM can be used directly. A global base
// is never accepted, the offset must fit one of the two immediate forms
// checked below, and only scales 0, 1 and 2 are accepted, each with the
// restrictions spelled out in the switch.
11384 const AddrMode &AM,
11385 Type *Ty, unsigned AS,
11386 Instruction *I) const {
11387 // LoongArch has four basic addressing modes:
11388 // 1. reg
11389 // 2. reg + 12-bit signed offset
11390 // 3. reg + 14-bit signed offset left-shifted by 2
11391 // 4. reg1 + reg2
11392 // TODO: Add more checks once the vector extensions are supported.
11393
11394 // No global is ever allowed as a base.
11395 if (AM.BaseGV)
11396 return false;
11397
11398 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
11399 // with `UAL` feature.
11400 if (!isInt<12>(AM.BaseOffs) &&
11401 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
11402 return false;
11403
11404 switch (AM.Scale) {
11405 case 0:
11406 // "r+i" or just "i", depending on HasBaseReg.
11407 break;
11408 case 1:
11409 // "r+r+i" is not allowed.
11410 if (AM.HasBaseReg && AM.BaseOffs)
11411 return false;
11412 // Otherwise we have "r+r" or "r+i".
11413 break;
11414 case 2:
11415 // "2*r+r" or "2*r+i" is not allowed.
11416 if (AM.HasBaseReg || AM.BaseOffs)
11417 return false;
11418 // Allow "2*r" as "r+r".
11419 break;
11420 default:
// Any other scale factor is rejected.
11421 return false;
11422 }
11423
11424 return true;
11425}
11426
// An immediate is accepted only if it fits in a signed 12-bit field.
11428 return isInt<12>(Imm);
11429}
11430
// An immediate is accepted only if it fits in a signed 12-bit field.
11432 return isInt<12>(Imm);
11433}
11434
// Returns true when zero-extending Val costs nothing because it is an i8 or
// i16 load that is either non-extending or already zero-extending; all other
// cases are decided by the generic TargetLowering::isZExtFree.
11436 // Zexts are free if they can be combined with a load.
11437 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
11438 // poorly with type legalization of compares preferring sext.
11439 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
11440 EVT MemVT = LD->getMemoryVT();
11441 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
11442 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
11443 LD->getExtensionType() == ISD::ZEXTLOAD))
11444 return true;
11445 }
11446
11447 return TargetLowering::isZExtFree(Val, VT2);
11448}
11449
// True only for an i32 -> i64 extension on a 64-bit subtarget.
11451 EVT DstVT) const {
11452 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
11453}
11454
// True only for 32-bit integer constants on a 64-bit subtarget.
11456 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
11457}
11458
// Returns true when Y is a scalar value that is not a constant; vectors and
// constants are rejected.
11460 // TODO: Support vectors.
11461 if (Y.getValueType().isVector())
11462 return false;
11463
11464 return !isa<ConstantSDNode>(Y);
11465}
11466
// Selects the extension kind by subtarget feature: ANY_EXTEND when LAMCAS is
// available, SIGN_EXTEND otherwise.
11468 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
11469 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
11470}
11471
// 32-bit integers are always sign-extended on a 64-bit subtarget, regardless
// of IsSigned; every other type follows IsSigned.
11473 Type *Ty, bool IsSigned) const {
11474 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
11475 return true;
11476
11477 return IsSigned;
11478}
11479
// Returns true unless the type is a scalar floating-point type narrower than
// GRLen and the subtarget uses a soft-float ABI.
11481 // Return false to suppress the unnecessary extensions if the LibCall
11482 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
11483 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
11484 Type.getSizeInBits() < Subtarget.getGRLen()))
11485 return false;
11486 return true;
11487}
11488
11489// memcpy and other memory intrinsics typically try to use wider loads/stores
11490// if the source/dest is aligned and the copy size is large enough. We therefore
11491// want to align such objects passed to memory intrinsics.
// For a MemIntrinsic call this sets MinSize and PrefAlign to 8 bytes on a
// 64-bit subtarget and to 4 bytes otherwise, then returns true. For any other
// call it returns false and leaves both outputs untouched.
11493 unsigned &MinSize,
11494 Align &PrefAlign) const {
11495 if (!isa<MemIntrinsic>(CI))
11496 return false;
11497
11498 if (Subtarget.is64Bit()) {
11499 MinSize = 8;
11500 PrefAlign = Align(8);
11501 } else {
11502 MinSize = 4;
11503 PrefAlign = Align(4);
11504 }
11505
11506 return true;
11507}
11508
// Prefer widening for fixed-length vectors that have more than one element
// and whose element type is not i1.
11511 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
11512 VT.getVectorElementType() != MVT::i1)
11513 return TypeWidenVector;
11514
11516}
11517
// Packs a value into register parts for an ABI register copy. Only one case
// is handled: an f16 or bf16 value that must travel in a single f32 part when
// a calling convention is supplied. The 16 payload bits are placed in the low
// half of the f32 bit pattern and the upper 16 bits are set to all ones.
// Returns true and writes Parts[0] in that case; returns false otherwise
// without touching Parts.
11518bool LoongArchTargetLowering::splitValueIntoRegisterParts(
11519 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
11520 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
// A present calling convention marks this as an ABI register copy.
11521 bool IsABIRegCopy = CC.has_value();
11522 EVT ValueVT = Val.getValueType();
11523
11524 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11525 PartVT == MVT::f32) {
11526 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
11527 // nan, and cast to f32.
11528 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
11529 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
11530 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
11531 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
11532 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
11533 Parts[0] = Val;
11534 return true;
11535 }
11536
11537 return false;
11538}
11539
// Rebuilds a value from register parts for an ABI register copy. Only one
// case is handled: an f16 or bf16 value carried in a single f32 part when a
// calling convention is supplied. The low 16 bits of the f32 bit pattern are
// reinterpreted as the result. Returns an empty SDValue in every other case.
11540SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
11541 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
11542 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
// A present calling convention marks this as an ABI register copy.
11543 bool IsABIRegCopy = CC.has_value();
11544
11545 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
11546 PartVT == MVT::f32) {
11547 SDValue Val = Parts[0];
11548
11549 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
11550 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
11551 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
11552 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
11553 return Val;
11554 }
11555
11556 return SDValue();
11557}
11558
// Chooses the register type used to pass a value of type VT. The only
// special case here is f16, which is passed as f32 when the subtarget has the
// basic F feature.
// NOTE(review): bf16 is not special-cased here although the register-part
// split/join hooks accept both f16 and bf16 - confirm this is intended.
11559MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
11560 CallingConv::ID CC,
11561 EVT VT) const {
11562 // Use f32 to pass f16.
11563 if (VT == MVT::f16 && Subtarget.hasBasicF())
11564 return MVT::f32;
11565
11567}
11568
// Reports how many registers are needed to pass a value of type VT. The only
// special case here is f16, which takes exactly one register when the
// subtarget has the basic F feature.
11569unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
11570 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
11571 // Use f32 to pass f16.
11572 if (VT == MVT::f16 && Subtarget.hasBasicF())
11573 return 1;
11574
11576}
11577
// Known-bits hook for LoongArch-specific nodes. Known is reset first, so
// nothing is reported for opcodes without a case below. For VPICK_ZEXT_ELT,
// every result bit at or above the scalar width of the type named by operand
// 2 is marked known-zero.
11579 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
11580 const SelectionDAG &DAG, unsigned Depth) const {
11581 unsigned Opc = Op.getOpcode();
11582 Known.resetAll();
11583 switch (Opc) {
11584 default:
11585 break;
11586 case LoongArchISD::VPICK_ZEXT_ELT: {
11587 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
11588 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
11589 unsigned VTBits = VT.getScalarSizeInBits();
11590 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
// The element is zero-extended, so all bits from VTBits upwards are zero.
11591 Known.Zero.setBitsFrom(VTBits);
11592 break;
11593 }
11594 }
11595}
11596
// Demanded-bits hook for LoongArch-specific nodes.
//   - CRC[C].W.{B,H}.W: only the low 8 (B) or 16 (H) bits of operand 1 are
//     demanded, so simplification is forwarded to that operand.
//   - [X]VMSKLTZ: only the sign bit of each demanded source element matters;
//     the source is simplified accordingly and Known is filled in.
// Opcodes without a case leave the switch and are not handled by the code
// above it.
11598 SDValue Op, const APInt &OriginalDemandedBits,
11599 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
11600 unsigned Depth) const {
11601 EVT VT = Op.getValueType();
11602 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
11603 unsigned Opc = Op.getOpcode();
11604 switch (Opc) {
11605 default:
11606 break;
11607 case LoongArchISD::CRC_W_B_W:
11608 case LoongArchISD::CRC_W_H_W:
11609 case LoongArchISD::CRCC_W_B_W:
11610 case LoongArchISD::CRCC_W_H_W: {
11611 KnownBits KnownSrc;
// Byte variants demand the low 8 bits of operand 1, halfword variants 16.
11612 APInt DemandedSrcBits =
11613 APInt::getLowBitsSet(BitWidth, (Opc == LoongArchISD::CRC_W_B_W ||
11614 Opc == LoongArchISD::CRCC_W_B_W)
11615 ? 8
11616 : 16);
11617 return SimplifyDemandedBits(Op.getOperand(1), DemandedSrcBits,
11618 OriginalDemandedElts, KnownSrc, TLO, Depth + 1);
11619 }
11620 case LoongArchISD::VMSKLTZ:
11621 case LoongArchISD::XVMSKLTZ: {
11622 SDValue Src = Op.getOperand(0);
11623 MVT SrcVT = Src.getSimpleValueType();
11624 unsigned SrcBits = SrcVT.getScalarSizeInBits();
11625 unsigned NumElts = SrcVT.getVectorNumElements();
11626
11627 // If we don't need the sign bits at all just return zero.
11628 if (OriginalDemandedBits.countr_zero() >= NumElts)
11629 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
11630
11631 // Only demand the vector elements of the sign bits we need.
11632 APInt KnownUndef, KnownZero;
11633 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
11634 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
11635 TLO, Depth + 1))
11636 return true;
11637
// Result bit i corresponds to source element i; bits above NumElts are
// always zero.
11638 Known.Zero = KnownZero.zext(BitWidth);
11639 Known.Zero.setHighBits(BitWidth - NumElts);
11640
11641 // [X]VMSKLTZ only uses the MSB from each vector element.
11642 KnownBits KnownSrc;
11643 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
11644 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
11645 Depth + 1))
11646 return true;
11647
// A known source sign bit makes the low NumElts result bits known as well.
11648 if (KnownSrc.One[SrcBits - 1])
11649 Known.One.setLowBits(NumElts);
11650 else if (KnownSrc.Zero[SrcBits - 1])
11651 Known.Zero.setLowBits(NumElts);
11652
11653 // Attempt to avoid multi-use ops if we don't need anything from it.
11655 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
11656 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
11657 return false;
11658 }
11659 }
11660
11662 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
11663}
11664
// Decides whether a vector binary operation should be turned into a scalar
// one. Target-specific opcodes and non-binary opcodes are never scalarized.
11666 unsigned Opc = VecOp.getOpcode();
11667
11668 // Assume target opcodes can't be scalarized.
11669 // TODO - do we have any exceptions?
11670 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
11671 return false;
11672
11673 // If the vector op is not supported, try to convert to scalar.
11674 EVT VecVT = VecOp.getValueType();
11676 return true;
11677
11678 // If the vector op is supported, but the scalar op is not, the transform may
11679 // not be worthwhile.
11680 EVT ScalarVT = VecVT.getScalarType();
11681 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
11682}
11683
// Reports whether extracting a subvector starting at Index is cheap: after
// the early rejection, only an extraction at index 0 qualifies.
11685 unsigned Index) const {
11687 return false;
11688
11689 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
11690 return Index == 0;
11691}
11692
// Reports whether extracting element Index from a vector of type VT is
// cheap: true only for element 0 of a vector whose scalar type is f32 or f64.
11694 unsigned Index) const {
11695 EVT EltVT = VT.getScalarType();
11696
11697 // Extract a scalar FP value from index 0 of a vector is free.
11698 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
11699}
11700
// Returns true only when the function carries a "probe-stack" attribute whose
// string value is exactly "inline-asm".
11702 const MachineFunction &MF) const {
11703
11704 // If the function specifically requests inline stack probes, emit them.
11705 if (MF.getFunction().hasFnAttribute("probe-stack"))
11706 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11707 "inline-asm";
11708
11709 return false;
11710}
11711
// Computes the stack probe interval: the "stack-probe-size" function
// attribute (4096 when absent), rounded down to a multiple of StackAlign.
// The result is never zero.
11713 Align StackAlign) const {
11714 // The default stack probe size is 4096 if the function has no
11715 // stack-probe-size attribute.
11716 const Function &Fn = MF.getFunction();
11717 unsigned StackProbeSize =
11718 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
11719 // Round down to the stack alignment.
11720 StackProbeSize = alignDown(StackProbeSize, StackAlign.value());
// A size smaller than the alignment rounds down to zero; use one alignment
// unit in that case.
11721 return StackProbeSize ? StackProbeSize : StackAlign.value();
11722}
11723
// Lowers a dynamic stack allocation when inline stack probing is requested.
// Operands of Op: 0 = chain, 1 = allocation size, 2 = alignment constant.
// The new stack pointer is computed as (SP - Size), aligned down when an
// alignment is given, and a PROBED_ALLOCA node is emitted to move the real SP
// there. Returns the merged {new SP, chain} pair, or an empty SDValue when
// the function does not use inline stack probes.
11724SDValue
11725LoongArchTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
11726 SelectionDAG &DAG) const {
11728 if (!hasInlineStackProbe(MF))
11729 return SDValue();
11730
11731 const MVT GRLenVT = Subtarget.getGRLenVT();
11732 // Get the inputs.
11733 SDValue Chain = Op.getOperand(0);
11734 SDValue Size = Op.getOperand(1);
11735
11736 const MaybeAlign Align =
11737 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11738 const SDLoc dl(Op);
11739 const EVT VT = Op.getValueType();
11740
11741 // Construct the new SP value in a GPR.
11742 SDValue SP = DAG.getCopyFromReg(Chain, dl, LoongArch::R3, GRLenVT);
11743 Chain = SP.getValue(1);
11744 SP = DAG.getNode(ISD::SUB, dl, GRLenVT, SP, Size);
// ANDing with the negated alignment clears the low bits, i.e. aligns down.
11745 if (Align)
11746 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11747 DAG.getSignedConstant(-Align->value(), dl, VT));
11748
11749 // Set the real SP to the new value with a probing loop.
11750 Chain = DAG.getNode(LoongArchISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
11751 return DAG.getMergeValues({SP, Chain}, dl);
11752}
11753
// Expands the probed-alloca pseudo instruction MI into a probing loop.
// Operand 0 of MI is the target stack pointer value. Two new blocks are
// created after MBB:
//   LoopTestMBB: lowers $sp by ProbeSize, stores zero at the new $sp, and
//                repeats while TargetReg is (unsigned) below $sp.
//   ExitMBB:     copies TargetReg into $sp and receives the instructions that
//                followed MI in MBB.
// MI is erased, the function is flagged as having a dynamic allocation, and
// the exit block is returned.
11756 MachineBasicBlock *MBB) const {
11757 MachineFunction &MF = *MBB->getParent();
11758 MachineBasicBlock::iterator MBBI = MI.getIterator();
11759 DebugLoc DL = MBB->findDebugLoc(MBBI);
11760 const Register TargetReg = MI.getOperand(0).getReg();
11761
11762 const LoongArchInstrInfo *TII = Subtarget.getInstrInfo();
11763 const bool IsLA64 = Subtarget.is64Bit();
11764 const Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
11765 const LoongArchTargetLowering *TLI = Subtarget.getTargetLowering();
11766 const uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
11767
// Both new blocks are inserted immediately after MBB, loop block first.
11768 MachineFunction::iterator MBBInsertPoint = std::next(MBB->getIterator());
11769 MachineBasicBlock *const LoopTestMBB =
11770 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11771 MF.insert(MBBInsertPoint, LoopTestMBB);
11772 MachineBasicBlock *const ExitMBB =
11773 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
11774 MF.insert(MBBInsertPoint, ExitMBB);
11775 const Register SPReg = LoongArch::R3;
11776 const Register ScratchReg =
11777 MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
11778
11779 // ScratchReg = ProbeSize
11780 TII->movImm(*MBB, MBBI, DL, ScratchReg, ProbeSize, MachineInstr::NoFlags);
11781
11782 // LoopTest:
11783 // sub.{w/d} $sp, $sp, ScratchReg
11784 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11785 TII->get(IsLA64 ? LoongArch::SUB_D : LoongArch::SUB_W), SPReg)
11786 .addReg(SPReg)
11787 .addReg(ScratchReg);
11788
11789 // st.{w/d} $zero, $sp, 0
11790 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
11791 TII->get(IsLA64 ? LoongArch::ST_D : LoongArch::ST_W))
11792 .addReg(LoongArch::R0)
11793 .addReg(SPReg)
11794 .addImm(0);
11795
11796 // bltu TargetReg, $sp, LoopTest
11797 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(LoongArch::BLTU))
11798 .addReg(TargetReg)
11799 .addReg(SPReg)
11800 .addMBB(LoopTestMBB);
11801
11802 // move $sp, TargetReg
11803 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(LoongArch::OR), SPReg)
11804 .addReg(TargetReg)
11805 .addReg(LoongArch::R0);
11806
// Everything that followed the pseudo in MBB now lives in the exit block.
11807 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
11809
// CFG edges: MBB -> LoopTest, LoopTest -> LoopTest (back edge) and
// LoopTest -> Exit.
11810 LoopTestMBB->addSuccessor(ExitMBB);
11811 LoopTestMBB->addSuccessor(LoopTestMBB);
11812 MBB->addSuccessor(LoopTestMBB);
11813
11814 MI.eraseFromParent();
11815 MF.getInfo<LoongArchMachineFunctionInfo>()->setDynamicAllocation();
11816 return ExitMBB->begin()->getParent();
11817}
static MCRegister MatchRegisterName(StringRef Name)
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:856
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static bool isSigned(unsigned Opcode)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue performUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performHorizWideningCombine(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static SDValue ExtendSrcToDst(SDNode *N, SelectionDAG &DAG, unsigned ExtendOp)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static SDValue performDemandedBitsCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue matchLowHalfOf128BitLanes(SDValue N)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue performFP_TO_INTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue matchDeinterleaveBuildVector(SDValue N, unsigned &StartIndex)
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isZero() const
Definition APFloat.h:1540
LLVM_READONLY bool isOne() const
Definition APFloat.h:1622
APInt bitcastToAPInt() const
Definition APFloat.h:1436
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the bit at position BitPosition to 1.
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Definition Argument.h:50
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
size_type count() const
Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:126
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
iterator_range< arg_iterator > args()
Definition Function.h:866
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:758
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:770
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:353
Argument * getArg(unsigned i) const
Definition Function.h:860
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
void setIncomingIndirectArg(unsigned ArgIndex, Register Reg)
Register getIncomingIndirectArg(unsigned ArgIndex) const
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool hasInlineStackProbe(const MachineFunction &MF) const override
True if stack clash protection is enabled for this function.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from SrcVT to DstVT is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
MachineBasicBlock * emitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns whether it is reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:736
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ STACKRESTORE
STACKRESTORE has two operands: an input chain and a pointer to restore to. It returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:918
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SET_ROUNDING
Set rounding mode.
Definition ISDOpcodes.h:983
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:813
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:929
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:573
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and congruent to Skew modulo Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Next
Definition InstrProf.h:147
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is an FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
This represents a list of ValueTypes that has been interned by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...