//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<unsigned> MaterializeFPImmInsNum(
    "loongarch-materialize-float-imm", cl::Hidden,
    cl::desc("Maximum number of instructions used (including code sequence "
             "to generate the value and moving the value to FPR) when "
             "materializing floating-point immediates (default = 3)"),
    cl::init(3),
    cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
               clEnumValN(2, "2",
                          "Materialize FP immediate within 2 instructions"),
               clEnumValN(3, "3",
                          "Materialize FP immediate within 3 instructions"),
               clEnumValN(4, "4",
                          "Materialize FP immediate within 4 instructions"),
               clEnumValN(5, "5",
                          "Materialize FP immediate within 5 instructions"),
               clEnumValN(6, "6",
                          "Materialize FP immediate within 6 instructions "
                          "(behaves same as 5 on loongarch64)")));

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM, STI), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

                     MVT::i1, Promote);

                     GRLenVT, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // BITREV/REVB requires the 32S feature.
  if (STI.has32S()) {
    // Expand bitreverse.i16 with native-width bitrev and shift for now, before
    // we get to know which of sll and revb.2h is faster.

    // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
    // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
    // and i32 could still be byte-swapped relatively cheaply.
  } else {
  }

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
                       Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
    if (Subtarget.hasBasicD())
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
      ISD::SETGE,  ISD::SETNE,  ISD::SETGT};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())

    if (!Subtarget.hasBasicD()) {
      if (Subtarget.is64Bit()) {
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

                       Subtarget.isSoftFPABI() ? LibCall : Custom);
                       Subtarget.isSoftFPABI() ? LibCall : Custom);

    if (Subtarget.is64Bit())
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(static_cast<ISD::NodeType>(Op), VT, Expand);
    }

    for (MVT VT : LSXVTs) {

    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
                         Legal);
                         VT, Legal);
                         Expand);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
                         VT, Expand);
    }
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);

    for (MVT VT :
         {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
          MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
    }
    // We want to legalize this to an f64 load rather than an i64 load.
    setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {

    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
                         Legal);
                         VT, Legal);
                         Expand);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
                         VT, Expand);
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16}) {
    }
    for (MVT VT :
         {MVT::v2i64, MVT::v4i32, MVT::v4i64, MVT::v8i16, MVT::v8i32}) {
    }
  }

  // Set DAG combine for LA32 and LA64.
  if (Subtarget.hasBasicF()) {
  }

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
  }

  // Set DAG combine for 'LASX' feature.
  if (Subtarget.hasExtLASX()) {
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());

  // Function alignments.
  // Set preferred alignments.
  setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
  setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
  setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
  }

  // Disable strict node mutation.
  IsStrictFPEnabled = true;
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS:
    return lowerCONCAT_VECTORS(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::BRCOND:
    return lowerBRCOND(Op, DAG);
  case ISD::FP_TO_FP16:
    return lowerFP_TO_FP16(Op, DAG);
  case ISD::FP16_TO_FP:
    return lowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_BF16:
    return lowerFP_TO_BF16(Op, DAG);
  case ISD::BF16_TO_FP:
    return lowerBF16_TO_FP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  case ISD::ROTL:
  case ISD::ROTR:
    return lowerRotate(Op, DAG);
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
    return lowerVECREDUCE(Op, DAG);
  case ISD::ConstantFP:
    return lowerConstantFP(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::FP_ROUND:
    return lowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND:
    return lowerFP_EXTEND(Op, DAG);
  }
  return SDValue();
}

// Helper to attempt to return a cheaper, bit-inverted version of \p V.
static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
  // TODO: don't always ignore oneuse constraints.
  V = peekThroughBitcasts(V);
  EVT VT = V.getValueType();

  // Match not(xor X, -1) -> X.
  if (V.getOpcode() == ISD::XOR &&
      (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
       isAllOnesConstant(V.getOperand(1))))
    return V.getOperand(0);

  // Match not(extract_subvector(not(X))) -> extract_subvector(X).
  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
    if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
      Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
                         V.getOperand(1));
    }
  }

  // Match not(SplatVector(not(X))) -> SplatVector(X).
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
      if (!V->isOnlyUserOf(SplatValue.getNode()))
        return SDValue();

      if (SDValue Not = isNOT(SplatValue, DAG)) {
        Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
        return DAG.getSplat(VT, SDLoc(Not), Not);
      }
    }
  }

  // Match not(or(not(X),not(Y))) -> and(X, Y).
  if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
      V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
    // TODO: Handle cases with single NOT operand -> VANDN
    if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
      if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
        return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
                           DAG.getBitcast(VT, Op1));
  }

  // TODO: Add more matching patterns. Such as,
  // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
  // not(slt(C, X)) -> slt(X - 1, C)
  return SDValue();
}

// Combine two ISD::FP_ROUND / LoongArchISD::VFCVT nodes with the same type
// into LoongArchISD::VFCVT. For example:
// x1 = fp_round x, 0
// y1 = fp_round y, 0
// z = concat_vectors x1, y1
// Or
// x1 = LoongArchISD::VFCVT undef, x
// y1 = LoongArchISD::VFCVT undef, y
// z = LoongArchISD::VPACKEV y1, x1; or LoongArchISD::VPERMI y1, x1, 68
// can be combined to:
// z = LoongArchISD::VFCVT y, x
static SDValue combineToVFCVT(SDValue N, const SDLoc &DL, SelectionDAG &DAG,
                              const LoongArchSubtarget &Subtarget) {
  assert(((N->getOpcode() == ISD::CONCAT_VECTORS && N->getNumOperands() == 2) ||
          (N->getOpcode() == LoongArchISD::VPACKEV) ||
          (N->getOpcode() == LoongArchISD::VPERMI)) &&
         "Invalid Node");

  SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
  SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
  unsigned Opcode0 = Op0.getOpcode();
  unsigned Opcode1 = Op1.getOpcode();
  if (Opcode0 != Opcode1)
    return SDValue();

  if (Opcode0 != ISD::FP_ROUND && Opcode0 != LoongArchISD::VFCVT)
    return SDValue();

  // Check if the two nodes have only one use.
  if (!Op0.hasOneUse() || !Op1.hasOneUse())
    return SDValue();

  EVT VT = N.getValueType();
  EVT SVT0 = Op0.getValueType();
  EVT SVT1 = Op1.getValueType();
  // Check if the two nodes have the same result type.
  if (SVT0 != SVT1)
    return SDValue();

  // Check if the two nodes have the same operand type.
  EVT SSVT0 = Op0.getOperand(0).getValueType();
  EVT SSVT1 = Op1.getOperand(0).getValueType();
  if (SSVT0 != SSVT1)
    return SDValue();

  if (N->getOpcode() == ISD::CONCAT_VECTORS && Opcode0 == ISD::FP_ROUND) {
    if (Subtarget.hasExtLASX() && VT.is256BitVector() && SVT0 == MVT::v4f32 &&
        SSVT0 == MVT::v4f64) {
      // A vector_shuffle is required in the final step, as the xvfcvt
      // instruction operates on each 128-bit segment as a lane.
      SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v8f32,
                                Op1.getOperand(0), Op0.getOperand(0));
      SDValue Undef = DAG.getUNDEF(Res.getValueType());
      // After VFCVT, the high part of Res comes from the high parts of Op0 and
      // Op1, and the low part comes from the low parts of Op0 and Op1. However,
      // the desired order requires Op0 to fully occupy the lower half and Op1
      // the upper half of Res. The Mask reorders the elements of Res to achieve
      // this:
      // - The first four elements (0, 1, 4, 5) come from Op0.
      // - The next four elements (2, 3, 6, 7) come from Op1.
      SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
      Res = DAG.getVectorShuffle(Res.getValueType(), DL, Res, Undef, Mask);
      return DAG.getBitcast(VT, Res);
    }
  }

  if ((N->getOpcode() == LoongArchISD::VPACKEV ||
       N->getOpcode() == LoongArchISD::VPERMI) &&
      Opcode0 == LoongArchISD::VFCVT) {
    // For VPACKEV or VPERMI, check that the first operand of each VFCVT is
    // undef.
    if (!Op0.getOperand(0).isUndef() || !Op1.getOperand(0).isUndef())
      return SDValue();

    if (!Subtarget.hasExtLSX() || SVT0 != MVT::v4f32 || SSVT0 != MVT::v2f64)
      return SDValue();

    if (N->getOpcode() == LoongArchISD::VPACKEV &&
        (VT == MVT::v2i64 || VT == MVT::v2f64)) {
      SDValue Res = DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32,
                                Op0.getOperand(1), Op1.getOperand(1));
      return DAG.getBitcast(VT, Res);
    }

    if (N->getOpcode() == LoongArchISD::VPERMI && VT == MVT::v4f32) {
      int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
      if (Imm != 68)
        return SDValue();
      return DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Op0.getOperand(1),
                         Op1.getOperand(1));
    }
  }

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerFP_ROUND(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  MVT VT = Op.getSimpleValueType();
  MVT SVT = In.getSimpleValueType();

  if (VT == MVT::v4f32 && SVT == MVT::v4f64) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(In, DL);
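    // LoongArchISD::VFCVT takes the source of the result's high half as its
    // first operand and the source of the low half as its second, so pass
    // (Hi, Lo).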
    return DAG.getNode(LoongArchISD::VFCVT, DL, VT, Hi, Lo);
  }

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerFP_EXTEND(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Src = Op->getOperand(0);
  EVT SVT = Src.getValueType();

  bool V2F32ToV2F64 =
      VT == MVT::v2f64 && SVT == MVT::v2f32 && Subtarget.hasExtLSX();
  bool V4F32ToV4F64 =
      VT == MVT::v4f64 && SVT == MVT::v4f32 && Subtarget.hasExtLASX();
  if (!V2F32ToV2F64 && !V4F32ToV4F64)
    return SDValue();

  // Check if Op is the high part of a vector.
  auto CheckVecHighPart = [](SDValue Op) {
    if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      SDValue SOp = Op.getOperand(0);
      EVT SVT = SOp.getValueType();
      if (!SVT.isVector() || (SVT.getVectorNumElements() % 2 != 0))
        return SDValue();

      const uint64_t Imm = Op.getConstantOperandVal(1);
      if (Imm == SVT.getVectorNumElements() / 2)
        return SOp;
      return SDValue();
    }
    return SDValue();
  };

  unsigned Opcode;
  SDValue VFCVTOp;
  EVT WideOpVT = SVT.getSimpleVT().getDoubleNumVectorElementsVT();
  SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);

  // If the operand of ISD::FP_EXTEND comes from the high part of a vector,
  // generate LoongArchISD::VFCVTH, otherwise LoongArchISD::VFCVTL.
  if (SDValue V = CheckVecHighPart(Src)) {
    assert(V.getValueSizeInBits() == WideOpVT.getSizeInBits() &&
           "Unexpected wide vector");
    Opcode = LoongArchISD::VFCVTH;
    VFCVTOp = DAG.getBitcast(WideOpVT, V);
  } else {
    Opcode = LoongArchISD::VFCVTL;
    VFCVTOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideOpVT,
                          DAG.getUNDEF(WideOpVT), Src, ZeroIdx);
  }

  // v2f64 = fp_extend v2f32
  if (V2F32ToV2F64)
    return DAG.getNode(Opcode, DL, VT, VFCVTOp);

  // v4f64 = fp_extend v4f32
  if (V4F32ToV4F64) {
    // The XVFCVT instruction operates on each 128-bit segment as a lane, so a
    // vector_shuffle is required first.
    SmallVector<int, 8> Mask = {0, 1, 4, 5, 2, 3, 6, 7};
    SDValue Res = DAG.getVectorShuffle(WideOpVT, DL, VFCVTOp,
                                       DAG.getUNDEF(WideOpVT), Mask);
    Res = DAG.getNode(Opcode, DL, VT, Res);
    return Res;
  }

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
  const APFloat &FPVal = CFP->getValueAPF();
  SDLoc DL(CFP);

  assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
         (VT == MVT::f64 && Subtarget.hasBasicD()));

  // If the value is 0.0 or -0.0, just ignore it.
  if (FPVal.isZero())
    return SDValue();

  // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
  if (isFPImmVLDILegal(FPVal, VT))
    return SDValue();

  // Construct as integer, and move to float register.
  APInt INTVal = FPVal.bitcastToAPInt();

  // If more than MaterializeFPImmInsNum instructions will be used to
  // generate the INTVal and move it to a float register, fall back to using a
  // floating-point load from the constant pool.
  int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
  if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
    return SDValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected floating point type!");
    break;
  case MVT::f32: {
    SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
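    // On LA64 the bit pattern is moved through a 64-bit GPR, so widen the
    // 32-bit constant first.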
    if (Subtarget.is64Bit())
      NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
    return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
                                           : LoongArchISD::MOVGR2FR_W,
                       DL, VT, NewVal);
  }
  case MVT::f64: {
    if (Subtarget.is64Bit()) {
      SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
      return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
    }
    SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
    SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
    return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
  }
  }

  return SDValue();
}

// Ensure SETCC result and operand have the same bit width; isel does not
// support mismatched widths.
SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT ResultVT = Op.getValueType();
  EVT OperandVT = Op.getOperand(0).getValueType();

  EVT SetCCResultVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);

  if (ResultVT == SetCCResultVT)
    return Op;

  assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
         "SETCC operands must have the same type!");

  SDValue SetCCNode =
      DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
                  Op.getOperand(1), Op.getOperand(2));

  if (ResultVT.bitsGT(SetCCResultVT))
    SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
  else if (ResultVT.bitsLT(SetCCResultVT))
    SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);

  return SetCCNode;
}

// Lower vecreduce_add using vhaddw instructions.
// For example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
// can be lowered to:
// VHADDW_D_W vr0, vr0, vr0
// VHADDW_Q_D vr0, vr0, vr0
// VPICKVE2GR_D a0, vr0, 0
// ADDI_W a0, a0, 0
SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
  unsigned ResBits = OpVT.getScalarSizeInBits();

  unsigned LegalVecSize = 128;
  bool isLASX256Vector =
      Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;

  // Widen the operand until its type is legal.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  // NumEles determines the iteration count; v4i32 for LSX and v8i32 for LASX
  // should take the same number of steps.
  if (isLASX256Vector) {
    NumEles /= 2;
    LegalVecSize = 256;
  }

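  // Each vhaddw step pairwise-adds adjacent elements into elements twice as
  // wide, until a single accumulator remains per 128-bit lane.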
  for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
    MVT IntTy = MVT::getIntegerVT(EleBits);
    MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
    Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
  }

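  // vhaddw only accumulates within each 128-bit lane. For 256-bit vectors,
  // xvpermi.d with immediate 2 moves the high lane's partial sum into the low
  // lane so the final ADD combines both lanes.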
  if (isLASX256Vector) {
    SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
                              DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
    Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
  }

  Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}

// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
// For example:
// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
// can be lowered to:
// VBSRL_V vr1, vr0, 8
// VMAX_W vr0, vr1, vr0
// VBSRL_V vr1, vr0, 4
// VMAX_W vr0, vr1, vr0
// VPICKVE2GR_W a0, vr0, 0
// A 256-bit vector is illegal here and is split into two 128-bit vectors by
// default before being processed by this function.
SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();

  // Widen the operand until its type is legal.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
  MVT VecTy = Val.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();
  for (int i = NumEles; i > 1; i /= 2) {
    SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
    SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
    Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
  }

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, GRLenVT));
}

SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned IsData = Op.getConstantOperandVal(4);

  // We don't support non-data prefetch.
  // Just preserve the chain.
  if (!IsData)
    return Op.getOperand(0);

  return Op;
}

SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
                                             SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Unexpected type");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      return false;
    if (SDValue SplatValue =
            cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
      if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
        CstSplatValue = C->getAPIntValue();
        return true;
      }
    }
    return false;
  };

  // Check for constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
  bool isROTL = Opcode == ISD::ROTL;

  // Check for splat rotate by zero.
  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
    return R;

  // LoongArch targets always prefer ISD::ROTR.
  if (isROTL) {
    SDValue Zero = DAG.getConstant(0, DL, VT);
    return DAG.getNode(ISD::ROTR, DL, VT, R,
                       DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
  }

  // Rotate by an immediate.
  if (IsCstSplat) {
    // ISD::ROTR: Attempt to rotate by a positive immediate.
    SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
    if (SDValue Urem =
            DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
      return DAG.getNode(Opcode, DL, VT, R, Urem);
  }

  return Op;
}

// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    CC2 = ISD::getSetCCSwappedOperands(CC2);
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  return std::nullopt;
}

static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
                                    const LoongArchSubtarget &Subtarget) {
  SDValue CondV = N->getOperand(0);
  SDValue TrueV = N->getOperand(1);
  SDValue FalseV = N->getOperand(2);
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // (select c, -1, y) -> -c | y
  if (isAllOnesConstant(TrueV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, -1) -> (c-1) | y
  if (isAllOnesConstant(FalseV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // (select c, 0, y) -> (c-1) & y
  if (isNullConstant(TrueV)) {
    SDValue Neg =
        DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
  }
  // (select c, y, 0) -> -c & y
  if (isNullConstant(FalseV)) {
    SDValue Neg = DAG.getNegative(CondV, DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
  }

  // select c, ~x, x --> xor -c, x
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (~TrueVal == FalseVal) {
      SDValue Neg = DAG.getNegative(CondV, DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
    }
  }

  // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
  // when both truev and falsev are also setcc.
  if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
      FalseV.getOpcode() == ISD::SETCC) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    // (select x, x, y) -> x | y
    // (select !x, x, y) -> x & y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
      return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
                         DAG.getFreeze(FalseV));
    }
    // (select x, y, x) -> x & y
    // (select !x, y, x) -> x | y
    if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
      return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
                         DAG.getFreeze(TrueV), FalseV);
    }
  }

  return SDValue();
}

// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
// For now we only consider the transformation profitable if `binOp(c0, c1)`
// ends up being `0` or `-1`. In such cases we can replace `select` with `and`.
// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
// than `c0`?
static SDValue
foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

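  // Identify which select arm holds the constant; that arm is folded with the
  // binop's constant operand while the other arm feeds the rewritten binop.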
  unsigned ConstSelOpNo = 1;
  unsigned OtherSelOpNo = 2;
  if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
    ConstSelOpNo = 2;
    OtherSelOpNo = 1;
  }
  SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
  ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
  if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
    return SDValue();

  SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
  ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
  if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
    return SDValue();

  SDLoc DL(Sel);
  EVT VT = BO->getValueType(0);

  SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewConstOps[0], NewConstOps[1]);

  SDValue NewConstOp =
      DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
  if (!NewConstOp)
    return SDValue();

  const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
  if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
    return SDValue();

  SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
  SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
  if (SelOpNo == 1)
    std::swap(NewNonConstOps[0], NewNonConstOps[1]);
  SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);

  SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
  SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the LoongArch ISA. May adjust compares to favor compare with 0 over
// compare with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // If this is a single bit test that can't be handled by ANDI, shift the
  // bit to be tested to the MSB and perform a signed compare with 0.
  if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
      LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
    uint64_t Mask = LHS.getConstantOperandVal(1);
    if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
      unsigned ShAmt = 0;
      if (isPowerOf2_64(Mask)) {
        CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
      } else {
        ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
      }

      LHS = LHS.getOperand(0);
      if (ShAmt != 0)
        LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
      return;
    }
  }

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
    switch (CC) {
    default:
      break;
    case ISD::SETGT:
      // Convert X > -1 to X >= 0.
      if (C == -1) {
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    case ISD::SETLT:
      // Convert X < 1 to 0 >= X.
      if (C == 1) {
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, RHS.getValueType());
        CC = ISD::SETGE;
        return;
      }
      break;
    }
  }

  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause a crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we
  // need to emit a LoongArchISD::SELECT_CC comparing the condition to zero.
  // i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}

SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

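  // An integer SETCC on GRLenVT folds directly into BR_CC below; a
  // floating-point compare keeps its SETCC condition and branches on the
  // boolean result via LoongArchISD::BRCOND instead.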
  if (CondV.getOpcode() == ISD::SETCC) {
    if (CondV.getOperand(0).getValueType() == GRLenVT) {
      SDValue LHS = CondV.getOperand(0);
      SDValue RHS = CondV.getOperand(1);
      ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC = DAG.getCondCode(CCVal);
      return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
                         Op.getOperand(0), LHS, RHS, TargetCC,
                         Op.getOperand(2));
    } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
      return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
                         Op.getOperand(0), CondV, Op.getOperand(2));
    }
  }

  return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
                     Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  // LoongArchISD::BITREV_8B is not supported on LA32.
  if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
    return SDValue();

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
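
  // Bit-reverse each 64-bit chunk in GPRs. bitrev.8b reverses the bits within
  // every byte but keeps byte positions, which is all v16i8/v32i8 need; the
  // full 64-bit BITREVERSE used for wider elements also reverses the element
  // order within each chunk, which the shuffle below undoes.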
1508
1510 for (unsigned int i = 0; i < NewEltNum; i++) {
1511 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1512 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1513 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1514 ? (unsigned)LoongArchISD::BITREV_8B
1515 : (unsigned)ISD::BITREVERSE;
1516 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1517 }
1518 SDValue Res =
1519 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1520
1521 switch (ResTy.getSimpleVT().SimpleTy) {
1522 default:
1523 return SDValue();
1524 case MVT::v16i8:
1525 case MVT::v32i8:
1526 return Res;
1527 case MVT::v8i16:
1528 case MVT::v16i16:
1529 case MVT::v4i32:
1530 case MVT::v8i32: {
1532 for (unsigned int i = 0; i < NewEltNum; i++)
1533 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1534 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1535 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1536 }
1537 }
1538}

// Widen element type to get a new mask value (if possible).
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// can be lowered to:
// VPACKOD_D vr0, vr0, vr1
static SDValue lowerVECTOR_SHUFFLEAsWiden(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}

/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}

/// Lower VECTOR_SHUFFLE as shift (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 1, i32 2>
/// is lowered to:
/// (VBSLL_V $v0, $v0, 4)
///
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 4, i32 0, i32 4, i32 2>
/// is lowered to:
/// (VSLLI_D $v0, $v0, 32)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG,
                                          const LoongArchSubtarget &Subtarget,
                                          const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
  return DAG.getBitcast(VT, V);
}

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}

/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
                                           SDValue V2, APInt &KnownUndef,
                                           APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}

/// Test whether a shuffle mask is equivalent within each sub-lane.
///
/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
/// non-trivial to compute in the face of undef lanes. The representation is
/// suitable for use with existing 128-bit shuffles as entries from the second
/// vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}

/// Attempts to match vector shuffle as byte rotation.
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
                                    ArrayRef<int> Mask) {
  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}

/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
///
/// For example:
/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
///                          <2 x i32> <i32 3, i32 0>
/// is lowered to:
/// (VBSRL_V $v1, $v1, 8)
/// (VBSLL_V $v0, $v0, 8)
/// (VOR_V $v0, $v0, $v1)
static SDValue
lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                SDValue V1, SDValue V2, SelectionDAG &DAG,
                                const LoongArchSubtarget &Subtarget) {
  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, GRLenVT));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, GRLenVT));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}

/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
///
/// For example:
/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
///                    <4 x i32> <i32 0, i32 4, i32 1, i32 4>
/// %3 = bitcast <4 x i32> %2 to <2 x i64>
/// is lowered to:
/// (VREPLI $v1, 0)
/// (VILVL $v0, $v1, $v0)
static SDValue
lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
                                     ArrayRef<int> Mask, MVT VT,
                                     SDValue V1, SDValue V2,
                                     SelectionDAG &DAG,
                                     const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  if (Zeroable.isAllOnes())
    return DAG.getConstant(0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it
  // if valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements needs to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // These offsets can't be handled.
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();
    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
      SDValue Ext =
          AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
      InputV = DAG.getBitcast(InputVT, InputV);
      InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
/// <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                             SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
  }

  return SDValue();
}
2001
2002/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
2003///
2004/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
2005/// elements according to a <4 x i2> constant (encoded as an integer immediate).
2006///
2007/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
2008/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2009/// When undef's appear they are treated as if they were whatever value is
2010/// necessary in order to fit the above forms.
2011///
2012/// For example:
2013/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2014/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2015/// i32 7, i32 6, i32 5, i32 4>
2016/// is lowered to:
2017/// (VSHUF4I_H $v0, $v1, 27)
2018/// where the 27 comes from:
2019/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2020static SDValue
2021lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2022 SDValue V1, SDValue V2, SelectionDAG &DAG,
2023 const LoongArchSubtarget &Subtarget) {
2024
2025 unsigned SubVecSize = 4;
2026 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2027 SubVecSize = 2;
2028
2029 int SubMask[4] = {-1, -1, -1, -1};
2030 for (unsigned i = 0; i < SubVecSize; ++i) {
2031 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
2032 int M = Mask[j];
2033
2034 // Convert from a vector index to a 4-element subvector index.
2035 // If an index refers to an element outside of the subvector, then give up.
2036 if (M != -1) {
2037 M -= 4 * (j / SubVecSize);
2038 if (M < 0 || M >= 4)
2039 return SDValue();
2040 }
2041
2042 // If the mask has an undef, replace it with the current index.
2043 // Note that it might still be undef if the current index is also undef.
2044 if (SubMask[i] == -1)
2045 SubMask[i] = M;
2046 // Check that non-undef values are the same as in the mask. If they
2047 // aren't, then give up.
2048 else if (M != -1 && M != SubMask[i])
2049 return SDValue();
2050 }
2051 }
2052
2053 // Calculate the immediate. Replace any remaining undefs with zero.
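// Illustrative computation (assumed submask): for SubMask <3, 2, 1, 0> the
// loop below yields Imm = 3 + (2 << 2) + (1 << 4) + (0 << 6) = 27, matching
// the example in the comment on this function.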
2054 int Imm = 0;
2055 for (int i = SubVecSize - 1; i >= 0; --i) {
2056 int M = SubMask[i];
2057
2058 if (M == -1)
2059 M = 0;
2060
2061 Imm <<= 2;
2062 Imm |= M & 0x3;
2063 }
2064
2065 MVT GRLenVT = Subtarget.getGRLenVT();
2066
2067 // Return vshuf4i.d
2068 if (VT == MVT::v2f64 || VT == MVT::v2i64)
2069 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2070 DAG.getConstant(Imm, DL, GRLenVT));
2071
2072 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
2073 DAG.getConstant(Imm, DL, GRLenVT));
2074}
2075
2076/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
2077///
2078/// It is possible to optimize a VECTOR_SHUFFLE performing a vector
2079/// reverse whose mask looks like:
2080/// <7, 6, 5, 4, 3, 2, 1, 0>
2081///
2082/// When undef's appear in the mask they are treated as if they were whatever
2083/// value is necessary in order to fit the above forms.
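/// For example (illustrative): a v16i8 reverse is first widened to the v4i32
/// reverse mask <3, 2, 1, 0>, and the bytes inside each 32-bit lane are then
/// reversed with a VSHUF4I whose immediate 27 = 0b00011011 encodes the
/// per-lane order <3, 2, 1, 0>.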
2084static SDValue
2085lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2086 SDValue V1, SelectionDAG &DAG,
2087 const LoongArchSubtarget &Subtarget) {
2088 // Only vectors with i8/i16 elements, which cannot match other patterns
2089 // directly, need this transformation.
2090 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
2091 VT != MVT::v16i16)
2092 return SDValue();
2093
2094 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
2095 return SDValue();
2096
2097 int WidenNumElts = VT.getVectorNumElements() / 4;
2098 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
2099 for (int i = 0; i < WidenNumElts; ++i)
2100 WidenMask[i] = WidenNumElts - 1 - i;
2101
2102 MVT WidenVT = MVT::getVectorVT(
2103 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
2104 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
2105 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
2106 DAG.getUNDEF(WidenVT), WidenMask);
2107
2108 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
2109 DAG.getBitcast(VT, WidenRev),
2110 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
2111}
2112
2113/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
2114///
2115/// VPACKEV interleaves the even elements from each vector.
2116///
2117/// It is possible to lower into VPACKEV when the mask consists of two of the
2118/// following forms interleaved:
2119/// <0, 2, 4, ...>
2120/// <n, n+2, n+4, ...>
2121/// where n is the number of elements in the vector.
2122/// For example:
2123/// <0, 0, 2, 2, 4, 4, ...>
2124/// <0, n, 2, n+2, 4, n+4, ...>
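/// A concrete instance (illustrative, assuming v8i16): the mask
/// <0, 8, 2, 10, 4, 12, 6, 14> interleaves the even elements of the two
/// vectors and can therefore be lowered to VPACKEV.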
2125///
2126/// When undef's appear in the mask they are treated as if they were whatever
2127/// value is necessary in order to fit the above forms.
2128static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2129 MVT VT, SDValue V1, SDValue V2,
2130 SelectionDAG &DAG) {
2131
2132 const auto &Begin = Mask.begin();
2133 const auto &End = Mask.end();
2134 SDValue OriV1 = V1, OriV2 = V2;
2135
2136 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2137 V1 = OriV1;
2138 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
2139 V1 = OriV2;
2140 else
2141 return SDValue();
2142
2143 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2144 V2 = OriV1;
2145 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
2146 V2 = OriV2;
2147 else
2148 return SDValue();
2149
2150 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
2151}
2152
2153/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
2154///
2155/// VPACKOD interleaves the odd elements from each vector.
2156///
2157/// It is possible to lower into VPACKOD when the mask consists of two of the
2158/// following forms interleaved:
2159/// <1, 3, 5, ...>
2160/// <n+1, n+3, n+5, ...>
2161/// where n is the number of elements in the vector.
2162/// For example:
2163/// <1, 1, 3, 3, 5, 5, ...>
2164/// <1, n+1, 3, n+3, 5, n+5, ...>
2165///
2166/// When undef's appear in the mask they are treated as if they were whatever
2167/// value is necessary in order to fit the above forms.
2168static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2169 MVT VT, SDValue V1, SDValue V2,
2170 SelectionDAG &DAG) {
2171
2172 const auto &Begin = Mask.begin();
2173 const auto &End = Mask.end();
2174 SDValue OriV1 = V1, OriV2 = V2;
2175
2176 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2177 V1 = OriV1;
2178 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
2179 V1 = OriV2;
2180 else
2181 return SDValue();
2182
2183 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2184 V2 = OriV1;
2185 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
2186 V2 = OriV2;
2187 else
2188 return SDValue();
2189
2190 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
2191}
2192
2193/// Lower VECTOR_SHUFFLE into VILVH (if possible).
2194///
2195/// VILVH interleaves consecutive elements from the left (highest-indexed) half
2196/// of each vector.
2197///
2198/// It is possible to lower into VILVH when the mask consists of two of the
2199/// following forms interleaved:
2200/// <x, x+1, x+2, ...>
2201/// <n+x, n+x+1, n+x+2, ...>
2202/// where n is the number of elements in the vector and x is half n.
2203/// For example:
2204/// <x, x, x+1, x+1, x+2, x+2, ...>
2205/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
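/// A concrete instance (illustrative, assuming v8i16, so n = 8 and x = 4):
/// the mask <4, 12, 5, 13, 6, 14, 7, 15> interleaves the high halves of the
/// two vectors and can therefore be lowered to VILVH.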
2206///
2207/// When undef's appear in the mask they are treated as if they were whatever
2208/// value is necessary in order to fit the above forms.
2209static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
2210 MVT VT, SDValue V1, SDValue V2,
2211 SelectionDAG &DAG) {
2212
2213 const auto &Begin = Mask.begin();
2214 const auto &End = Mask.end();
2215 unsigned HalfSize = Mask.size() / 2;
2216 SDValue OriV1 = V1, OriV2 = V2;
2217
2218 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2219 V1 = OriV1;
2220 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2221 V1 = OriV2;
2222 else
2223 return SDValue();
2224
2225 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2226 V2 = OriV1;
2227 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2228 1))
2229 V2 = OriV2;
2230 else
2231 return SDValue();
2232
2233 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2234}
2235
2236/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2237///
2238/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2239/// of each vector.
2240///
2241/// It is possible to lower into VILVL when the mask consists of two of the
2242/// following forms interleaved:
2243/// <0, 1, 2, ...>
2244/// <n, n+1, n+2, ...>
2245/// where n is the number of elements in the vector.
2246/// For example:
2247/// <0, 0, 1, 1, 2, 2, ...>
2248/// <0, n, 1, n+1, 2, n+2, ...>
2249///
2250/// When undef's appear in the mask they are treated as if they were whatever
2251/// value is necessary in order to fit the above forms.
2252static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2253 MVT VT, SDValue V1, SDValue V2,
2254 SelectionDAG &DAG) {
2255
2256 const auto &Begin = Mask.begin();
2257 const auto &End = Mask.end();
2258 SDValue OriV1 = V1, OriV2 = V2;
2259
2260 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2261 V1 = OriV1;
2262 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2263 V1 = OriV2;
2264 else
2265 return SDValue();
2266
2267 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2268 V2 = OriV1;
2269 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2270 V2 = OriV2;
2271 else
2272 return SDValue();
2273
2274 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2275}
2276
2277/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2278///
2279/// VPICKEV copies the even elements of each vector into the result vector.
2280///
2281/// It is possible to lower into VPICKEV when the mask consists of two of the
2282/// following forms concatenated:
2283/// <0, 2, 4, ...>
2284/// <n, n+2, n+4, ...>
2285/// where n is the number of elements in the vector.
2286/// For example:
2287/// <0, 2, 4, ..., 0, 2, 4, ...>
2288/// <0, 2, 4, ..., n, n+2, n+4, ...>
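/// A concrete instance (illustrative, assuming v8i16): the mask
/// <0, 2, 4, 6, 8, 10, 12, 14> places the even elements of V1 in the low
/// half and the even elements of V2 in the high half, so it can be lowered
/// to VPICKEV.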
2289///
2290/// When undef's appear in the mask they are treated as if they were whatever
2291/// value is necessary in order to fit the above forms.
2292static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2293 MVT VT, SDValue V1, SDValue V2,
2294 SelectionDAG &DAG) {
2295
2296 const auto &Begin = Mask.begin();
2297 const auto &Mid = Mask.begin() + Mask.size() / 2;
2298 const auto &End = Mask.end();
2299 SDValue OriV1 = V1, OriV2 = V2;
2300
2301 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2302 V1 = OriV1;
2303 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2304 V1 = OriV2;
2305 else
2306 return SDValue();
2307
2308 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2309 V2 = OriV1;
2310 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2311 V2 = OriV2;
2312
2313 else
2314 return SDValue();
2315
2316 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2317}
2318
2319/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2320///
2321/// VPICKOD copies the odd elements of each vector into the result vector.
2322///
2323/// It is possible to lower into VPICKOD when the mask consists of two of the
2324/// following forms concatenated:
2325/// <1, 3, 5, ...>
2326/// <n+1, n+3, n+5, ...>
2327/// where n is the number of elements in the vector.
2328/// For example:
2329/// <1, 3, 5, ..., 1, 3, 5, ...>
2330/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2331///
2332/// When undef's appear in the mask they are treated as if they were whatever
2333/// value is necessary in order to fit the above forms.
2334static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2335 MVT VT, SDValue V1, SDValue V2,
2336 SelectionDAG &DAG) {
2337
2338 const auto &Begin = Mask.begin();
2339 const auto &Mid = Mask.begin() + Mask.size() / 2;
2340 const auto &End = Mask.end();
2341 SDValue OriV1 = V1, OriV2 = V2;
2342
2343 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2344 V1 = OriV1;
2345 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2346 V1 = OriV2;
2347 else
2348 return SDValue();
2349
2350 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2351 V2 = OriV1;
2352 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2353 V2 = OriV2;
2354 else
2355 return SDValue();
2356
2357 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2358}
2359
2360/// Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
2361///
2362/// VEXTRINS copies one element of a vector into any position of the result
2363/// vector and leaves the remaining elements of the result vector unchanged.
2364///
2365/// It is possible to lower into VEXTRINS when the mask takes the form:
2366/// <0, 1, 2, ..., n+i, ..., n-1> or <n, n+1, n+2, ..., i, ..., 2n-1> or
2367/// <0, 1, 2, ..., i, ..., n-1> or <n, n+1, n+2, ..., n+i, ..., 2n-1>
2368/// where n is the number of elements in the vector and i is in [0, n).
2369/// For example:
2370/// <0, 1, 2, 3, 4, 5, 6, 8> , <2, 9, 10, 11, 12, 13, 14, 15> ,
2371/// <0, 1, 2, 6, 4, 5, 6, 7> , <8, 9, 10, 11, 12, 9, 14, 15>
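/// To illustrate the first form (assumed mask): <0, 1, 2, 3, 4, 5, 6, 8> is
/// the identity on V1 except at index 7, which takes element 0 of V2; it is
/// rewritten as extracting that element and inserting it into V1 at index 7.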
2372///
2373/// When undef's appear in the mask they are treated as if they were whatever
2374/// value is necessary in order to fit the above forms.
2375static SDValue
2376lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2377 SDValue V1, SDValue V2, SelectionDAG &DAG,
2378 const LoongArchSubtarget &Subtarget) {
2379 unsigned NumElts = VT.getVectorNumElements();
2380 MVT EltVT = VT.getVectorElementType();
2381 MVT GRLenVT = Subtarget.getGRLenVT();
2382
2383 if (Mask.size() != NumElts)
2384 return SDValue();
2385
2386 auto tryLowerToExtrAndIns = [&](unsigned Base) -> SDValue {
2387 int DiffCount = 0;
2388 int DiffPos = -1;
2389 for (unsigned i = 0; i < NumElts; ++i) {
2390 if (Mask[i] == -1)
2391 continue;
2392 if (Mask[i] != int(Base + i)) {
2393 ++DiffCount;
2394 DiffPos = int(i);
2395 if (DiffCount > 1)
2396 return SDValue();
2397 }
2398 }
2399
2400 // Need exactly one differing element to lower into VEXTRINS.
2401 if (DiffCount != 1)
2402 return SDValue();
2403
2404 // DiffMask must be in [0, 2N).
2405 int DiffMask = Mask[DiffPos];
2406 if (DiffMask < 0 || DiffMask >= int(2 * NumElts))
2407 return SDValue();
2408
2409 // Determine source vector and source index.
2410 SDValue SrcVec;
2411 unsigned SrcIdx;
2412 if (unsigned(DiffMask) < NumElts) {
2413 SrcVec = V1;
2414 SrcIdx = unsigned(DiffMask);
2415 } else {
2416 SrcVec = V2;
2417 SrcIdx = unsigned(DiffMask) - NumElts;
2418 }
2419
2420 // Replace with EXTRACT_VECTOR_ELT + INSERT_VECTOR_ELT, which will match
2421 // the patterns of VEXTRINS in tablegen.
2422 SDValue Extracted = DAG.getNode(
2423 ISD::EXTRACT_VECTOR_ELT, DL, EltVT.isFloatingPoint() ? EltVT : GRLenVT,
2424 SrcVec, DAG.getConstant(SrcIdx, DL, GRLenVT));
2425 SDValue Result =
2426 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, (Base == 0) ? V1 : V2,
2427 Extracted, DAG.getConstant(DiffPos, DL, GRLenVT));
2428
2429 return Result;
2430 };
2431
2432 // Try [0, n-1) insertion then [n, 2n-1) insertion.
2433 if (SDValue Result = tryLowerToExtrAndIns(0))
2434 return Result;
2435 return tryLowerToExtrAndIns(NumElts);
2436}
2437
2438// Check the Mask and then build the SrcVec and MaskImm info, which will
2439// be used to build LoongArchISD nodes for VPERMI_W or XVPERMI_W.
2440// On success, return true. Otherwise, return false.
2441static bool buildVPERMIInfo(ArrayRef<int> Mask, SDValue V1, SDValue V2,
2442 SmallVectorImpl<SDValue> &SrcVec,
2443 unsigned &MaskImm) {
2444 unsigned MaskSize = Mask.size();
2445
2446 auto isValid = [&](int M, int Off) {
2447 return (M == -1) || (M >= Off && M < Off + 4);
2448 };
2449
2450 auto buildImm = [&](int MLo, int MHi, unsigned Off, unsigned I) {
2451 auto immPart = [&](int M, unsigned Off) {
2452 return (M == -1 ? 0 : (M - Off)) & 0x3;
2453 };
2454 MaskImm |= immPart(MLo, Off) << (I * 2);
2455 MaskImm |= immPart(MHi, Off) << ((I + 1) * 2);
2456 };
2457
2458 for (unsigned i = 0; i < 4; i += 2) {
2459 int MLo = Mask[i];
2460 int MHi = Mask[i + 1];
2461
2462 if (MaskSize == 8) { // Only v8i32/v8f32 need this check.
2463 int M2Lo = Mask[i + 4];
2464 int M2Hi = Mask[i + 5];
2465 if (M2Lo != MLo + 4 || M2Hi != MHi + 4)
2466 return false;
2467 }
2468
2469 if (isValid(MLo, 0) && isValid(MHi, 0)) {
2470 SrcVec.push_back(V1);
2471 buildImm(MLo, MHi, 0, i);
2472 } else if (isValid(MLo, MaskSize) && isValid(MHi, MaskSize)) {
2473 SrcVec.push_back(V2);
2474 buildImm(MLo, MHi, MaskSize, i);
2475 } else {
2476 return false;
2477 }
2478 }
2479
2480 return true;
2481}
2482
2483/// Lower VECTOR_SHUFFLE into VPERMI (if possible).
2484///
2485/// VPERMI selects two elements from each of the two vectors based on the
2486/// mask and places them in the corresponding positions of the result vector
2487/// in order. Only v4i32 and v4f32 types are allowed.
2488///
2489/// It is possible to lower into VPERMI when the mask consists of two of the
2490/// following forms concatenated:
2491/// <i, j, u, v>
2492/// <u, v, i, j>
2493/// where i,j are in [0,4) and u,v are in [4, 8).
2494/// For example:
2495/// <2, 3, 4, 5>
2496/// <5, 7, 0, 2>
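/// For the first example above, an illustrative encoding: the mask
/// <2, 3, 4, 5> selects elements 2 and 3 of V1 and elements 0 and 1 of V2,
/// so buildVPERMIInfo computes MaskImm = 0b01001110 (78).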
2497///
2498/// When undef's appear in the mask they are treated as if they were whatever
2499/// value is necessary in order to fit the above forms.
2500static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef<int> Mask,
2501 MVT VT, SDValue V1, SDValue V2,
2502 SelectionDAG &DAG,
2503 const LoongArchSubtarget &Subtarget) {
2504 if ((VT != MVT::v4i32 && VT != MVT::v4f32) ||
2505 Mask.size() != VT.getVectorNumElements())
2506 return SDValue();
2507
2508 SmallVector<SDValue, 2> SrcVec;
2509 unsigned MaskImm = 0;
2510 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2511 return SDValue();
2512
2513 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2514 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2515}
2516
2517/// Lower VECTOR_SHUFFLE into VSHUF.
2518///
2519/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2520/// adding it as an operand to the resulting VSHUF.
2521static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2522 MVT VT, SDValue V1, SDValue V2,
2523 SelectionDAG &DAG,
2524 const LoongArchSubtarget &Subtarget) {
2525
2526 SmallVector<SDValue, 16> Ops;
2527 for (auto M : Mask)
2528 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2529
2530 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2531 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2532
2533 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2534 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2535 // VSHF concatenates the vectors in a bitwise fashion:
2536 // <0b00, 0b01> + <0b10, 0b11> ->
2537 // 0b0100 + 0b1110 -> 0b01001110
2538 // <0b10, 0b11, 0b00, 0b01>
2539 // We must therefore swap the operands to get the correct result.
2540 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2541}
2542
2543/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2544///
2545/// This routine breaks down the specific type of 128-bit shuffle and
2546/// dispatches to the lowering routines accordingly.
2547static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2548 SDValue V1, SDValue V2, SelectionDAG &DAG,
2549 const LoongArchSubtarget &Subtarget) {
2550 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2551 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2552 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2553 "Vector type is unsupported for lsx!");
2555 "Two operands have different types!");
2556 assert(VT.getVectorNumElements() == Mask.size() &&
2557 "Unexpected mask size for shuffle!");
2558 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2559
2560 APInt KnownUndef, KnownZero;
2561 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2562 APInt Zeroable = KnownUndef | KnownZero;
2563
2564 SDValue Result;
2565 // TODO: Add more comparison patterns.
2566 if (V2.isUndef()) {
2567 if ((Result =
2568 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2569 return Result;
2570 if ((Result =
2571 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2572 return Result;
2573 if ((Result =
2574 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2575 return Result;
2576
2577 // TODO: The commented-out assignment below may be enabled in the future
2578 // to better match the pattern for instruction selection.
2579 /* V2 = V1; */
2580 }
2581
2582 // It is recommended not to change the pattern comparison order for better
2583 // performance.
2584 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2585 return Result;
2586 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2587 return Result;
2588 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2589 return Result;
2590 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2591 return Result;
2592 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2593 return Result;
2594 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2595 return Result;
2596 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2597 (Result =
2598 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2599 return Result;
2600 if ((Result =
2601 lowerVECTOR_SHUFFLE_VEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2602 return Result;
2603 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2604 Zeroable)))
2605 return Result;
2606 if ((Result =
2607 lowerVECTOR_SHUFFLE_VPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2608 return Result;
2609 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2610 Zeroable)))
2611 return Result;
2612 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2613 Subtarget)))
2614 return Result;
2615 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2616 return NewShuffle;
2617 if ((Result =
2618 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2619 return Result;
2620 return SDValue();
2621}
2622
2623/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2624///
2625/// It is an XVREPLVEI when the mask is:
2626/// <x, x, x, ..., x+n, x+n, x+n, ...>
2627/// where the number of x is equal to n and n is half the length of the vector.
2628///
2629/// When undef's appear in the mask they are treated as if they were whatever
2630/// value is necessary in order to fit the above form.
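/// An illustrative example (assumed mask): for v8i32, the mask
/// <2, 2, 2, 2, 6, 6, 6, 6> fits this form with x = 2 and n = 4, and
/// broadcasts element 2 within each 128-bit half of the vector.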
2631static SDValue
2632lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2633 SDValue V1, SelectionDAG &DAG,
2634 const LoongArchSubtarget &Subtarget) {
2635 int SplatIndex = -1;
2636 for (const auto &M : Mask) {
2637 if (M != -1) {
2638 SplatIndex = M;
2639 break;
2640 }
2641 }
2642
2643 if (SplatIndex == -1)
2644 return DAG.getUNDEF(VT);
2645
2646 const auto &Begin = Mask.begin();
2647 const auto &End = Mask.end();
2648 int HalfSize = Mask.size() / 2;
2649
2650 if (SplatIndex >= HalfSize)
2651 return SDValue();
2652
2653 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2654 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2655 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2656 0)) {
2657 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2658 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2659 }
2660
2661 return SDValue();
2662}
2663
2664/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2665static SDValue
2666lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2667 SDValue V1, SDValue V2, SelectionDAG &DAG,
2668 const LoongArchSubtarget &Subtarget) {
2669 // XVSHUF4I_D must be handled separately because it is different from other
2670 // types of [X]VSHUF4I instructions.
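// Illustrative example (assumed mask): for the v4i64 mask <0, 5, 2, 7>, the
// pairs (Mask[0], Mask[2]) = (0, 2) and (Mask[1], Mask[3]) = (5, 7) pass the
// checks below and encode MaskImm = 0b1100.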
2671 if (Mask.size() == 4) {
2672 unsigned MaskImm = 0;
2673 for (int i = 1; i >= 0; --i) {
2674 int MLo = Mask[i];
2675 int MHi = Mask[i + 2];
2676 if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
2677 !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
2678 return SDValue();
2679 if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
2680 return SDValue();
2681
2682 MaskImm <<= 2;
2683 if (MLo != -1)
2684 MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
2685 else if (MHi != -1)
2686 MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
2687 }
2688
2689 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
2690 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2691 }
2692
2693 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2694}
2695
2696/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2697static SDValue
2698lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2699 SDValue V1, SDValue V2, SelectionDAG &DAG,
2700 const LoongArchSubtarget &Subtarget) {
2701 MVT GRLenVT = Subtarget.getGRLenVT();
2702 unsigned MaskSize = Mask.size();
2703 if (MaskSize != VT.getVectorNumElements())
2704 return SDValue();
2705
2706 // Consider XVPERMI_W.
2707 if (VT == MVT::v8i32 || VT == MVT::v8f32) {
2708 SmallVector<SDValue, 2> SrcVec;
2709 unsigned MaskImm = 0;
2710 if (!buildVPERMIInfo(Mask, V1, V2, SrcVec, MaskImm))
2711 return SDValue();
2712
2713 return DAG.getNode(LoongArchISD::VPERMI, DL, VT, SrcVec[1], SrcVec[0],
2714 DAG.getConstant(MaskImm, DL, GRLenVT));
2715 }
2716
2717 // Consider XVPERMI_D.
2718 if (VT == MVT::v4i64 || VT == MVT::v4f64) {
2719 unsigned MaskImm = 0;
2720 for (unsigned i = 0; i < MaskSize; ++i) {
2721 if (Mask[i] == -1)
2722 continue;
2723 if (Mask[i] >= (int)MaskSize)
2724 return SDValue();
2725 MaskImm |= Mask[i] << (i * 2);
2726 }
2727
2728 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2729 DAG.getConstant(MaskImm, DL, GRLenVT));
2730 }
2731
2732 return SDValue();
2733}
2734
2735/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2736static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2737 MVT VT, SDValue V1, SelectionDAG &DAG,
2738 const LoongArchSubtarget &Subtarget) {
2739 // LoongArch LASX only has XVPERM_W.
2740 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2741 return SDValue();
2742
2743 unsigned NumElts = VT.getVectorNumElements();
2744 unsigned HalfSize = NumElts / 2;
2745 bool FrontLo = true, FrontHi = true;
2746 bool BackLo = true, BackHi = true;
2747
2748 auto inRange = [](int val, int low, int high) {
2749 return (val == -1) || (val >= low && val < high);
2750 };
2751
2752 for (unsigned i = 0; i < HalfSize; ++i) {
2753 int Fronti = Mask[i];
2754 int Backi = Mask[i + HalfSize];
2755
2756 FrontLo &= inRange(Fronti, 0, HalfSize);
2757 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2758 BackLo &= inRange(Backi, 0, HalfSize);
2759 BackHi &= inRange(Backi, HalfSize, NumElts);
2760 }
2761
2762 // If both the lower and upper 128-bit parts access only one half of the
2763 // vector (either lower or upper), avoid using xvperm.w. The latency of
2764 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2765 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2766 return SDValue();
2767
2768 SmallVector<SDValue, 8> Masks;
2769 MVT GRLenVT = Subtarget.getGRLenVT();
2770 for (unsigned i = 0; i < NumElts; ++i)
2771 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2772 : DAG.getConstant(Mask[i], DL, GRLenVT));
2773 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2774
2775 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2776}
2777
2778/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2779static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2780 MVT VT, SDValue V1, SDValue V2,
2781 SelectionDAG &DAG) {
2782 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2783}
2784
2785/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2786static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2787 MVT VT, SDValue V1, SDValue V2,
2788 SelectionDAG &DAG) {
2789 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2790}
2791
2792/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2793static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2794 MVT VT, SDValue V1, SDValue V2,
2795 SelectionDAG &DAG) {
2796
2797 const auto &Begin = Mask.begin();
2798 const auto &End = Mask.end();
2799 unsigned HalfSize = Mask.size() / 2;
2800 unsigned LeftSize = HalfSize / 2;
2801 SDValue OriV1 = V1, OriV2 = V2;
2802
2803 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2804 1) &&
2805 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2806 V1 = OriV1;
2807 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2808 Mask.size() + HalfSize - LeftSize, 1) &&
2809 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2810 Mask.size() + HalfSize + LeftSize, 1))
2811 V1 = OriV2;
2812 else
2813 return SDValue();
2814
2815 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2816 1) &&
2817 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2818 1))
2819 V2 = OriV1;
2820 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2821 Mask.size() + HalfSize - LeftSize, 1) &&
2822 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2823 Mask.size() + HalfSize + LeftSize, 1))
2824 V2 = OriV2;
2825 else
2826 return SDValue();
2827
2828 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2829}
2830
2831/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2832static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2833 MVT VT, SDValue V1, SDValue V2,
2834 SelectionDAG &DAG) {
2835
2836 const auto &Begin = Mask.begin();
2837 const auto &End = Mask.end();
2838 unsigned HalfSize = Mask.size() / 2;
2839 SDValue OriV1 = V1, OriV2 = V2;
2840
2841 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2842 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2843 V1 = OriV1;
2844 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2845 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2846 Mask.size() + HalfSize, 1))
2847 V1 = OriV2;
2848 else
2849 return SDValue();
2850
2851 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2852 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2853 V2 = OriV1;
2854 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2855 1) &&
2856 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2857 Mask.size() + HalfSize, 1))
2858 V2 = OriV2;
2859 else
2860 return SDValue();
2861
2862 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2863}
2864
2865/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2866static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2867 MVT VT, SDValue V1, SDValue V2,
2868 SelectionDAG &DAG) {
2869
2870 const auto &Begin = Mask.begin();
2871 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2872 const auto &Mid = Mask.begin() + Mask.size() / 2;
2873 const auto &RightMid = Mask.end() - Mask.size() / 4;
2874 const auto &End = Mask.end();
2875 unsigned HalfSize = Mask.size() / 2;
2876 SDValue OriV1 = V1, OriV2 = V2;
2877
2878 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2879 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2880 V1 = OriV1;
2881 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2882 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2883 V1 = OriV2;
2884 else
2885 return SDValue();
2886
2887 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2888 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2889 V2 = OriV1;
2890 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2891 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2892 V2 = OriV2;
2893
2894 else
2895 return SDValue();
2896
2897 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2898}
2899
2900/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2901static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2902 MVT VT, SDValue V1, SDValue V2,
2903 SelectionDAG &DAG) {
2904
2905 const auto &Begin = Mask.begin();
2906 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2907 const auto &Mid = Mask.begin() + Mask.size() / 2;
2908 const auto &RightMid = Mask.end() - Mask.size() / 4;
2909 const auto &End = Mask.end();
2910 unsigned HalfSize = Mask.size() / 2;
2911 SDValue OriV1 = V1, OriV2 = V2;
2912
2913 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2914 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2915 V1 = OriV1;
2916 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2917 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2918 2))
2919 V1 = OriV2;
2920 else
2921 return SDValue();
2922
2923 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2924 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2925 V2 = OriV1;
2926 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2927 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2928 2))
2929 V2 = OriV2;
2930 else
2931 return SDValue();
2932
2933 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2934}
2935
2936/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
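/// For example (illustrative, assuming v8i32): the mask
/// <0, 1, 10, 3, 4, 5, 14, 7> keeps V1 except at indices 2 and 6, which take
/// elements 2 and 6 of V2; both 128-bit halves are modified at the same
/// offset, matching the half-parallel behavior of xvextrins.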
2937static SDValue
2938lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2939 SDValue V1, SDValue V2, SelectionDAG &DAG,
2940 const LoongArchSubtarget &Subtarget) {
2941 int NumElts = VT.getVectorNumElements();
2942 int HalfSize = NumElts / 2;
2943 MVT EltVT = VT.getVectorElementType();
2944 MVT GRLenVT = Subtarget.getGRLenVT();
2945
2946 if ((int)Mask.size() != NumElts)
2947 return SDValue();
2948
2949 auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
2950 SmallVector<int> DiffPos;
2951 for (int i = 0; i < NumElts; ++i) {
2952 if (Mask[i] == -1)
2953 continue;
2954 if (Mask[i] != Base + i) {
2955 DiffPos.push_back(i);
2956 if (DiffPos.size() > 2)
2957 return SDValue();
2958 }
2959 }
2960
2961 // Need exactly two differing elements to lower into XVEXTRINS.
2962 // If there is only one differing element, the element at a distance of
2963 // HalfSize from it must be undef.
2964 if (DiffPos.size() == 1) {
2965 if (DiffPos[0] < HalfSize && Mask[DiffPos[0] + HalfSize] == -1)
2966 DiffPos.push_back(DiffPos[0] + HalfSize);
2967 else if (DiffPos[0] >= HalfSize && Mask[DiffPos[0] - HalfSize] == -1)
2968 DiffPos.insert(DiffPos.begin(), DiffPos[0] - HalfSize);
2969 else
2970 return SDValue();
2971 }
2972 if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + HalfSize)
2973 return SDValue();
2974
2975 // DiffMask must be in its low or high part.
2976 int DiffMaskLo = Mask[DiffPos[0]];
2977 int DiffMaskHi = Mask[DiffPos[1]];
2978 DiffMaskLo = DiffMaskLo == -1 ? DiffMaskHi - HalfSize : DiffMaskLo;
2979 DiffMaskHi = DiffMaskHi == -1 ? DiffMaskLo + HalfSize : DiffMaskHi;
2980 if (!(DiffMaskLo >= 0 && DiffMaskLo < HalfSize) &&
2981 !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + HalfSize))
2982 return SDValue();
2983 if (!(DiffMaskHi >= HalfSize && DiffMaskHi < NumElts) &&
2984 !(DiffMaskHi >= NumElts + HalfSize && DiffMaskHi < 2 * NumElts))
2985 return SDValue();
2986 if (DiffMaskHi != DiffMaskLo + HalfSize)
2987 return SDValue();
2988
2989 // Determine source vector and source index.
2990 SDValue SrcVec = (DiffMaskLo < HalfSize) ? V1 : V2;
2991 int SrcIdxLo =
2992 (DiffMaskLo < HalfSize) ? DiffMaskLo : (DiffMaskLo - NumElts);
2993 bool IsEltFP = EltVT.isFloatingPoint();
2994
2995 // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
2996 // the patterns of XVEXTRINS in tablegen.
2997 SDValue BaseVec = (Base == 0) ? V1 : V2;
2998 SDValue EltLo =
2999 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3000 SrcVec, DAG.getConstant(SrcIdxLo, DL, GRLenVT));
3001 SDValue InsLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, BaseVec, EltLo,
3002 DAG.getConstant(DiffPos[0], DL, GRLenVT));
3003 SDValue EltHi =
3004 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
3005 SrcVec, DAG.getConstant(SrcIdxLo + HalfSize, DL, GRLenVT));
3006 SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsLo, EltHi,
3007 DAG.getConstant(DiffPos[1], DL, GRLenVT));
3008
3009 return Result;
3010 };
3011
3012 // Try [0, n-1) insertion then [n, 2n-1) insertion.
3013 if (SDValue Result = tryLowerToExtrAndIns(0))
3014 return Result;
3015 return tryLowerToExtrAndIns(NumElts);
3016}
3017
3018/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
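/// For example (illustrative, assuming v8i32): the mask
/// <0, 1, 2, 8, 4, 5, 6, 7> replaces element 3 of V1 with element 0 of V2
/// and is lowered to an XVINSVE0 with index 3.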
3019static SDValue
3020lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
3021 SDValue V1, SDValue V2, SelectionDAG &DAG,
3022 const LoongArchSubtarget &Subtarget) {
3023 // LoongArch LASX only supports xvinsve0.{w/d}.
3024 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
3025 VT != MVT::v4f64)
3026 return SDValue();
3027
3028 MVT GRLenVT = Subtarget.getGRLenVT();
3029 int MaskSize = Mask.size();
3030 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
3031
3032 // Check if exactly one element of the Mask is replaced by 'Replaced', while
3033 // all other elements are either 'Base + i' or undef (-1). On success, return
3034 // the index of the replaced element. Otherwise, just return -1.
3035 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
3036 int Idx = -1;
3037 for (int i = 0; i < MaskSize; ++i) {
3038 if (Mask[i] == Base + i || Mask[i] == -1)
3039 continue;
3040 if (Mask[i] != Replaced)
3041 return -1;
3042 if (Idx == -1)
3043 Idx = i;
3044 else
3045 return -1;
3046 }
3047 return Idx;
3048 };
3049
3050 // Case 1: the lowest element of V2 replaces one element in V1.
3051 int Idx = checkReplaceOne(0, MaskSize);
3052 if (Idx != -1)
3053 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
3054 DAG.getConstant(Idx, DL, GRLenVT));
3055
3056 // Case 2: the lowest element of V1 replaces one element in V2.
3057 Idx = checkReplaceOne(MaskSize, 0);
3058 if (Idx != -1)
3059 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
3060 DAG.getConstant(Idx, DL, GRLenVT));
3061
3062 return SDValue();
3063}
3064
3065/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
3066static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
3067 MVT VT, SDValue V1, SDValue V2,
3068 SelectionDAG &DAG) {
3069
3070 int MaskSize = Mask.size();
3071 int HalfSize = Mask.size() / 2;
3072 const auto &Begin = Mask.begin();
3073 const auto &Mid = Mask.begin() + HalfSize;
3074 const auto &End = Mask.end();
3075
3076 // VECTOR_SHUFFLE concatenates the vectors:
3077 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
3078 // shuffling ->
3079 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
3080 //
3081 // XVSHUF concatenates the vectors:
3082 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
3083 // shuffling ->
3084 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
3085 SmallVector<SDValue, 8> MaskAlloc;
3086 for (auto it = Begin; it < Mid; it++) {
3087 if (*it < 0) // UNDEF
3088 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3089 else if ((*it >= 0 && *it < HalfSize) ||
3090 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
3091 int M = *it < HalfSize ? *it : *it - HalfSize;
3092 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3093 } else
3094 return SDValue();
3095 }
3096 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
3097
3098 for (auto it = Mid; it < End; it++) {
3099 if (*it < 0) // UNDEF
3100 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
3101 else if ((*it >= HalfSize && *it < MaskSize) ||
3102 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
3103 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
3104 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
3105 } else
3106 return SDValue();
3107 }
3108 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
3109
3110 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
3111 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
3112 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
3113}
3114
3114
3115/// Shuffle vectors by lane to generate more optimized instructions.
3116/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
3117///
3118/// Therefore, except for the following four cases, other cases are regarded
3119/// as cross-lane shuffles, where optimization is relatively limited.
3120///
3121/// - Shuffle high, low lanes of two input vectors
3122/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
3123/// - Shuffle low, high lanes of two input vectors
3124/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
3125/// - Shuffle low, low lanes of two input vectors
3126/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
3127/// - Shuffle high, high lanes of two input vectors
3128/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
3129///
3130/// The first case is the closest to LoongArch instructions and the other
3131/// cases need to be converted to it for processing.
3132///
3133/// This function will return true for the last three cases above and will
3134/// modify V1, V2 and Mask. Otherwise, return false for the first case and
3135/// cross-lane shuffle cases.
3136static bool canonicalizeShuffleVectorByLane(
3137 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
3138 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
3139
3140 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
3141
3142 int MaskSize = Mask.size();
3143 int HalfSize = Mask.size() / 2;
3144 MVT GRLenVT = Subtarget.getGRLenVT();
3145
3146 HalfMaskType preMask = None, postMask = None;
3147
3148 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3149 return M < 0 || (M >= 0 && M < HalfSize) ||
3150 (M >= MaskSize && M < MaskSize + HalfSize);
3151 }))
3152 preMask = HighLaneTy;
3153 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
3154 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3155 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3156 }))
3157 preMask = LowLaneTy;
3158
3159 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3160 return M < 0 || (M >= HalfSize && M < MaskSize) ||
3161 (M >= MaskSize + HalfSize && M < MaskSize * 2);
3162 }))
3163 postMask = LowLaneTy;
3164 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
3165 return M < 0 || (M >= 0 && M < HalfSize) ||
3166 (M >= MaskSize && M < MaskSize + HalfSize);
3167 }))
3168 postMask = HighLaneTy;
3169
3170 // The pre-half of mask is high lane type, and the post-half of mask
3171 // is low lane type, which is closest to the LoongArch instructions.
3172 //
3173 // Note: In the LoongArch architecture, the high lane of the mask corresponds
3174 // to the lower 128 bits of the vector register, and the low lane of the mask
3175 // corresponds to the higher 128 bits of the vector register.
3176 if (preMask == HighLaneTy && postMask == LowLaneTy) {
3177 return false;
3178 }
3179 if (preMask == LowLaneTy && postMask == HighLaneTy) {
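    // Illustrative note: the immediate 0b01001110 selects the v4i64 elements
    // <2, 3, 0, 1>, i.e. this XVPERMI swaps the two 128-bit lanes of its
    // source register.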
3180 V1 = DAG.getBitcast(MVT::v4i64, V1);
3181 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3182 DAG.getConstant(0b01001110, DL, GRLenVT));
3183 V1 = DAG.getBitcast(VT, V1);
3184
3185 if (!V2.isUndef()) {
3186 V2 = DAG.getBitcast(MVT::v4i64, V2);
3187 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3188 DAG.getConstant(0b01001110, DL, GRLenVT));
3189 V2 = DAG.getBitcast(VT, V2);
3190 }
3191
3192 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3193 *it = *it < 0 ? *it : *it - HalfSize;
3194 }
3195 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3196 *it = *it < 0 ? *it : *it + HalfSize;
3197 }
3198 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
3199 V1 = DAG.getBitcast(MVT::v4i64, V1);
3200 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3201 DAG.getConstant(0b11101110, DL, GRLenVT));
3202 V1 = DAG.getBitcast(VT, V1);
3203
3204 if (!V2.isUndef()) {
3205 V2 = DAG.getBitcast(MVT::v4i64, V2);
3206 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3207 DAG.getConstant(0b11101110, DL, GRLenVT));
3208 V2 = DAG.getBitcast(VT, V2);
3209 }
3210
3211 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
3212 *it = *it < 0 ? *it : *it - HalfSize;
3213 }
3214 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
3215 V1 = DAG.getBitcast(MVT::v4i64, V1);
3216 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
3217 DAG.getConstant(0b01000100, DL, GRLenVT));
3218 V1 = DAG.getBitcast(VT, V1);
3219
3220 if (!V2.isUndef()) {
3221 V2 = DAG.getBitcast(MVT::v4i64, V2);
3222 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
3223 DAG.getConstant(0b01000100, DL, GRLenVT));
3224 V2 = DAG.getBitcast(VT, V2);
3225 }
3226
3227 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
3228 *it = *it < 0 ? *it : *it + HalfSize;
3229 }
3230 } else { // cross-lane
3231 return false;
3232 }
3233
3234 return true;
3235}
3236
3237/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
3238/// Only for 256-bit vector.
3239///
3240/// For example:
3241/// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
3242/// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
3243/// is lowered to:
3244/// (XVPERMI $xr2, $xr0, 78)
3245/// (XVSHUF $xr1, $xr2, $xr0)
3246/// (XVORI $xr0, $xr1, 0)
3247static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
3248 ArrayRef<int> Mask,
3249 MVT VT, SDValue V1,
3250 SDValue V2,
3251 SelectionDAG &DAG) {
3252 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
3253 int Size = Mask.size();
3254 int LaneSize = Size / 2;
3255
3256 bool LaneCrossing[2] = {false, false};
3257 for (int i = 0; i < Size; ++i)
3258 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
3259 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
3260
3261 // Ensure that at least one lane crossing is actually involved.
3262 if (!LaneCrossing[0] && !LaneCrossing[1])
3263 return SDValue();
3264
3265 SmallVector<int> InLaneMask;
3266 InLaneMask.assign(Mask.begin(), Mask.end());
3267 for (int i = 0; i < Size; ++i) {
3268 int &M = InLaneMask[i];
3269 if (M < 0)
3270 continue;
3271 if (((M % Size) / LaneSize) != (i / LaneSize))
3272 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
3273 }
3274
3275 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
3276 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
3277 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
3278 Flipped = DAG.getBitcast(VT, Flipped);
3279 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
3280}
3281
3282/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
3283///
3284/// This routine breaks down the specific type of 256-bit shuffle and
3285/// dispatches to the lowering routines accordingly.
3286static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
3287 SDValue V1, SDValue V2, SelectionDAG &DAG,
3288 const LoongArchSubtarget &Subtarget) {
3289 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
3290 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
3291 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
3292 "Vector type is unsupported for lasx!");
3294 "Two operands have different types!");
3295 assert(VT.getVectorNumElements() == Mask.size() &&
3296 "Unexpected mask size for shuffle!");
3297 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
3298 assert(Mask.size() >= 4 && "Mask size is less than 4.");
3299
3300 APInt KnownUndef, KnownZero;
3301 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
3302 APInt Zeroable = KnownUndef | KnownZero;
3303
3304 SDValue Result;
3305 // TODO: Add more comparison patterns.
3306 if (V2.isUndef()) {
3307 if ((Result =
3308 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
3309 return Result;
3310 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
3311 Subtarget)))
3312 return Result;
3313 // Try to widen vectors to gain more optimization opportunities.
3314 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
3315 return NewShuffle;
3316 if ((Result =
3317 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3318 return Result;
3319 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
3320 return Result;
3321 if ((Result =
3322 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
3323 return Result;
3324
3325 // TODO: The commented-out assignment below may be enabled in the future
3326 // to better match the pattern for instruction selection.
3327 /* V2 = V1; */
3328 }
3329
3330 // It is recommended not to change the pattern comparison order for better
3331 // performance.
3332 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
3333 return Result;
3334 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
3335 return Result;
3336 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
3337 return Result;
3338 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
3339 return Result;
3340 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
3341 return Result;
3342 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
3343 return Result;
3344 if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
3345 (Result =
3346 lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3347 return Result;
3348 if ((Result =
3349 lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3350 return Result;
3351 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
3352 Zeroable)))
3353 return Result;
3354 if ((Result =
3355 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3356 return Result;
3357 if ((Result =
3358 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
3359 return Result;
3360 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
3361 Subtarget)))
3362 return Result;
3363
3364 // Canonicalize non-cross-lane shuffle vectors.
3365 SmallVector<int> NewMask(Mask);
3366 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
3367 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
3368
3369 // FIXME: Handling the remaining cases earlier can degrade performance
3370 // in some situations. Further analysis is required to enable more
3371 // effective optimizations.
3372 if (V2.isUndef()) {
3373 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
3374 V1, V2, DAG)))
3375 return Result;
3376 }
3377
3378 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
3379 return NewShuffle;
3380 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
3381 return Result;
3382
3383 return SDValue();
3384}
3385
3386SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3387 SelectionDAG &DAG) const {
3388 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
3389 ArrayRef<int> OrigMask = SVOp->getMask();
3390 SDValue V1 = Op.getOperand(0);
3391 SDValue V2 = Op.getOperand(1);
3392 MVT VT = Op.getSimpleValueType();
3393 int NumElements = VT.getVectorNumElements();
3394 SDLoc DL(Op);
3395
3396 bool V1IsUndef = V1.isUndef();
3397 bool V2IsUndef = V2.isUndef();
3398 if (V1IsUndef && V2IsUndef)
3399 return DAG.getUNDEF(VT);
3400
3401 // When we create a shuffle node we put the UNDEF node as the second operand,
3402 // but in some cases the first operand may be transformed to UNDEF.
3403 // In that case we should just commute the node.
3404 if (V1IsUndef)
3405 return DAG.getCommutedVectorShuffle(*SVOp);
3406
3407 // Check for non-undef masks pointing at an undef vector and make the masks
3408 // undef as well. This makes it easier to match the shuffle based solely on
3409 // the mask.
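// E.g. (illustrative): with V2 undef, the v4i32 mask <0, 5, 1, 4> becomes
// <0, -1, 1, -1> before matching.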
3410 if (V2IsUndef &&
3411 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
3412 SmallVector<int, 8> NewMask(OrigMask);
3413 for (int &M : NewMask)
3414 if (M >= NumElements)
3415 M = -1;
3416 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
3417 }
3418
3419 // Check for illegal shuffle mask element index values.
3420 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
3421 (void)MaskUpperLimit;
3422 assert(llvm::all_of(OrigMask,
3423 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
3424 "Out of bounds shuffle index");
3425
3426 // For each vector width, delegate to a specialized lowering routine.
3427 if (VT.is128BitVector())
3428 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3429
3430 if (VT.is256BitVector())
3431 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
3432
3433 return SDValue();
3434}
3435
3436SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
3437 SelectionDAG &DAG) const {
3438 // Custom lower to ensure the libcall return is passed in an FPR on hard
3439 // float ABIs.
3440 SDLoc DL(Op);
3441 MakeLibCallOptions CallOptions;
3442 SDValue Op0 = Op.getOperand(0);
3443 SDValue Chain = SDValue();
3444 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
3445 SDValue Res;
3446 std::tie(Res, Chain) =
3447 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
3448 if (Subtarget.is64Bit())
3449 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3450 return DAG.getBitcast(MVT::i32, Res);
3451}
3452
3453SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
3454 SelectionDAG &DAG) const {
3455 // Custom lower to ensure the libcall argument is passed in an FPR on hard
3456 // float ABIs.
3457 SDLoc DL(Op);
3458 MakeLibCallOptions CallOptions;
3459 SDValue Op0 = Op.getOperand(0);
3460 SDValue Chain = SDValue();
3461 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3462 DL, MVT::f32, Op0)
3463 : DAG.getBitcast(MVT::f32, Op0);
3464 SDValue Res;
3465 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
3466 CallOptions, DL, Chain);
3467 return Res;
3468}
3469
3470SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
3471 SelectionDAG &DAG) const {
3472 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3473 SDLoc DL(Op);
3474 MakeLibCallOptions CallOptions;
3475 RTLIB::Libcall LC =
3476 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
3477 SDValue Res =
3478 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
3479 if (Subtarget.is64Bit())
3480 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
3481 return DAG.getBitcast(MVT::i32, Res);
3482}
3483
3484SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
3485 SelectionDAG &DAG) const {
3486 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
3487 MVT VT = Op.getSimpleValueType();
3488 SDLoc DL(Op);
3489 Op = DAG.getNode(
3490 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
3491 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
3492 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
3493 DL, MVT::f32, Op)
3494 : DAG.getBitcast(MVT::f32, Op);
3495 if (VT != MVT::f32)
3496 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
3497 return Res;
3498}
3499
3500// Lower BUILD_VECTOR as broadcast load (if possible).
3501// For example:
3502// %a = load i8, ptr %ptr
3503// %b = build_vector %a, %a, %a, %a
3504// is lowered to:
3505// (VLDREPL_B $a0, 0)
3506static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
3507 const SDLoc &DL,
3508 SelectionDAG &DAG) {
3509 MVT VT = BVOp->getSimpleValueType(0);
3510 int NumOps = BVOp->getNumOperands();
3511
3512 assert((VT.is128BitVector() || VT.is256BitVector()) &&
3513 "Unsupported vector type for broadcast.");
3514
3515 SDValue IdentitySrc;
3516 bool IsIdentity = true;
3517
3518 for (int i = 0; i != NumOps; i++) {
3519 SDValue Op = BVOp->getOperand(i);
3520 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3521 IsIdentity = false;
3522 break;
3523 }
3524 IdentitySrc = BVOp->getOperand(0);
3525 }
3526
3527 // Make sure that this load is valid and only has one user.
3528 if (!IsIdentity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3529 return SDValue();
3530
3531 auto *LN = cast<LoadSDNode>(IdentitySrc);
3532 auto ExtType = LN->getExtensionType();
3533
3534 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3535 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3536 // Indexed loads and stores are not supported on LoongArch.
3537 assert(LN->isUnindexed() && "Unexpected indexed load.");
3538
3539 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3540 // The offset operand of an unindexed load is always undefined, so there is
3541 // no need to pass it to VLDREPL.
3542 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3543 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3544 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3545 return BCast;
3546 }
3547 return SDValue();
3548}
3549
3550// Sequentially insert elements from Ops into Vector, from low to high indices.
3551// Note: Ops can have fewer elements than Vector.
3552static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3553 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3554 EVT ResTy) {
3555 assert(Ops.size() <= ResTy.getVectorNumElements());
3556
3557 SDValue Op0 = Ops[0];
3558 if (!Op0.isUndef())
3559 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3560 for (unsigned i = 1; i < Ops.size(); ++i) {
3561 SDValue Opi = Ops[i];
3562 if (Opi.isUndef())
3563 continue;
3564 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3565 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3566 }
3567}
3568
3569// Build a ResTy subvector from Node, taking NumElts elements starting at index
3570// 'first'.
3571static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3572 SelectionDAG &DAG, SDLoc DL,
3573 const LoongArchSubtarget &Subtarget,
3574 EVT ResTy, unsigned first) {
3575 unsigned NumElts = ResTy.getVectorNumElements();
3576
3577 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3578
3579 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3580 Node->op_begin() + first + NumElts);
3581 SDValue Vector = DAG.getUNDEF(ResTy);
3582 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3583 return Vector;
3584}
3585
3586SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3587 SelectionDAG &DAG) const {
3588 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3589 MVT VT = Node->getSimpleValueType(0);
3590 EVT ResTy = Op->getValueType(0);
3591 unsigned NumElts = ResTy.getVectorNumElements();
3592 SDLoc DL(Op);
3593 APInt SplatValue, SplatUndef;
3594 unsigned SplatBitSize;
3595 bool HasAnyUndefs;
3596 bool IsConstant = false;
3597 bool UseSameConstant = true;
3598 SDValue ConstantValue;
3599 bool Is128Vec = ResTy.is128BitVector();
3600 bool Is256Vec = ResTy.is256BitVector();
3601
3602 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3603 (!Subtarget.hasExtLASX() || !Is256Vec))
3604 return SDValue();
3605
3606 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3607 return Result;
3608
3609 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3610 /*MinSplatBits=*/8) &&
3611 SplatBitSize <= 64) {
3612 // We can only cope with 8, 16, 32, or 64-bit elements.
3613 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3614 SplatBitSize != 64)
3615 return SDValue();
3616
3617 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3618 // We can only handle 64-bit elements that are within
3619 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3620 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3621 if (!SplatValue.isSignedIntN(10) &&
3622 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3623 return SDValue();
3624 if ((Is128Vec && ResTy == MVT::v4i32) ||
3625 (Is256Vec && ResTy == MVT::v8i32))
3626 return Op;
3627 }
3628
3629 EVT ViaVecTy;
3630
3631 switch (SplatBitSize) {
3632 default:
3633 return SDValue();
3634 case 8:
3635 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3636 break;
3637 case 16:
3638 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3639 break;
3640 case 32:
3641 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3642 break;
3643 case 64:
3644 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3645 break;
3646 }
3647
3648 // SelectionDAG::getConstant will promote SplatValue appropriately.
3649 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3650
3651 // Bitcast to the type we originally wanted.
3652 if (ViaVecTy != ResTy)
3653 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3654
3655 return Result;
3656 }
3657
3658 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3659 return Op;
3660
3661 for (unsigned i = 0; i < NumElts; ++i) {
3662 SDValue Opi = Node->getOperand(i);
3663 if (isIntOrFPConstant(Opi)) {
3664 IsConstant = true;
3665 if (!ConstantValue.getNode())
3666 ConstantValue = Opi;
3667 else if (ConstantValue != Opi)
3668 UseSameConstant = false;
3669 }
3670 }
3671
3672 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3673 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3674 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3675 for (unsigned i = 0; i < NumElts; ++i) {
3676 SDValue Opi = Node->getOperand(i);
3677 if (!isIntOrFPConstant(Opi))
3678 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3679 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3680 }
3681 return Result;
3682 }
3683
3684 if (!IsConstant) {
3685 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3686 // the sub-sequence of the vector and then broadcast the sub-sequence.
3687 //
3688 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3689 // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3690 // generates worse code in some cases. This could be further optimized
3691 // with more consideration.
3692    SmallVector<SDValue> Sequence;
3693    BitVector UndefElements;
3694 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3695 UndefElements.count() == 0) {
3696      // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3697      // because the high part can simply be treated as undef.
3698 SDValue Vector = DAG.getUNDEF(ResTy);
3699 EVT FillTy = Is256Vec
3700                         ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3701                         : ResTy;
3702 SDValue FillVec =
3703 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3704
3705 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3706
3707 unsigned SeqLen = Sequence.size();
3708 unsigned SplatLen = NumElts / SeqLen;
3709 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3710 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3711
3712      // If the size of the sub-sequence is half of a 256-bit vector, bitcast the
3713 // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3714 if (SplatEltTy == MVT::i128)
3715 SplatTy = MVT::v4i64;
3716
3717 SDValue SplatVec;
3718 SDValue SrcVec = DAG.getBitcast(
3719 SplatTy,
3720 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3721 if (Is256Vec) {
3722 SplatVec =
3723 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3724 : LoongArchISD::XVREPLVE0,
3725 DL, SplatTy, SrcVec);
3726 } else {
3727 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3728 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3729 }
3730
3731 return DAG.getBitcast(ResTy, SplatVec);
3732 }
3733
3734    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3735    // using memory operations is much slower.
3736 //
3737 // For 256-bit vectors, normally split into two halves and concatenate.
3738 // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3739    // one non-undef element, skip splitting to avoid a worse result.
3740 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3741 ResTy == MVT::v4f64) {
3742 unsigned NonUndefCount = 0;
3743 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3744 if (!Node->getOperand(i).isUndef()) {
3745 ++NonUndefCount;
3746 if (NonUndefCount > 1)
3747 break;
3748 }
3749 }
3750 if (NonUndefCount == 1)
3751 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3752 }
3753
3754 EVT VecTy =
3755 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3756 SDValue Vector =
3757 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3758
3759 if (Is128Vec)
3760 return Vector;
3761
3762 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3763 VecTy, NumElts / 2);
3764
3765 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3766 }
3767
3768 return SDValue();
3769}
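To see why the repeated-sequence path above is profitable, here is a scalar sketch (illustrative; the 8 x i32 shape and helper name are assumptions) of what fillVector plus the XVREPLVE0/VREPLVEI broadcast compute: only one period of the pattern is materialized with inserts, then it is replicated.

#include <cstdint>

// Sketch: build an 8-lane vector whose contents repeat a short sequence by
// filling the first period, then broadcasting that period across the rest.
static void splatSequenceModel(uint32_t Out[8], const uint32_t *Seq,
                               unsigned SeqLen) {
  for (unsigned i = 0; i != SeqLen; ++i) // the fillVector step
    Out[i] = Seq[i];
  for (unsigned i = SeqLen; i != 8; ++i) // the XVREPLVE0-style broadcast
    Out[i] = Out[i % SeqLen];
}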
3770
3771SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3772 SelectionDAG &DAG) const {
3773 SDLoc DL(Op);
3774 MVT ResVT = Op.getSimpleValueType();
3775 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3776
3777 unsigned NumOperands = Op.getNumOperands();
3778 unsigned NumFreezeUndef = 0;
3779 unsigned NumZero = 0;
3780 unsigned NumNonZero = 0;
3781 unsigned NonZeros = 0;
3782 SmallSet<SDValue, 4> Undefs;
3783 for (unsigned i = 0; i != NumOperands; ++i) {
3784 SDValue SubVec = Op.getOperand(i);
3785 if (SubVec.isUndef())
3786 continue;
3787 if (ISD::isFreezeUndef(SubVec.getNode())) {
3788 // If the freeze(undef) has multiple uses then we must fold to zero.
3789 if (SubVec.hasOneUse()) {
3790 ++NumFreezeUndef;
3791 } else {
3792 ++NumZero;
3793 Undefs.insert(SubVec);
3794 }
3795 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3796 ++NumZero;
3797 else {
3798 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3799 NonZeros |= 1 << i;
3800 ++NumNonZero;
3801 }
3802 }
3803
3804 // If we have more than 2 non-zeros, build each half separately.
3805 if (NumNonZero > 2) {
3806 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3807 ArrayRef<SDUse> Ops = Op->ops();
3808 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3809 Ops.slice(0, NumOperands / 2));
3810 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3811 Ops.slice(NumOperands / 2));
3812 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3813 }
3814
3815 // Otherwise, build it up through insert_subvectors.
3816 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3817 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3818 : DAG.getUNDEF(ResVT));
3819
3820 // Replace Undef operands with ZeroVector.
3821 for (SDValue U : Undefs)
3822 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3823
3824 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3825 unsigned NumSubElems = SubVT.getVectorNumElements();
3826 for (unsigned i = 0; i != NumOperands; ++i) {
3827 if ((NonZeros & (1 << i)) == 0)
3828 continue;
3829
3830 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3831 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3832 }
3833
3834 return Vec;
3835}
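A scalar sketch of the insert_subvector construction above (illustrative; the byte-array representation and names are assumptions): start from a zero or undef base and copy each non-zero 128-bit operand into its slot.

#include <cstring>

// Sketch: assemble a 256-bit value from two optional 128-bit halves, as the
// INSERT_SUBVECTOR loop above does; a null half stands for an all-zero or
// undef operand that needs no insert.
static void concatHalvesModel(unsigned char Out[32], const unsigned char *Lo,
                              const unsigned char *Hi) {
  std::memset(Out, 0, 32); // the zero (or freeze(undef)) base vector
  if (Lo)
    std::memcpy(Out + 0, Lo, 16); // INSERT_SUBVECTOR at index 0
  if (Hi)
    std::memcpy(Out + 16, Hi, 16); // INSERT_SUBVECTOR at index NumSubElems
}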
3836
3837SDValue
3838LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3839 SelectionDAG &DAG) const {
3840 MVT EltVT = Op.getSimpleValueType();
3841 SDValue Vec = Op->getOperand(0);
3842 EVT VecTy = Vec->getValueType(0);
3843 SDValue Idx = Op->getOperand(1);
3844 SDLoc DL(Op);
3845 MVT GRLenVT = Subtarget.getGRLenVT();
3846
3847 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3848
3849 if (isa<ConstantSDNode>(Idx))
3850 return Op;
3851
3852 switch (VecTy.getSimpleVT().SimpleTy) {
3853 default:
3854 llvm_unreachable("Unexpected type");
3855 case MVT::v32i8:
3856 case MVT::v16i16:
3857 case MVT::v4i64:
3858 case MVT::v4f64: {
3859    // Extract the high half subvector and place it in the low half of a new
3860    // vector. It doesn't matter what the high half of the new vector is.
3861 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3862 SDValue VecHi =
3863 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3864 SDValue TmpVec =
3865 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3866 VecHi, DAG.getConstant(0, DL, GRLenVT));
3867
3868    // Shuffle the original Vec and the TmpVec using MaskVec; the lowest
3869    // element of MaskVec is Idx and the rest do not matter. ResVec[0] will
3870    // hold the desired element.
3871 SDValue IdxCp =
3872 Subtarget.is64Bit()
3873 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3874 : DAG.getBitcast(MVT::f32, Idx);
3875 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3876 SDValue MaskVec =
3877 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3878 SDValue ResVec =
3879 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3880
3881 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3882 DAG.getConstant(0, DL, GRLenVT));
3883 }
3884 case MVT::v8i32:
3885 case MVT::v8f32: {
3886 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3887 SDValue SplatValue =
3888 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3889
3890 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3891 DAG.getConstant(0, DL, GRLenVT));
3892 }
3893 }
3894}
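A scalar model of the variable-index extract for the v8i32/v8f32 case (illustrative sketch; the lane count and index masking are assumptions): XVPERM with a splatted index moves the selected lane to every position, after which reading lane 0 is a constant-index extract.

#include <cstdint>

// Sketch of the XVPERM-based path: every result lane selects source lane
// Idx, so lane 0 of the permuted vector is the wanted element.
static uint32_t extractVarIdxModel(const uint32_t Vec[8], uint32_t Idx) {
  uint32_t Perm[8];
  for (unsigned i = 0; i != 8; ++i)
    Perm[i] = Vec[Idx & 7]; // XVPERM with the splatted index vector
  return Perm[0];           // EXTRACT_VECTOR_ELT with constant index 0
}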
3895
3896SDValue
3897LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3898 SelectionDAG &DAG) const {
3899 MVT VT = Op.getSimpleValueType();
3900 MVT EltVT = VT.getVectorElementType();
3901 unsigned NumElts = VT.getVectorNumElements();
3902 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3903 SDLoc DL(Op);
3904 SDValue Op0 = Op.getOperand(0);
3905 SDValue Op1 = Op.getOperand(1);
3906 SDValue Op2 = Op.getOperand(2);
3907
3908 if (isa<ConstantSDNode>(Op2))
3909 return Op;
3910
3911 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3912 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3913
3914 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3915 return SDValue();
3916
3917 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3918 SmallVector<SDValue, 32> RawIndices;
3919 SDValue SplatIdx;
3920 SDValue Indices;
3921
3922 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3923 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3924 for (unsigned i = 0; i < NumElts; ++i) {
3925 RawIndices.push_back(Op2);
3926 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3927 }
3928 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3929 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3930
3931 RawIndices.clear();
3932 for (unsigned i = 0; i < NumElts; ++i) {
3933 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3934 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3935 }
3936 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3937 Indices = DAG.getBitcast(IdxVTy, Indices);
3938 } else {
3939 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3940
3941 for (unsigned i = 0; i < NumElts; ++i)
3942 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3943 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3944 }
3945
3946 // insert vec, elt, idx
3947 // =>
3948 // select (splatidx == {0,1,2...}) ? splatelt : vec
3949 SDValue SelectCC =
3950 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3951 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3952}
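The vselect trick above has a direct scalar reading (illustrative sketch; the v8i32 shape is an assumption): lane i is replaced exactly when i compares equal to the splatted index.

#include <cstdint>

// Sketch: insert Elt at runtime index Idx by comparing each lane number
// against the splatted index (the SETEQ mask) and selecting per lane.
static void insertVarIdxModel(uint32_t Vec[8], uint32_t Elt, uint32_t Idx) {
  for (unsigned i = 0; i != 8; ++i)
    Vec[i] = (i == Idx) ? Elt : Vec[i]; // the VSELECT
}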
3953
3954SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3955 SelectionDAG &DAG) const {
3956 SDLoc DL(Op);
3957 SyncScope::ID FenceSSID =
3958 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3959
3960 // singlethread fences only synchronize with signal handlers on the same
3961 // thread and thus only need to preserve instruction order, not actually
3962 // enforce memory ordering.
3963 if (FenceSSID == SyncScope::SingleThread)
3964 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3965 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3966
3967 return Op;
3968}
3969
3970SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3971 SelectionDAG &DAG) const {
3972
3973 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3974 DAG.getContext()->emitError(
3975 "On LA64, only 64-bit registers can be written.");
3976 return Op.getOperand(0);
3977 }
3978
3979 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3980 DAG.getContext()->emitError(
3981 "On LA32, only 32-bit registers can be written.");
3982 return Op.getOperand(0);
3983 }
3984
3985 return Op;
3986}
3987
3988SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3989 SelectionDAG &DAG) const {
3990 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3991 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3992 "be a constant integer");
3993 return SDValue();
3994 }
3995
3996 MachineFunction &MF = DAG.getMachineFunction();
3997  MF.getFrameInfo().setFrameAddressIsTaken(true);
3998  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3999 EVT VT = Op.getValueType();
4000 SDLoc DL(Op);
4001 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
4002 unsigned Depth = Op.getConstantOperandVal(0);
4003 int GRLenInBytes = Subtarget.getGRLen() / 8;
4004
4005 while (Depth--) {
4006 int Offset = -(GRLenInBytes * 2);
4007 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
4008 DAG.getSignedConstant(Offset, DL, VT));
4009 FrameAddr =
4010 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
4011 }
4012 return FrameAddr;
4013}
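A scalar sketch of the frame walk above (assuming GRLen == 64, so the saved parent frame pointer sits at FrameAddr - 16; the helper name is an assumption):

#include <cstdint>

// Sketch: walk Depth frames by reloading the frame pointer saved two
// GRLen-sized slots below the current frame address.
static uintptr_t frameAddressModel(uintptr_t FrameAddr, unsigned Depth) {
  while (Depth--)
    FrameAddr = *reinterpret_cast<const uintptr_t *>(FrameAddr - 16);
  return FrameAddr;
}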
4014
4015SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
4016 SelectionDAG &DAG) const {
4017  // Currently, only the return address for the current frame is supported.
4018 if (Op.getConstantOperandVal(0) != 0) {
4019 DAG.getContext()->emitError(
4020 "return address can only be determined for the current frame");
4021 return SDValue();
4022 }
4023
4024 MachineFunction &MF = DAG.getMachineFunction();
4025  MF.getFrameInfo().setReturnAddressIsTaken(true);
4026  MVT GRLenVT = Subtarget.getGRLenVT();
4027
4028 // Return the value of the return address register, marking it an implicit
4029 // live-in.
4030 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
4031 getRegClassFor(GRLenVT));
4032 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
4033}
4034
4035SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
4036 SelectionDAG &DAG) const {
4037 MachineFunction &MF = DAG.getMachineFunction();
4038 auto Size = Subtarget.getGRLen() / 8;
4039 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
4040 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4041}
4042
4043SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
4044 SelectionDAG &DAG) const {
4045 MachineFunction &MF = DAG.getMachineFunction();
4046 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
4047
4048 SDLoc DL(Op);
4049  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4050                                 getPointerTy(MF.getDataLayout()));
4051
4052 // vastart just stores the address of the VarArgsFrameIndex slot into the
4053 // memory location argument.
4054 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4055 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
4056 MachinePointerInfo(SV));
4057}
4058
4059SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
4060 SelectionDAG &DAG) const {
4061 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4062 !Subtarget.hasBasicD() && "unexpected target features");
4063
4064 SDLoc DL(Op);
4065 SDValue Op0 = Op.getOperand(0);
4066 if (Op0->getOpcode() == ISD::AND) {
4067 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
4068 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
4069 return Op;
4070 }
4071
4072 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
4073      Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
4074 Op0.getConstantOperandVal(2) == UINT64_C(0))
4075 return Op;
4076
4077 if (Op0.getOpcode() == ISD::AssertZext &&
4078 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
4079 return Op;
4080
4081 EVT OpVT = Op0.getValueType();
4082 EVT RetVT = Op.getValueType();
4083 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
4084 MakeLibCallOptions CallOptions;
4085 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4086 SDValue Chain = SDValue();
4087  SDValue Result;
4088  std::tie(Result, Chain) =
4089 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4090 return Result;
4091}
4092
4093SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
4094 SelectionDAG &DAG) const {
4095 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
4096 !Subtarget.hasBasicD() && "unexpected target features");
4097
4098 SDLoc DL(Op);
4099 SDValue Op0 = Op.getOperand(0);
4100
4101 if ((Op0.getOpcode() == ISD::AssertSext ||
4102       Op0.getOpcode() == ISD::AssertZext) &&
4103      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
4104 return Op;
4105
4106 EVT OpVT = Op0.getValueType();
4107 EVT RetVT = Op.getValueType();
4108 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
4109 MakeLibCallOptions CallOptions;
4110 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
4111 SDValue Chain = SDValue();
4112  SDValue Result;
4113  std::tie(Result, Chain) =
4114 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
4115 return Result;
4116}
4117
4118SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
4119 SelectionDAG &DAG) const {
4120
4121 SDLoc DL(Op);
4122 EVT VT = Op.getValueType();
4123 SDValue Op0 = Op.getOperand(0);
4124 EVT Op0VT = Op0.getValueType();
4125
4126 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
4127 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
4128 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
4129 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
4130 }
4131 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
4132 SDValue Lo, Hi;
4133 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
4134 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
4135 }
4136 return Op;
4137}
4138
4139SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
4140 SelectionDAG &DAG) const {
4141
4142 SDLoc DL(Op);
4143 SDValue Op0 = Op.getOperand(0);
4144
4145 if (Op0.getValueType() == MVT::f16)
4146 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
4147
4148 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
4149 !Subtarget.hasBasicD()) {
4150 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
4151 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
4152 }
4153
4154 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
4155 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
4156 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
4157}
4158
4159static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
4160                             SelectionDAG &DAG, unsigned Flags) {
4161 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
4162}
4163
4164static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
4165                             SelectionDAG &DAG, unsigned Flags) {
4166 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
4167 Flags);
4168}
4169
4170static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
4171                             SelectionDAG &DAG, unsigned Flags) {
4172 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
4173 N->getOffset(), Flags);
4174}
4175
4176static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
4177                             SelectionDAG &DAG, unsigned Flags) {
4178 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
4179}
4180
4181template <class NodeTy>
4182SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4183                                         CodeModel::Model M,
4184                                         bool IsLocal) const {
4185 SDLoc DL(N);
4186 EVT Ty = getPointerTy(DAG.getDataLayout());
4187 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
4188 SDValue Load;
4189
4190 switch (M) {
4191 default:
4192 report_fatal_error("Unsupported code model");
4193
4194 case CodeModel::Large: {
4195 assert(Subtarget.is64Bit() && "Large code model requires LA64");
4196
4197 // This is not actually used, but is necessary for successfully matching
4198 // the PseudoLA_*_LARGE nodes.
4199 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4200 if (IsLocal) {
4201 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
4202 // eventually becomes the desired 5-insn code sequence.
4203 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
4204 Tmp, Addr),
4205 0);
4206 } else {
4207 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
4208 // eventually becomes the desired 5-insn code sequence.
4209 Load = SDValue(
4210 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
4211 0);
4212 }
4213 break;
4214 }
4215
4216 case CodeModel::Small:
4217 case CodeModel::Medium:
4218 if (IsLocal) {
4219 // This generates the pattern (PseudoLA_PCREL sym), which
4220 //
4221 // for la32r expands to:
4222 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4223 //
4224 // for la32s and la64 expands to:
4225 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
4226 Load = SDValue(
4227 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
4228 } else {
4229 // This generates the pattern (PseudoLA_GOT sym), which
4230 //
4231 // for la32r expands to:
4232 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
4233 //
4234 // for la32s and la64 expands to:
4235 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
4236 Load =
4237 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
4238 }
4239 }
4240
4241 if (!IsLocal) {
4242 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4243 MachineFunction &MF = DAG.getMachineFunction();
4244 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4245        MachinePointerInfo::getGOT(MF),
4246        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
4247            MachineMemOperand::MOInvariant,
4248        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4249 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
4250 }
4251
4252 return Load;
4253}
4254
4255SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
4256 SelectionDAG &DAG) const {
4257 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
4258 DAG.getTarget().getCodeModel());
4259}
4260
4261SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
4262 SelectionDAG &DAG) const {
4263 return getAddr(cast<JumpTableSDNode>(Op), DAG,
4264 DAG.getTarget().getCodeModel());
4265}
4266
4267SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
4268 SelectionDAG &DAG) const {
4269 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
4270 DAG.getTarget().getCodeModel());
4271}
4272
4273SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
4274 SelectionDAG &DAG) const {
4275 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4276 assert(N->getOffset() == 0 && "unexpected offset in global node");
4277 auto CM = DAG.getTarget().getCodeModel();
4278 const GlobalValue *GV = N->getGlobal();
4279
4280 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
4281 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
4282 CM = *GCM;
4283 }
4284
4285 return getAddr(N, DAG, CM, GV->isDSOLocal());
4286}
4287
4288SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
4289 SelectionDAG &DAG,
4290 unsigned Opc, bool UseGOT,
4291 bool Large) const {
4292 SDLoc DL(N);
4293 EVT Ty = getPointerTy(DAG.getDataLayout());
4294 MVT GRLenVT = Subtarget.getGRLenVT();
4295
4296 // This is not actually used, but is necessary for successfully matching the
4297 // PseudoLA_*_LARGE nodes.
4298 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4299 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4300
4301 // Only IE needs an extra argument for large code model.
4302 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
4303 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4304 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4305
4306 // If it is LE for normal/medium code model, the add tp operation will occur
4307 // during the pseudo-instruction expansion.
4308 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
4309 return Offset;
4310
4311 if (UseGOT) {
4312 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
4313 MachineFunction &MF = DAG.getMachineFunction();
4314 MachineMemOperand *MemOp = MF.getMachineMemOperand(
4315        MachinePointerInfo::getGOT(MF),
4316        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
4317            MachineMemOperand::MOInvariant,
4318        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
4319 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
4320 }
4321
4322 // Add the thread pointer.
4323 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
4324 DAG.getRegister(LoongArch::R2, GRLenVT));
4325}
4326
4327SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
4328 SelectionDAG &DAG,
4329 unsigned Opc,
4330 bool Large) const {
4331 SDLoc DL(N);
4332 EVT Ty = getPointerTy(DAG.getDataLayout());
4333 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
4334
4335 // This is not actually used, but is necessary for successfully matching the
4336 // PseudoLA_*_LARGE nodes.
4337 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4338
4339 // Use a PC-relative addressing mode to access the dynamic GOT address.
4340 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
4341 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4342 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4343
4344 // Prepare argument list to generate call.
4345  TargetLowering::ArgListTy Args;
4346  Args.emplace_back(Load, CallTy);
4347
4348 // Setup call to __tls_get_addr.
4349 TargetLowering::CallLoweringInfo CLI(DAG);
4350 CLI.setDebugLoc(DL)
4351 .setChain(DAG.getEntryNode())
4352 .setLibCallee(CallingConv::C, CallTy,
4353 DAG.getExternalSymbol("__tls_get_addr", Ty),
4354 std::move(Args));
4355
4356 return LowerCallTo(CLI).first;
4357}
4358
4359SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
4360 SelectionDAG &DAG, unsigned Opc,
4361 bool Large) const {
4362 SDLoc DL(N);
4363 EVT Ty = getPointerTy(DAG.getDataLayout());
4364 const GlobalValue *GV = N->getGlobal();
4365
4366 // This is not actually used, but is necessary for successfully matching the
4367 // PseudoLA_*_LARGE nodes.
4368 SDValue Tmp = DAG.getConstant(0, DL, Ty);
4369
4370 // Use a PC-relative addressing mode to access the global dynamic GOT address.
4371 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
4372 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
4373 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
4374 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
4375}
4376
4377SDValue
4378LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
4379 SelectionDAG &DAG) const {
4380  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4381      CallingConv::GHC)
4382    report_fatal_error("In GHC calling convention TLS is not supported");
4383
4384 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
4385 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
4386
4387 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
4388 assert(N->getOffset() == 0 && "unexpected offset in global node");
4389
4390 if (DAG.getTarget().useEmulatedTLS())
4391 reportFatalUsageError("the emulated TLS is prohibited");
4392
4393 bool IsDesc = DAG.getTarget().useTLSDESC();
4394
4395 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
4396  case TLSModel::GeneralDynamic:
4397    // In this model, application code calls the dynamic linker function
4398 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
4399 // runtime.
4400 if (!IsDesc)
4401 return getDynamicTLSAddr(N, DAG,
4402 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
4403 : LoongArch::PseudoLA_TLS_GD,
4404 Large);
4405 break;
4406  case TLSModel::LocalDynamic:
4407    // Same as GeneralDynamic, except for assembly modifiers and relocation
4408 // records.
4409 if (!IsDesc)
4410 return getDynamicTLSAddr(N, DAG,
4411 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
4412 : LoongArch::PseudoLA_TLS_LD,
4413 Large);
4414 break;
4415  case TLSModel::InitialExec:
4416    // This model uses the GOT to resolve TLS offsets.
4417 return getStaticTLSAddr(N, DAG,
4418 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
4419 : LoongArch::PseudoLA_TLS_IE,
4420 /*UseGOT=*/true, Large);
4421  case TLSModel::LocalExec:
4422    // This model is used when static linking as the TLS offsets are resolved
4423 // during program linking.
4424 //
4425 // This node doesn't need an extra argument for the large code model.
4426 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
4427 /*UseGOT=*/false, Large);
4428 }
4429
4430 return getTLSDescAddr(N, DAG,
4431 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
4432 : LoongArch::PseudoLA_TLS_DESC,
4433 Large);
4434}
4435
4436template <unsigned N>
4437static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
4438                                    SelectionDAG &DAG, bool IsSigned = false) {
4439 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
4440 // Check the ImmArg.
4441 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
4442 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
4443 DAG.getContext()->emitError(Op->getOperationName(0) +
4444 ": argument out of range.");
4445 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
4446 }
4447 return SDValue();
4448}
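For reference, the isInt<N>/isUInt<N> predicates used above behave like the following sketch (rough equivalents of the MathExtras helpers, valid for N < 64, which covers every instantiation in this file):

#include <cstdint>

template <unsigned N> constexpr bool isIntNModel(int64_t X) {
  return X >= -(INT64_C(1) << (N - 1)) && X < (INT64_C(1) << (N - 1));
}
template <unsigned N> constexpr bool isUIntNModel(uint64_t X) {
  return X < (UINT64_C(1) << N);
}
// E.g. a <5>-checked unsigned immediate accepts 0..31, and a <10>-checked
// signed immediate (vrepli) accepts -512..511.
static_assert(isUIntNModel<5>(31) && !isUIntNModel<5>(32), "uimm5 range");
static_assert(isIntNModel<10>(-512) && !isIntNModel<10>(512), "simm10 range");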
4449
4450SDValue
4451LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4452 SelectionDAG &DAG) const {
4453 switch (Op.getConstantOperandVal(0)) {
4454 default:
4455 return SDValue(); // Don't custom lower most intrinsics.
4456 case Intrinsic::thread_pointer: {
4457 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4458 return DAG.getRegister(LoongArch::R2, PtrVT);
4459 }
4460 case Intrinsic::loongarch_lsx_vpickve2gr_d:
4461 case Intrinsic::loongarch_lsx_vpickve2gr_du:
4462 case Intrinsic::loongarch_lsx_vreplvei_d:
4463 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
4464 return checkIntrinsicImmArg<1>(Op, 2, DAG);
4465 case Intrinsic::loongarch_lsx_vreplvei_w:
4466 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
4467 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
4468 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
4469 case Intrinsic::loongarch_lasx_xvpickve_d:
4470 case Intrinsic::loongarch_lasx_xvpickve_d_f:
4471 return checkIntrinsicImmArg<2>(Op, 2, DAG);
4472 case Intrinsic::loongarch_lasx_xvinsve0_d:
4473 return checkIntrinsicImmArg<2>(Op, 3, DAG);
4474 case Intrinsic::loongarch_lsx_vsat_b:
4475 case Intrinsic::loongarch_lsx_vsat_bu:
4476 case Intrinsic::loongarch_lsx_vrotri_b:
4477 case Intrinsic::loongarch_lsx_vsllwil_h_b:
4478 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
4479 case Intrinsic::loongarch_lsx_vsrlri_b:
4480 case Intrinsic::loongarch_lsx_vsrari_b:
4481 case Intrinsic::loongarch_lsx_vreplvei_h:
4482 case Intrinsic::loongarch_lasx_xvsat_b:
4483 case Intrinsic::loongarch_lasx_xvsat_bu:
4484 case Intrinsic::loongarch_lasx_xvrotri_b:
4485 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
4486 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
4487 case Intrinsic::loongarch_lasx_xvsrlri_b:
4488 case Intrinsic::loongarch_lasx_xvsrari_b:
4489 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
4490 case Intrinsic::loongarch_lasx_xvpickve_w:
4491 case Intrinsic::loongarch_lasx_xvpickve_w_f:
4492 return checkIntrinsicImmArg<3>(Op, 2, DAG);
4493 case Intrinsic::loongarch_lasx_xvinsve0_w:
4494 return checkIntrinsicImmArg<3>(Op, 3, DAG);
4495 case Intrinsic::loongarch_lsx_vsat_h:
4496 case Intrinsic::loongarch_lsx_vsat_hu:
4497 case Intrinsic::loongarch_lsx_vrotri_h:
4498 case Intrinsic::loongarch_lsx_vsllwil_w_h:
4499 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
4500 case Intrinsic::loongarch_lsx_vsrlri_h:
4501 case Intrinsic::loongarch_lsx_vsrari_h:
4502 case Intrinsic::loongarch_lsx_vreplvei_b:
4503 case Intrinsic::loongarch_lasx_xvsat_h:
4504 case Intrinsic::loongarch_lasx_xvsat_hu:
4505 case Intrinsic::loongarch_lasx_xvrotri_h:
4506 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
4507 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
4508 case Intrinsic::loongarch_lasx_xvsrlri_h:
4509 case Intrinsic::loongarch_lasx_xvsrari_h:
4510 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
4511 return checkIntrinsicImmArg<4>(Op, 2, DAG);
4512 case Intrinsic::loongarch_lsx_vsrlni_b_h:
4513 case Intrinsic::loongarch_lsx_vsrani_b_h:
4514 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
4515 case Intrinsic::loongarch_lsx_vsrarni_b_h:
4516 case Intrinsic::loongarch_lsx_vssrlni_b_h:
4517 case Intrinsic::loongarch_lsx_vssrani_b_h:
4518 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
4519 case Intrinsic::loongarch_lsx_vssrani_bu_h:
4520 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
4521 case Intrinsic::loongarch_lsx_vssrarni_b_h:
4522 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
4523 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
4524 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
4525 case Intrinsic::loongarch_lasx_xvsrani_b_h:
4526 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
4527 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
4528 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
4529 case Intrinsic::loongarch_lasx_xvssrani_b_h:
4530 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
4531 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
4532 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
4533 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
4534 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4535 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4536 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4537 case Intrinsic::loongarch_lsx_vsat_w:
4538 case Intrinsic::loongarch_lsx_vsat_wu:
4539 case Intrinsic::loongarch_lsx_vrotri_w:
4540 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4541 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4542 case Intrinsic::loongarch_lsx_vsrlri_w:
4543 case Intrinsic::loongarch_lsx_vsrari_w:
4544 case Intrinsic::loongarch_lsx_vslei_bu:
4545 case Intrinsic::loongarch_lsx_vslei_hu:
4546 case Intrinsic::loongarch_lsx_vslei_wu:
4547 case Intrinsic::loongarch_lsx_vslei_du:
4548 case Intrinsic::loongarch_lsx_vslti_bu:
4549 case Intrinsic::loongarch_lsx_vslti_hu:
4550 case Intrinsic::loongarch_lsx_vslti_wu:
4551 case Intrinsic::loongarch_lsx_vslti_du:
4552 case Intrinsic::loongarch_lsx_vbsll_v:
4553 case Intrinsic::loongarch_lsx_vbsrl_v:
4554 case Intrinsic::loongarch_lasx_xvsat_w:
4555 case Intrinsic::loongarch_lasx_xvsat_wu:
4556 case Intrinsic::loongarch_lasx_xvrotri_w:
4557 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4558 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4559 case Intrinsic::loongarch_lasx_xvsrlri_w:
4560 case Intrinsic::loongarch_lasx_xvsrari_w:
4561 case Intrinsic::loongarch_lasx_xvslei_bu:
4562 case Intrinsic::loongarch_lasx_xvslei_hu:
4563 case Intrinsic::loongarch_lasx_xvslei_wu:
4564 case Intrinsic::loongarch_lasx_xvslei_du:
4565 case Intrinsic::loongarch_lasx_xvslti_bu:
4566 case Intrinsic::loongarch_lasx_xvslti_hu:
4567 case Intrinsic::loongarch_lasx_xvslti_wu:
4568 case Intrinsic::loongarch_lasx_xvslti_du:
4569 case Intrinsic::loongarch_lasx_xvbsll_v:
4570 case Intrinsic::loongarch_lasx_xvbsrl_v:
4571 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4572 case Intrinsic::loongarch_lsx_vseqi_b:
4573 case Intrinsic::loongarch_lsx_vseqi_h:
4574 case Intrinsic::loongarch_lsx_vseqi_w:
4575 case Intrinsic::loongarch_lsx_vseqi_d:
4576 case Intrinsic::loongarch_lsx_vslei_b:
4577 case Intrinsic::loongarch_lsx_vslei_h:
4578 case Intrinsic::loongarch_lsx_vslei_w:
4579 case Intrinsic::loongarch_lsx_vslei_d:
4580 case Intrinsic::loongarch_lsx_vslti_b:
4581 case Intrinsic::loongarch_lsx_vslti_h:
4582 case Intrinsic::loongarch_lsx_vslti_w:
4583 case Intrinsic::loongarch_lsx_vslti_d:
4584 case Intrinsic::loongarch_lasx_xvseqi_b:
4585 case Intrinsic::loongarch_lasx_xvseqi_h:
4586 case Intrinsic::loongarch_lasx_xvseqi_w:
4587 case Intrinsic::loongarch_lasx_xvseqi_d:
4588 case Intrinsic::loongarch_lasx_xvslei_b:
4589 case Intrinsic::loongarch_lasx_xvslei_h:
4590 case Intrinsic::loongarch_lasx_xvslei_w:
4591 case Intrinsic::loongarch_lasx_xvslei_d:
4592 case Intrinsic::loongarch_lasx_xvslti_b:
4593 case Intrinsic::loongarch_lasx_xvslti_h:
4594 case Intrinsic::loongarch_lasx_xvslti_w:
4595 case Intrinsic::loongarch_lasx_xvslti_d:
4596 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4597 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4598 case Intrinsic::loongarch_lsx_vsrani_h_w:
4599 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4600 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4601 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4602 case Intrinsic::loongarch_lsx_vssrani_h_w:
4603 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4604 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4605 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4606 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4607 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4608 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4609 case Intrinsic::loongarch_lsx_vfrstpi_b:
4610 case Intrinsic::loongarch_lsx_vfrstpi_h:
4611 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4612 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4613 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4614 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4615 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4616 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4617 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4618 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4619 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4620 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4621 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4622 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4623 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4624 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4625 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4626 case Intrinsic::loongarch_lsx_vsat_d:
4627 case Intrinsic::loongarch_lsx_vsat_du:
4628 case Intrinsic::loongarch_lsx_vrotri_d:
4629 case Intrinsic::loongarch_lsx_vsrlri_d:
4630 case Intrinsic::loongarch_lsx_vsrari_d:
4631 case Intrinsic::loongarch_lasx_xvsat_d:
4632 case Intrinsic::loongarch_lasx_xvsat_du:
4633 case Intrinsic::loongarch_lasx_xvrotri_d:
4634 case Intrinsic::loongarch_lasx_xvsrlri_d:
4635 case Intrinsic::loongarch_lasx_xvsrari_d:
4636 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4637 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4638 case Intrinsic::loongarch_lsx_vsrani_w_d:
4639 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4640 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4641 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4642 case Intrinsic::loongarch_lsx_vssrani_w_d:
4643 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4644 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4645 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4646 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4647 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4648 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4649 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4650 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4651 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4652 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4653 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4654 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4655 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4656 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4657 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4658 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4659 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4660 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4661 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4662 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4663 case Intrinsic::loongarch_lsx_vsrani_d_q:
4664 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4665 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4666 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4667 case Intrinsic::loongarch_lsx_vssrani_d_q:
4668 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4669 case Intrinsic::loongarch_lsx_vssrani_du_q:
4670 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4671 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4672 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4673 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4674 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4675 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4676 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4677 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4678 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4679 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4680 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4681 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4682 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4683 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4684 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4685 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4686 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4687 case Intrinsic::loongarch_lsx_vnori_b:
4688 case Intrinsic::loongarch_lsx_vshuf4i_b:
4689 case Intrinsic::loongarch_lsx_vshuf4i_h:
4690 case Intrinsic::loongarch_lsx_vshuf4i_w:
4691 case Intrinsic::loongarch_lasx_xvnori_b:
4692 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4693 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4694 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4695 case Intrinsic::loongarch_lasx_xvpermi_d:
4696 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4697 case Intrinsic::loongarch_lsx_vshuf4i_d:
4698 case Intrinsic::loongarch_lsx_vpermi_w:
4699 case Intrinsic::loongarch_lsx_vbitseli_b:
4700 case Intrinsic::loongarch_lsx_vextrins_b:
4701 case Intrinsic::loongarch_lsx_vextrins_h:
4702 case Intrinsic::loongarch_lsx_vextrins_w:
4703 case Intrinsic::loongarch_lsx_vextrins_d:
4704 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4705 case Intrinsic::loongarch_lasx_xvpermi_w:
4706 case Intrinsic::loongarch_lasx_xvpermi_q:
4707 case Intrinsic::loongarch_lasx_xvbitseli_b:
4708 case Intrinsic::loongarch_lasx_xvextrins_b:
4709 case Intrinsic::loongarch_lasx_xvextrins_h:
4710 case Intrinsic::loongarch_lasx_xvextrins_w:
4711 case Intrinsic::loongarch_lasx_xvextrins_d:
4712 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4713 case Intrinsic::loongarch_lsx_vrepli_b:
4714 case Intrinsic::loongarch_lsx_vrepli_h:
4715 case Intrinsic::loongarch_lsx_vrepli_w:
4716 case Intrinsic::loongarch_lsx_vrepli_d:
4717 case Intrinsic::loongarch_lasx_xvrepli_b:
4718 case Intrinsic::loongarch_lasx_xvrepli_h:
4719 case Intrinsic::loongarch_lasx_xvrepli_w:
4720 case Intrinsic::loongarch_lasx_xvrepli_d:
4721 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4722 case Intrinsic::loongarch_lsx_vldi:
4723 case Intrinsic::loongarch_lasx_xvldi:
4724 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4725 }
4726}
4727
4728// Helper function that emits an error message for intrinsics with a chain,
4729// returning the merge values of an UNDEF and the chain.
4730static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4731                                                  StringRef ErrorMsg,
4732 SelectionDAG &DAG) {
4733 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4734 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4735 SDLoc(Op));
4736}
4737
4738SDValue
4739LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4740 SelectionDAG &DAG) const {
4741 SDLoc DL(Op);
4742 MVT GRLenVT = Subtarget.getGRLenVT();
4743 EVT VT = Op.getValueType();
4744 SDValue Chain = Op.getOperand(0);
4745 const StringRef ErrorMsgOOR = "argument out of range";
4746 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4747 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4748
4749 switch (Op.getConstantOperandVal(1)) {
4750 default:
4751 return Op;
4752 case Intrinsic::loongarch_crc_w_b_w:
4753 case Intrinsic::loongarch_crc_w_h_w:
4754 case Intrinsic::loongarch_crc_w_w_w:
4755 case Intrinsic::loongarch_crc_w_d_w:
4756 case Intrinsic::loongarch_crcc_w_b_w:
4757 case Intrinsic::loongarch_crcc_w_h_w:
4758 case Intrinsic::loongarch_crcc_w_w_w:
4759 case Intrinsic::loongarch_crcc_w_d_w:
4760 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4761 case Intrinsic::loongarch_csrrd_w:
4762 case Intrinsic::loongarch_csrrd_d: {
4763 unsigned Imm = Op.getConstantOperandVal(2);
4764 return !isUInt<14>(Imm)
4765 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4766 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4767 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4768 }
4769 case Intrinsic::loongarch_csrwr_w:
4770 case Intrinsic::loongarch_csrwr_d: {
4771 unsigned Imm = Op.getConstantOperandVal(3);
4772 return !isUInt<14>(Imm)
4773 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4774 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4775 {Chain, Op.getOperand(2),
4776 DAG.getConstant(Imm, DL, GRLenVT)});
4777 }
4778 case Intrinsic::loongarch_csrxchg_w:
4779 case Intrinsic::loongarch_csrxchg_d: {
4780 unsigned Imm = Op.getConstantOperandVal(4);
4781 return !isUInt<14>(Imm)
4782 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4783 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4784 {Chain, Op.getOperand(2), Op.getOperand(3),
4785 DAG.getConstant(Imm, DL, GRLenVT)});
4786 }
4787 case Intrinsic::loongarch_iocsrrd_d: {
4788 return DAG.getNode(
4789 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4790 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4791 }
4792#define IOCSRRD_CASE(NAME, NODE) \
4793 case Intrinsic::loongarch_##NAME: { \
4794 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4795 {Chain, Op.getOperand(2)}); \
4796 }
4797 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4798 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4799 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4800#undef IOCSRRD_CASE
4801 case Intrinsic::loongarch_cpucfg: {
4802 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4803 {Chain, Op.getOperand(2)});
4804 }
4805 case Intrinsic::loongarch_lddir_d: {
4806 unsigned Imm = Op.getConstantOperandVal(3);
4807 return !isUInt<8>(Imm)
4808 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4809 : Op;
4810 }
4811 case Intrinsic::loongarch_movfcsr2gr: {
4812 if (!Subtarget.hasBasicF())
4813 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4814 unsigned Imm = Op.getConstantOperandVal(2);
4815 return !isUInt<2>(Imm)
4816 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4817 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4818 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4819 }
4820 case Intrinsic::loongarch_lsx_vld:
4821 case Intrinsic::loongarch_lsx_vldrepl_b:
4822 case Intrinsic::loongarch_lasx_xvld:
4823 case Intrinsic::loongarch_lasx_xvldrepl_b:
4824 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4825 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4826 : SDValue();
4827 case Intrinsic::loongarch_lsx_vldrepl_h:
4828 case Intrinsic::loongarch_lasx_xvldrepl_h:
4829 return !isShiftedInt<11, 1>(
4830 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4831               ? emitIntrinsicWithChainErrorMessage(
4832                     Op, "argument out of range or not a multiple of 2", DAG)
4833 : SDValue();
4834 case Intrinsic::loongarch_lsx_vldrepl_w:
4835 case Intrinsic::loongarch_lasx_xvldrepl_w:
4836 return !isShiftedInt<10, 2>(
4837 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4838               ? emitIntrinsicWithChainErrorMessage(
4839                     Op, "argument out of range or not a multiple of 4", DAG)
4840 : SDValue();
4841 case Intrinsic::loongarch_lsx_vldrepl_d:
4842 case Intrinsic::loongarch_lasx_xvldrepl_d:
4843 return !isShiftedInt<9, 3>(
4844 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4845               ? emitIntrinsicWithChainErrorMessage(
4846                     Op, "argument out of range or not a multiple of 8", DAG)
4847 : SDValue();
4848 }
4849}
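The vldrepl offset checks above use isShiftedInt<N, S>: the offset must be a multiple of 2^S and fit in N+S signed bits. A sketch (rough equivalent, valid for N + S < 64):

#include <cstdint>

template <unsigned N, unsigned S> constexpr bool isShiftedIntModel(int64_t X) {
  return (X & ((INT64_C(1) << S) - 1)) == 0 && // multiple of 2^S
         X >= -(INT64_C(1) << (N + S - 1)) && X < (INT64_C(1) << (N + S - 1));
}
// E.g. vldrepl.h offsets are even values in [-2048, 2046].
static_assert(isShiftedIntModel<11, 1>(2046), "max vldrepl.h offset");
static_assert(!isShiftedIntModel<11, 1>(1), "odd offsets rejected");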
4850
4851// Helper function that emits an error message for intrinsics with a void
4852// return value, returning the chain.
4853static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4854                                         SelectionDAG &DAG) {
4855
4856 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4857 return Op.getOperand(0);
4858}
4859
4860SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4861 SelectionDAG &DAG) const {
4862 SDLoc DL(Op);
4863 MVT GRLenVT = Subtarget.getGRLenVT();
4864 SDValue Chain = Op.getOperand(0);
4865 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4866 SDValue Op2 = Op.getOperand(2);
4867 const StringRef ErrorMsgOOR = "argument out of range";
4868 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4869 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4870 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4871
4872 switch (IntrinsicEnum) {
4873 default:
4874 // TODO: Add more Intrinsics.
4875 return SDValue();
4876 case Intrinsic::loongarch_cacop_d:
4877 case Intrinsic::loongarch_cacop_w: {
4878 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4879 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4880 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4881 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4882 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4883 unsigned Imm1 = Op2->getAsZExtVal();
4884 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4885 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4886 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4887 return Op;
4888 }
4889 case Intrinsic::loongarch_dbar: {
4890 unsigned Imm = Op2->getAsZExtVal();
4891 return !isUInt<15>(Imm)
4892 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4893 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4894 DAG.getConstant(Imm, DL, GRLenVT));
4895 }
4896 case Intrinsic::loongarch_ibar: {
4897 unsigned Imm = Op2->getAsZExtVal();
4898 return !isUInt<15>(Imm)
4899 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4900 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4901 DAG.getConstant(Imm, DL, GRLenVT));
4902 }
4903 case Intrinsic::loongarch_break: {
4904 unsigned Imm = Op2->getAsZExtVal();
4905 return !isUInt<15>(Imm)
4906 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4907 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4908 DAG.getConstant(Imm, DL, GRLenVT));
4909 }
4910 case Intrinsic::loongarch_movgr2fcsr: {
4911 if (!Subtarget.hasBasicF())
4912 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4913 unsigned Imm = Op2->getAsZExtVal();
4914 return !isUInt<2>(Imm)
4915 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4916 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4917 DAG.getConstant(Imm, DL, GRLenVT),
4918 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4919 Op.getOperand(3)));
4920 }
4921 case Intrinsic::loongarch_syscall: {
4922 unsigned Imm = Op2->getAsZExtVal();
4923 return !isUInt<15>(Imm)
4924 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4925 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4926 DAG.getConstant(Imm, DL, GRLenVT));
4927 }
4928#define IOCSRWR_CASE(NAME, NODE) \
4929 case Intrinsic::loongarch_##NAME: { \
4930 SDValue Op3 = Op.getOperand(3); \
4931 return Subtarget.is64Bit() \
4932 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4933 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4934 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4935 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4936 Op3); \
4937 }
4938 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4939 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4940 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4941#undef IOCSRWR_CASE
4942 case Intrinsic::loongarch_iocsrwr_d: {
4943 return !Subtarget.is64Bit()
4944 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4945 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4946 Op2,
4947 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4948 Op.getOperand(3)));
4949 }
4950#define ASRT_LE_GT_CASE(NAME) \
4951 case Intrinsic::loongarch_##NAME: { \
4952 return !Subtarget.is64Bit() \
4953 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4954 : Op; \
4955 }
4956 ASRT_LE_GT_CASE(asrtle_d)
4957 ASRT_LE_GT_CASE(asrtgt_d)
4958#undef ASRT_LE_GT_CASE
4959 case Intrinsic::loongarch_ldpte_d: {
4960 unsigned Imm = Op.getConstantOperandVal(3);
4961 return !Subtarget.is64Bit()
4962 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4963 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4964 : Op;
4965 }
4966 case Intrinsic::loongarch_lsx_vst:
4967 case Intrinsic::loongarch_lasx_xvst:
4968 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4969 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4970 : SDValue();
4971 case Intrinsic::loongarch_lasx_xvstelm_b:
4972 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4973 !isUInt<5>(Op.getConstantOperandVal(5)))
4974 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4975 : SDValue();
4976 case Intrinsic::loongarch_lsx_vstelm_b:
4977 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4978 !isUInt<4>(Op.getConstantOperandVal(5)))
4979 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4980 : SDValue();
4981 case Intrinsic::loongarch_lasx_xvstelm_h:
4982 return (!isShiftedInt<8, 1>(
4983 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4984 !isUInt<4>(Op.getConstantOperandVal(5)))
4985               ? emitIntrinsicErrorMessage(
4986                     Op, "argument out of range or not a multiple of 2", DAG)
4987 : SDValue();
4988 case Intrinsic::loongarch_lsx_vstelm_h:
4989 return (!isShiftedInt<8, 1>(
4990 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4991 !isUInt<3>(Op.getConstantOperandVal(5)))
4992               ? emitIntrinsicErrorMessage(
4993                     Op, "argument out of range or not a multiple of 2", DAG)
4994 : SDValue();
4995 case Intrinsic::loongarch_lasx_xvstelm_w:
4996 return (!isShiftedInt<8, 2>(
4997 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4998 !isUInt<3>(Op.getConstantOperandVal(5)))
4999               ? emitIntrinsicErrorMessage(
5000                     Op, "argument out of range or not a multiple of 4", DAG)
5001 : SDValue();
5002 case Intrinsic::loongarch_lsx_vstelm_w:
5003 return (!isShiftedInt<8, 2>(
5004 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5005 !isUInt<2>(Op.getConstantOperandVal(5)))
5006               ? emitIntrinsicErrorMessage(
5007                     Op, "argument out of range or not a multiple of 4", DAG)
5008 : SDValue();
5009 case Intrinsic::loongarch_lasx_xvstelm_d:
5010 return (!isShiftedInt<8, 3>(
5011 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5012 !isUInt<2>(Op.getConstantOperandVal(5)))
5013               ? emitIntrinsicErrorMessage(
5014                     Op, "argument out of range or not a multiple of 8", DAG)
5015 : SDValue();
5016 case Intrinsic::loongarch_lsx_vstelm_d:
5017 return (!isShiftedInt<8, 3>(
5018 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
5019 !isUInt<1>(Op.getConstantOperandVal(5)))
5020               ? emitIntrinsicErrorMessage(
5021                     Op, "argument out of range or not a multiple of 8", DAG)
5022 : SDValue();
5023 }
5024}
5025
5026SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
5027 SelectionDAG &DAG) const {
5028 SDLoc DL(Op);
5029 SDValue Lo = Op.getOperand(0);
5030 SDValue Hi = Op.getOperand(1);
5031 SDValue Shamt = Op.getOperand(2);
5032 EVT VT = Lo.getValueType();
5033
5034 // if Shamt-GRLen < 0: // Shamt < GRLen
5035 // Lo = Lo << Shamt
5036 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
5037 // else:
5038 // Lo = 0
5039 // Hi = Lo << (Shamt-GRLen)
5040
5041 SDValue Zero = DAG.getConstant(0, DL, VT);
5042 SDValue One = DAG.getConstant(1, DL, VT);
5043 SDValue MinusGRLen =
5044 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5045 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5046 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5047 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5048
5049 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
5050 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
5051 SDValue ShiftRightLo =
5052 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
5053 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
5054 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
5055 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
5056
5057 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5058
5059 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
5060 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5061
5062 SDValue Parts[2] = {Lo, Hi};
5063 return DAG.getMergeValues(Parts, DL);
5064}
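A scalar model of the expansion above (assuming GRLen == 32, i.e. lowering an i64 shift on LA32; names are illustrative). Note that (Lo >> 1) >> (31 ^ Shamt) shifts Lo right by 32 - Shamt in two in-range steps, so the formula is also correct when Shamt == 0:

#include <cstdint>

// Sketch of SHL_PARTS; Shamt must be in [0, 63].
static uint64_t shlPartsModel(uint32_t Lo, uint32_t Hi, unsigned Shamt) {
  uint32_t OutLo, OutHi;
  if (Shamt < 32) { // Shamt - GRLen < 0
    OutLo = Lo << Shamt;
    OutHi = (Hi << Shamt) | ((Lo >> 1) >> (31 ^ Shamt));
  } else {
    OutLo = 0;
    OutHi = Lo << (Shamt - 32);
  }
  return ((uint64_t)OutHi << 32) | OutLo;
}
// Sanity check: shlPartsModel(L, H, S) == ((((uint64_t)H << 32) | L) << S)
// for any S in [0, 63].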
5065
5066SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
5067 SelectionDAG &DAG,
5068 bool IsSRA) const {
5069 SDLoc DL(Op);
5070 SDValue Lo = Op.getOperand(0);
5071 SDValue Hi = Op.getOperand(1);
5072 SDValue Shamt = Op.getOperand(2);
5073 EVT VT = Lo.getValueType();
5074
5075 // SRA expansion:
5076 // if Shamt-GRLen < 0: // Shamt < GRLen
5077 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5078 // Hi = Hi >>s Shamt
5079 // else:
5080 // Lo = Hi >>s (Shamt-GRLen);
5081 // Hi = Hi >>s (GRLen-1)
5082 //
5083 // SRL expansion:
5084 // if Shamt-GRLen < 0: // Shamt < GRLen
5085 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
5086 // Hi = Hi >>u Shamt
5087 // else:
5088 // Lo = Hi >>u (Shamt-GRLen);
5089 // Hi = 0;
5090
5091 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
5092
5093 SDValue Zero = DAG.getConstant(0, DL, VT);
5094 SDValue One = DAG.getConstant(1, DL, VT);
5095 SDValue MinusGRLen =
5096 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
5097 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
5098 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
5099 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
5100
5101 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
5102 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
5103 SDValue ShiftLeftHi =
5104 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
5105 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
5106 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
5107 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
5108 SDValue HiFalse =
5109 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
5110
5111 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
5112
5113 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
5114 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
5115
5116 SDValue Parts[2] = {Lo, Hi};
5117 return DAG.getMergeValues(Parts, DL);
5118}
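// Worked example of the expansion above, assuming GRLen == 32:
//   an i64 SRA by 40 takes the "else" branch:
//     Lo = Hi >>s 8   (Shamt - GRLen == 8)
//     Hi = Hi >>s 31  (replicated sign bit)
//   an i64 SRA by 8 takes the "if" branch:
//     Lo = (Lo >>u 8) | ((Hi << 1) << 23) == (Lo >>u 8) | (Hi << 24)
//     Hi = Hi >>s 8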
5119
5120// Returns the opcode of the target-specific SDNode that implements the 32-bit
5121// form of the given Opcode.
5122static unsigned getLoongArchWOpcode(unsigned Opcode) {
5123 switch (Opcode) {
5124 default:
5125 llvm_unreachable("Unexpected opcode");
5126 case ISD::SDIV:
5127 return LoongArchISD::DIV_W;
5128 case ISD::UDIV:
5129 return LoongArchISD::DIV_WU;
5130 case ISD::SREM:
5131 return LoongArchISD::MOD_W;
5132 case ISD::UREM:
5133 return LoongArchISD::MOD_WU;
5134 case ISD::SHL:
5135 return LoongArchISD::SLL_W;
5136 case ISD::SRA:
5137 return LoongArchISD::SRA_W;
5138 case ISD::SRL:
5139 return LoongArchISD::SRL_W;
5140 case ISD::ROTL:
5141 case ISD::ROTR:
5142 return LoongArchISD::ROTR_W;
5143 case ISD::CTTZ:
5144 return LoongArchISD::CTZ_W;
5145 case ISD::CTLZ:
5146 return LoongArchISD::CLZ_W;
5147 }
5148}
5149
5150 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
5151 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
5152 // otherwise be promoted to i64, making it difficult to select the
5153 // SLL_W/.../*_W instructions later on, because the fact that the operation was
5154 // originally of type i8/i16/i32 is lost.
5155 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
5156 unsigned ExtOpc = ISD::ANY_EXTEND) {
5157 SDLoc DL(N);
5158 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
5159 SDValue NewOp0, NewRes;
5160
5161 switch (NumOp) {
5162 default:
5163 llvm_unreachable("Unexpected NumOp");
5164 case 1: {
5165 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5166 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
5167 break;
5168 }
5169 case 2: {
5170 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
5171 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
5172 if (N->getOpcode() == ISD::ROTL) {
5173 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
5174 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
5175 }
5176 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
5177 break;
5178 }
5179 // TODO: Handle more NumOp cases.
5180 }
5181
5182 // ReplaceNodeResults requires we maintain the same type for the return
5183 // value.
5184 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
5185}
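// For example, on LA64 an (i32 (shl x, y)) node is rewritten as
//   (i32 (trunc (i64 (SLL_W (any_ext x), (any_ext y)))))
// so that instruction selection can still pick sll.w, instead of the sll.d
// that plain promotion to i64 would yield.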
5186
5187 // Converts the given 32-bit operation to an i64 operation with sign-extension
5188 // semantics so as to reduce the number of sign-extension instructions.
5189 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
5190 SDLoc DL(N);
5191 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
5192 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
5193 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
5194 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
5195 DAG.getValueType(MVT::i32));
5196 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
5197}
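// For example, an (i32 (add x, y)) node on LA64 becomes
//   (trunc (sext_inreg (add (any_ext x), (any_ext y)), i32))
// which selects to a single add.w whose result is already sign-extended,
// avoiding a separate sign-extension instruction.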
5198
5199 // Helper function that emits an error message for intrinsics with/without a
5200 // chain, and returns a UNDEF and, for the chained case, the chain as the
5201 // results.
5202 static void emitErrorAndReplaceIntrinsicResults(
5203 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
5203 StringRef ErrorMsg, bool WithChain = true) {
5204 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
5205 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
5206 if (!WithChain)
5207 return;
5208 Results.push_back(N->getOperand(0));
5209}
5210
5211template <unsigned N>
5212 static void
5213 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
5214 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
5215 unsigned ResOp) {
5216 const StringRef ErrorMsgOOR = "argument out of range";
5217 unsigned Imm = Node->getConstantOperandVal(2);
5218 if (!isUInt<N>(Imm)) {
5219 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
5220 /*WithChain=*/false);
5221 return;
5222 }
5223 SDLoc DL(Node);
5224 SDValue Vec = Node->getOperand(1);
5225
5226 SDValue PickElt =
5227 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
5228 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
5229 DAG.getValueType(Vec.getValueType().getVectorElementType()));
5230 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
5231 PickElt.getValue(0)));
5232}
5233
5234 static void replaceVecCondBranchResults(SDNode *N,
5235 SmallVectorImpl<SDValue> &Results,
5236 SelectionDAG &DAG,
5237 const LoongArchSubtarget &Subtarget,
5238 unsigned ResOp) {
5239 SDLoc DL(N);
5240 SDValue Vec = N->getOperand(1);
5241
5242 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
5243 Results.push_back(
5244 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
5245}
5246
5247static void
5248 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
5249 SelectionDAG &DAG,
5250 const LoongArchSubtarget &Subtarget) {
5251 switch (N->getConstantOperandVal(0)) {
5252 default:
5253 llvm_unreachable("Unexpected Intrinsic.");
5254 case Intrinsic::loongarch_lsx_vpickve2gr_b:
5255 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5256 LoongArchISD::VPICK_SEXT_ELT);
5257 break;
5258 case Intrinsic::loongarch_lsx_vpickve2gr_h:
5259 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
5260 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5261 LoongArchISD::VPICK_SEXT_ELT);
5262 break;
5263 case Intrinsic::loongarch_lsx_vpickve2gr_w:
5264 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5265 LoongArchISD::VPICK_SEXT_ELT);
5266 break;
5267 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
5268 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
5269 LoongArchISD::VPICK_ZEXT_ELT);
5270 break;
5271 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
5272 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
5273 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
5274 LoongArchISD::VPICK_ZEXT_ELT);
5275 break;
5276 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
5277 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
5278 LoongArchISD::VPICK_ZEXT_ELT);
5279 break;
5280 case Intrinsic::loongarch_lsx_bz_b:
5281 case Intrinsic::loongarch_lsx_bz_h:
5282 case Intrinsic::loongarch_lsx_bz_w:
5283 case Intrinsic::loongarch_lsx_bz_d:
5284 case Intrinsic::loongarch_lasx_xbz_b:
5285 case Intrinsic::loongarch_lasx_xbz_h:
5286 case Intrinsic::loongarch_lasx_xbz_w:
5287 case Intrinsic::loongarch_lasx_xbz_d:
5288 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5289 LoongArchISD::VALL_ZERO);
5290 break;
5291 case Intrinsic::loongarch_lsx_bz_v:
5292 case Intrinsic::loongarch_lasx_xbz_v:
5293 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5294 LoongArchISD::VANY_ZERO);
5295 break;
5296 case Intrinsic::loongarch_lsx_bnz_b:
5297 case Intrinsic::loongarch_lsx_bnz_h:
5298 case Intrinsic::loongarch_lsx_bnz_w:
5299 case Intrinsic::loongarch_lsx_bnz_d:
5300 case Intrinsic::loongarch_lasx_xbnz_b:
5301 case Intrinsic::loongarch_lasx_xbnz_h:
5302 case Intrinsic::loongarch_lasx_xbnz_w:
5303 case Intrinsic::loongarch_lasx_xbnz_d:
5304 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5305 LoongArchISD::VALL_NONZERO);
5306 break;
5307 case Intrinsic::loongarch_lsx_bnz_v:
5308 case Intrinsic::loongarch_lasx_xbnz_v:
5309 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
5310 LoongArchISD::VANY_NONZERO);
5311 break;
5312 }
5313}
5314
5315 static void replaceCMP_XCHG_128Results(SDNode *N,
5316 SmallVectorImpl<SDValue> &Results,
5317 SelectionDAG &DAG) {
5318 assert(N->getValueType(0) == MVT::i128 &&
5319 "AtomicCmpSwap on types less than 128 should be legal");
5320 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
5321
5322 unsigned Opcode;
5323 switch (MemOp->getMergedOrdering()) {
5324 case AtomicOrdering::Acquire:
5325 case AtomicOrdering::AcquireRelease:
5326 case AtomicOrdering::SequentiallyConsistent:
5327 Opcode = LoongArch::PseudoCmpXchg128Acquire;
5328 break;
5329 case AtomicOrdering::Monotonic:
5330 case AtomicOrdering::Release:
5331 Opcode = LoongArch::PseudoCmpXchg128;
5332 break;
5333 default:
5334 llvm_unreachable("Unexpected ordering!");
5335 }
5336
5337 SDLoc DL(N);
5338 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
5339 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
5340 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
5341 NewVal.first, NewVal.second, N->getOperand(0)};
5342
5343 SDNode *CmpSwap = DAG.getMachineNode(
5344 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
5345 Ops);
5346 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
5347 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
5348 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
5349 Results.push_back(SDValue(CmpSwap, 3));
5350}
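// The pseudo instruction above defines four values: the low and high i64
// halves of the old memory value (results 0 and 1, re-joined with BUILD_PAIR
// to form the i128 result), a third i64 result that is not used here
// (result 2), and the output chain (result 3).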
5351
5352 void LoongArchTargetLowering::ReplaceNodeResults(
5353 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
5354 SDLoc DL(N);
5355 EVT VT = N->getValueType(0);
5356 switch (N->getOpcode()) {
5357 default:
5358 llvm_unreachable("Don't know how to legalize this operation");
5359 case ISD::ADD:
5360 case ISD::SUB:
5361 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
5362 "Unexpected custom legalisation");
5363 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
5364 break;
5365 case ISD::SDIV:
5366 case ISD::UDIV:
5367 case ISD::SREM:
5368 case ISD::UREM:
5369 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5370 "Unexpected custom legalisation");
5371 Results.push_back(customLegalizeToWOp(N, DAG, 2,
5372 Subtarget.hasDiv32() && VT == MVT::i32
5373 ? ISD::ANY_EXTEND
5374 : ISD::SIGN_EXTEND));
5375 break;
5376 case ISD::SHL:
5377 case ISD::SRA:
5378 case ISD::SRL:
5379 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5380 "Unexpected custom legalisation");
5381 if (N->getOperand(1).getOpcode() != ISD::Constant) {
5382 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5383 break;
5384 }
5385 break;
5386 case ISD::ROTL:
5387 case ISD::ROTR:
5388 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5389 "Unexpected custom legalisation");
5390 Results.push_back(customLegalizeToWOp(N, DAG, 2));
5391 break;
5392 case ISD::LOAD: {
5393 // Use an f64 load and a scalar_to_vector for v2f32 loads. This avoids
5394 // scalarizing in 32-bit mode. In 64-bit mode this avoids an int->fp
5395 // cast, since type legalization will try to use an i64 load.
5396 MVT VT = N->getSimpleValueType(0);
5397 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5398 "Unexpected custom legalisation");
5400 "Unexpected type action!");
5401 if (!ISD::isNON_EXTLoad(N))
5402 return;
5403 auto *Ld = cast<LoadSDNode>(N);
5404 SDValue Res = DAG.getLoad(MVT::f64, DL, Ld->getChain(), Ld->getBasePtr(),
5405 Ld->getPointerInfo(), Ld->getBaseAlign(),
5406 Ld->getMemOperand()->getFlags());
5407 SDValue Chain = Res.getValue(1);
5408 MVT VecVT = MVT::getVectorVT(MVT::f64, 2);
5409 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Res);
5410 EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT);
5411 Res = DAG.getBitcast(WideVT, Res);
5412 Results.push_back(Res);
5413 Results.push_back(Chain);
5414 break;
5415 }
5416 case ISD::FP_TO_SINT: {
5417 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5418 "Unexpected custom legalisation");
5419 SDValue Src = N->getOperand(0);
5420 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
5421 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
5422 TargetLowering::TypeSoftenFloat) {
5423 if (!isTypeLegal(Src.getValueType()))
5424 return;
5425 if (Src.getValueType() == MVT::f16)
5426 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
5427 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
5428 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
5429 return;
5430 }
5431 // If the FP type needs to be softened, emit a library call using the 'si'
5432 // version. If we left it to default legalization we'd end up with 'di'.
5433 RTLIB::Libcall LC;
5434 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
5435 MakeLibCallOptions CallOptions;
5436 EVT OpVT = Src.getValueType();
5437 CallOptions.setTypeListBeforeSoften(OpVT, VT);
5438 SDValue Chain = SDValue();
5439 SDValue Result;
5440 std::tie(Result, Chain) =
5441 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
5442 Results.push_back(Result);
5443 break;
5444 }
5445 case ISD::BITCAST: {
5446 SDValue Src = N->getOperand(0);
5447 EVT SrcVT = Src.getValueType();
5448 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
5449 Subtarget.hasBasicF()) {
5450 SDValue Dst =
5451 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
5452 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
5453 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
5454 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
5455 DAG.getVTList(MVT::i32, MVT::i32), Src);
5456 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
5457 NewReg.getValue(0), NewReg.getValue(1));
5458 Results.push_back(RetReg);
5459 }
5460 break;
5461 }
5462 case ISD::FP_TO_UINT: {
5463 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5464 "Unexpected custom legalisation");
5465 auto &TLI = DAG.getTargetLoweringInfo();
5466 SDValue Tmp1, Tmp2;
5467 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
5468 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
5469 break;
5470 }
5471 case ISD::FP_ROUND: {
5472 assert(VT == MVT::v2f32 && Subtarget.hasExtLSX() &&
5473 "Unexpected custom legalisation");
5474 // On LSX platforms, rounding from v2f64 to v4f32 (after legalization from
5475 // v2f32) is scalarized. Add a customized v2f32 widening to convert it into
5476 // a target-specific LoongArchISD::VFCVT to optimize it.
5477 SDValue Op0 = N->getOperand(0);
5478 EVT OpVT = Op0.getValueType();
5479 if (OpVT == MVT::v2f64) {
5480 SDValue Undef = DAG.getUNDEF(OpVT);
5481 SDValue Dst =
5482 DAG.getNode(LoongArchISD::VFCVT, DL, MVT::v4f32, Undef, Op0);
5483 Results.push_back(Dst);
5484 }
5485 break;
5486 }
5487 case ISD::BSWAP: {
5488 SDValue Src = N->getOperand(0);
5489 assert((VT == MVT::i16 || VT == MVT::i32) &&
5490 "Unexpected custom legalization");
5491 MVT GRLenVT = Subtarget.getGRLenVT();
5492 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5493 SDValue Tmp;
5494 switch (VT.getSizeInBits()) {
5495 default:
5496 llvm_unreachable("Unexpected operand width");
5497 case 16:
5498 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
5499 break;
5500 case 32:
5501 // Only LA64 will get here, due to the size mismatch between VT and
5502 // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
5503 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
5504 break;
5505 }
5506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5507 break;
5508 }
5509 case ISD::BITREVERSE: {
5510 SDValue Src = N->getOperand(0);
5511 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
5512 "Unexpected custom legalization");
5513 MVT GRLenVT = Subtarget.getGRLenVT();
5514 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
5515 SDValue Tmp;
5516 switch (VT.getSizeInBits()) {
5517 default:
5518 llvm_unreachable("Unexpected operand width");
5519 case 8:
5520 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
5521 break;
5522 case 32:
5523 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
5524 break;
5525 }
5526 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
5527 break;
5528 }
5529 case ISD::CTLZ:
5530 case ISD::CTTZ: {
5531 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
5532 "Unexpected custom legalisation");
5533 Results.push_back(customLegalizeToWOp(N, DAG, 1));
5534 break;
5535 }
5536 case ISD::INTRINSIC_W_CHAIN: {
5537 SDValue Chain = N->getOperand(0);
5538 SDValue Op2 = N->getOperand(2);
5539 MVT GRLenVT = Subtarget.getGRLenVT();
5540 const StringRef ErrorMsgOOR = "argument out of range";
5541 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
5542 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
5543
5544 switch (N->getConstantOperandVal(1)) {
5545 default:
5546 llvm_unreachable("Unexpected Intrinsic.");
5547 case Intrinsic::loongarch_movfcsr2gr: {
5548 if (!Subtarget.hasBasicF()) {
5549 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5550 return;
5551 }
5552 unsigned Imm = Op2->getAsZExtVal();
5553 if (!isUInt<2>(Imm)) {
5554 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5555 return;
5556 }
5557 SDValue MOVFCSR2GRResults = DAG.getNode(
5558 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5559 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5560 Results.push_back(
5561 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5562 Results.push_back(MOVFCSR2GRResults.getValue(1));
5563 break;
5564 }
5565#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5566 case Intrinsic::loongarch_##NAME: { \
5567 SDValue NODE = DAG.getNode( \
5568 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5569 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5570 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5571 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5572 Results.push_back(NODE.getValue(1)); \
5573 break; \
5574 }
5575 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5576 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5577 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5578 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5579 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5580 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5581#undef CRC_CASE_EXT_BINARYOP
5582
5583#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5584 case Intrinsic::loongarch_##NAME: { \
5585 SDValue NODE = DAG.getNode( \
5586 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5587 {Chain, Op2, \
5588 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5589 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5590 Results.push_back(NODE.getValue(1)); \
5591 break; \
5592 }
5593 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5594 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5595#undef CRC_CASE_EXT_UNARYOP
5596#define CSR_CASE(ID) \
5597 case Intrinsic::loongarch_##ID: { \
5598 if (!Subtarget.is64Bit()) \
5599 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5600 break; \
5601 }
5602 CSR_CASE(csrrd_d);
5603 CSR_CASE(csrwr_d);
5604 CSR_CASE(csrxchg_d);
5605 CSR_CASE(iocsrrd_d);
5606#undef CSR_CASE
5607 case Intrinsic::loongarch_csrrd_w: {
5608 unsigned Imm = Op2->getAsZExtVal();
5609 if (!isUInt<14>(Imm)) {
5610 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5611 return;
5612 }
5613 SDValue CSRRDResults =
5614 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5615 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5616 Results.push_back(
5617 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5618 Results.push_back(CSRRDResults.getValue(1));
5619 break;
5620 }
5621 case Intrinsic::loongarch_csrwr_w: {
5622 unsigned Imm = N->getConstantOperandVal(3);
5623 if (!isUInt<14>(Imm)) {
5624 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5625 return;
5626 }
5627 SDValue CSRWRResults =
5628 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5629 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5630 DAG.getConstant(Imm, DL, GRLenVT)});
5631 Results.push_back(
5632 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5633 Results.push_back(CSRWRResults.getValue(1));
5634 break;
5635 }
5636 case Intrinsic::loongarch_csrxchg_w: {
5637 unsigned Imm = N->getConstantOperandVal(4);
5638 if (!isUInt<14>(Imm)) {
5639 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5640 return;
5641 }
5642 SDValue CSRXCHGResults = DAG.getNode(
5643 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5644 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5645 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5646 DAG.getConstant(Imm, DL, GRLenVT)});
5647 Results.push_back(
5648 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5649 Results.push_back(CSRXCHGResults.getValue(1));
5650 break;
5651 }
5652#define IOCSRRD_CASE(NAME, NODE) \
5653 case Intrinsic::loongarch_##NAME: { \
5654 SDValue IOCSRRDResults = \
5655 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5656 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5657 Results.push_back( \
5658 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5659 Results.push_back(IOCSRRDResults.getValue(1)); \
5660 break; \
5661 }
5662 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5663 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5664 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5665#undef IOCSRRD_CASE
5666 case Intrinsic::loongarch_cpucfg: {
5667 SDValue CPUCFGResults =
5668 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5669 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5670 Results.push_back(
5671 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5672 Results.push_back(CPUCFGResults.getValue(1));
5673 break;
5674 }
5675 case Intrinsic::loongarch_lddir_d: {
5676 if (!Subtarget.is64Bit()) {
5677 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5678 return;
5679 }
5680 break;
5681 }
5682 }
5683 break;
5684 }
5685 case ISD::READ_REGISTER: {
5686 if (Subtarget.is64Bit())
5687 DAG.getContext()->emitError(
5688 "On LA64, only 64-bit registers can be read.");
5689 else
5690 DAG.getContext()->emitError(
5691 "On LA32, only 32-bit registers can be read.");
5692 Results.push_back(DAG.getUNDEF(VT));
5693 Results.push_back(N->getOperand(0));
5694 break;
5695 }
5696 case ISD::INTRINSIC_WO_CHAIN: {
5697 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5698 break;
5699 }
5700 case ISD::LROUND: {
5701 SDValue Op0 = N->getOperand(0);
5702 EVT OpVT = Op0.getValueType();
5703 RTLIB::Libcall LC =
5704 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5705 MakeLibCallOptions CallOptions;
5706 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5707 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5708 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5709 Results.push_back(Result);
5710 break;
5711 }
5712 case ISD::ATOMIC_CMP_SWAP: {
5713 replaceCMP_XCHG_128Results(N, Results, DAG);
5714 break;
5715 }
5716 case ISD::TRUNCATE: {
5717 MVT VT = N->getSimpleValueType(0);
5718 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5719 return;
5720
5721 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5722 SDValue In = N->getOperand(0);
5723 EVT InVT = In.getValueType();
5724 EVT InEltVT = InVT.getVectorElementType();
5725 EVT EltVT = VT.getVectorElementType();
5726 unsigned MinElts = VT.getVectorNumElements();
5727 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5728 unsigned InBits = InVT.getSizeInBits();
5729
5730 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5731 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5732 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5733 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5734 for (unsigned I = 0; I < MinElts; ++I)
5735 TruncMask[I] = Scale * I;
5736
5737 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5738 MVT SVT = In.getSimpleValueType().getScalarType();
5739 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5740 SDValue WidenIn =
5741 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5742 DAG.getVectorIdxConstant(0, DL));
5743 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5744 "Illegal vector type in truncation");
5745 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5746 Results.push_back(
5747 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5748 return;
5749 }
5750 }
5751
5752 break;
5753 }
5754 }
5755}
5756
5757/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5758 static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5759 SelectionDAG &DAG) {
5760 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5761
5762 MVT VT = N->getSimpleValueType(0);
5763 if (!VT.is128BitVector() && !VT.is256BitVector())
5764 return SDValue();
5765
5766 SDValue X, Y;
5767 SDValue N0 = N->getOperand(0);
5768 SDValue N1 = N->getOperand(1);
5769
5770 if (SDValue Not = isNOT(N0, DAG)) {
5771 X = Not;
5772 Y = N1;
5773 } else if (SDValue Not = isNOT(N1, DAG)) {
5774 X = Not;
5775 Y = N0;
5776 } else
5777 return SDValue();
5778
5779 X = DAG.getBitcast(VT, X);
5780 Y = DAG.getBitcast(VT, Y);
5781 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5782}
5783
5784static bool isConstantSplatVector(SDValue N, APInt &SplatValue,
5785 unsigned MinSizeInBits) {
5786 BuildVectorSDNode *Node =
5787 dyn_cast<BuildVectorSDNode>(N.getNode());
5788
5789 if (!Node)
5790 return false;
5791
5792 APInt SplatUndef;
5793 unsigned SplatBitSize;
5794 bool HasAnyUndefs;
5795
5796 return Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
5797 HasAnyUndefs, MinSizeInBits,
5798 /*IsBigEndian=*/false);
5799}
5800
5801 static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
5802 TargetLowering::DAGCombinerInfo &DCI,
5803 const LoongArchSubtarget &Subtarget) {
5804 if (DCI.isBeforeLegalizeOps())
5805 return SDValue();
5806
5807 EVT VT = N->getValueType(0);
5808 if (!VT.isVector())
5809 return SDValue();
5810
5811 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
5812 return SDValue();
5813
5814 EVT EltVT = VT.getVectorElementType();
5815 if (!EltVT.isInteger())
5816 return SDValue();
5817
5818 // match:
5819 //
5820 // add
5821 // (and
5822 // (srl X, shift-1) / X
5823 // 1)
5824 // (srl/sra X, shift)
5825
5826 SDValue Add0 = N->getOperand(0);
5827 SDValue Add1 = N->getOperand(1);
5828 SDValue And;
5829 SDValue Shr;
5830
5831 if (Add0.getOpcode() == ISD::AND) {
5832 And = Add0;
5833 Shr = Add1;
5834 } else if (Add1.getOpcode() == ISD::AND) {
5835 And = Add1;
5836 Shr = Add0;
5837 } else {
5838 return SDValue();
5839 }
5840
5841 // match:
5842 //
5843 // srl/sra X, shift
5844
5845 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
5846 return SDValue();
5847
5848 SDValue X = Shr.getOperand(0);
5849 SDValue Shift = Shr.getOperand(1);
5850 APInt ShiftVal;
5851
5852 if (!isConstantSplatVector(Shift, ShiftVal, EltVT.getSizeInBits()))
5853 return SDValue();
5854
5855 if (ShiftVal == 0)
5856 return SDValue();
5857
5858 // match:
5859 //
5860 // and
5861 // (srl X, shift-1) / X
5862 // 1
5863
5864 SDValue One = And.getOperand(1);
5865 APInt SplatVal;
5866
5867 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
5868 return SDValue();
5869
5870 if (SplatVal != 1)
5871 return SDValue();
5872
5873 if (And.getOperand(0) == X) {
5874 // match:
5875 //
5876 // shift == 1
5877
5878 if (ShiftVal != 1)
5879 return SDValue();
5880 } else {
5881 // match:
5882 //
5883 // srl X, shift-1
5884
5885 SDValue Srl = And.getOperand(0);
5886
5887 if (Srl.getOpcode() != ISD::SRL)
5888 return SDValue();
5889
5890 if (Srl.getOperand(0) != X)
5891 return SDValue();
5892
5893 // match:
5894 //
5895 // shift-1
5896
5897 SDValue ShiftMinus1 = Srl.getOperand(1);
5898
5899 if (!isConstantSplatVector(ShiftMinus1, SplatVal, EltVT.getSizeInBits()))
5900 return SDValue();
5901
5902 if (ShiftVal != (SplatVal + 1))
5903 return SDValue();
5904 }
5905
5906 // We matched a rounded right shift pattern and can lower it
5907 // to a single vector rounded shift instruction.
5908
5909 SDLoc DL(N);
5910 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
5911 : LoongArchISD::VSRAR,
5912 DL, VT, X, Shift);
5913}
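// Worked example with shift == 3: the combine above rewrites
//   (add (and (srl X, 2), 1), (sra X, 3))
// into (VSRAR X, 3). The "(X >>u 2) & 1" term is the last bit shifted out,
// so adding it rounds the arithmetic shift result to nearest.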
5914
5915 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5916 TargetLowering::DAGCombinerInfo &DCI,
5917 const LoongArchSubtarget &Subtarget) {
5918 if (DCI.isBeforeLegalizeOps())
5919 return SDValue();
5920
5921 SDValue FirstOperand = N->getOperand(0);
5922 SDValue SecondOperand = N->getOperand(1);
5923 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5924 EVT ValTy = N->getValueType(0);
5925 SDLoc DL(N);
5926 uint64_t lsb, msb;
5927 unsigned SMIdx, SMLen;
5928 ConstantSDNode *CN;
5929 SDValue NewOperand;
5930 MVT GRLenVT = Subtarget.getGRLenVT();
5931
5932 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5933 return R;
5934
5935 // BSTRPICK requires the 32S feature.
5936 if (!Subtarget.has32S())
5937 return SDValue();
5938
5939 // Op's second operand must be a shifted mask.
5940 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5941 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5942 return SDValue();
5943
5944 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5945 // Pattern match BSTRPICK.
5946 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5947 // => BSTRPICK $dst, $src, msb, lsb
5948 // where msb = lsb + len - 1
5949
5950 // The second operand of the shift must be an immediate.
5951 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5952 return SDValue();
5953
5954 lsb = CN->getZExtValue();
5955
5956 // Return if the shifted mask does not start at bit 0 or the sum of its
5957 // length and lsb exceeds the word's size.
5958 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5959 return SDValue();
5960
5961 NewOperand = FirstOperand.getOperand(0);
5962 } else {
5963 // Pattern match BSTRPICK.
5964 // $dst = and $src, (2**len - 1), if len > 12
5965 // => BSTRPICK $dst, $src, msb, lsb
5966 // where lsb = 0 and msb = len - 1
5967
5968 // If the mask is <= 0xfff, andi can be used instead.
5969 if (CN->getZExtValue() <= 0xfff)
5970 return SDValue();
5971
5972 // Return if the MSB exceeds.
5973 if (SMIdx + SMLen > ValTy.getSizeInBits())
5974 return SDValue();
5975
5976 if (SMIdx > 0) {
5977 // Omit if the constant has more than 2 uses. This is a conservative
5978 // decision. Whether it is a win depends on the HW microarchitecture.
5979 // However it should always be better for 1 and 2 uses.
5980 if (CN->use_size() > 2)
5981 return SDValue();
5982 // Return if the constant can be composed by a single LU12I.W.
5983 if ((CN->getZExtValue() & 0xfff) == 0)
5984 return SDValue();
5985 // Return if the constant can be composed by a single ADDI with
5986 // the zero register.
5987 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5988 return SDValue();
5989 }
5990
5991 lsb = SMIdx;
5992 NewOperand = FirstOperand;
5993 }
5994
5995 msb = lsb + SMLen - 1;
5996 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5997 DAG.getConstant(msb, DL, GRLenVT),
5998 DAG.getConstant(lsb, DL, GRLenVT));
5999 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
6000 return NR0;
6001 // Try to optimize to
6002 // bstrpick $Rd, $Rs, msb, lsb
6003 // slli $Rd, $Rd, lsb
6004 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
6005 DAG.getConstant(lsb, DL, GRLenVT));
6006}
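// For example, (and (srl $src, 8), 0xffff) has lsb == 8, SMIdx == 0 and
// SMLen == 16, and becomes BSTRPICK $dst, $src, 23, 8 (msb == 8 + 16 - 1).
// A plain (and $src, 0x3ffc0) (SMIdx == 6, SMLen == 12, with at most two
// uses of the constant) instead becomes
//   bstrpick $dst, $src, 17, 6
//   slli     $dst, $dst, 6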
6007
6008 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
6009 TargetLowering::DAGCombinerInfo &DCI,
6010 const LoongArchSubtarget &Subtarget) {
6011 // BSTRPICK requires the 32S feature.
6012 if (!Subtarget.has32S())
6013 return SDValue();
6014
6015 if (DCI.isBeforeLegalizeOps())
6016 return SDValue();
6017
6018 // $dst = srl (and $src, Mask), Shamt
6019 // =>
6020 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
6021 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
6022 //
6023
6024 SDValue FirstOperand = N->getOperand(0);
6025 ConstantSDNode *CN;
6026 EVT ValTy = N->getValueType(0);
6027 SDLoc DL(N);
6028 MVT GRLenVT = Subtarget.getGRLenVT();
6029 unsigned MaskIdx, MaskLen;
6030 uint64_t Shamt;
6031
6032 // The first operand must be an AND and the second operand of the AND must be
6033 // a shifted mask.
6034 if (FirstOperand.getOpcode() != ISD::AND ||
6035 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
6036 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
6037 return SDValue();
6038
6039 // The second operand (shift amount) must be an immediate.
6040 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
6041 return SDValue();
6042
6043 Shamt = CN->getZExtValue();
6044 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
6045 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
6046 FirstOperand->getOperand(0),
6047 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6048 DAG.getConstant(Shamt, DL, GRLenVT));
6049
6050 return SDValue();
6051}
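// For example, (srl (and $src, 0xff00), 8) has MaskIdx == 8, MaskLen == 8
// and Shamt == 8, which satisfies MaskIdx <= Shamt <= MaskIdx + MaskLen - 1,
// so it becomes BSTRPICK $dst, $src, 15, 8.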
6052
6053// Helper to peek through bitops/trunc/setcc to determine size of source vector.
6054// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
6055static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
6056 unsigned Depth) {
6057 // Limit recursion.
6058 if (Depth >= SelectionDAG::MaxRecursionDepth)
6059 return false;
6060 switch (Src.getOpcode()) {
6061 case ISD::SETCC:
6062 case ISD::TRUNCATE:
6063 return Src.getOperand(0).getValueSizeInBits() == Size;
6064 case ISD::FREEZE:
6065 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
6066 case ISD::AND:
6067 case ISD::XOR:
6068 case ISD::OR:
6069 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
6070 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
6071 case ISD::SELECT:
6072 case ISD::VSELECT:
6073 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
6074 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
6075 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
6076 case ISD::BUILD_VECTOR:
6077 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
6078 ISD::isBuildVectorAllOnes(Src.getNode());
6079 }
6080 return false;
6081}
6082
6083// Helper to push sign extension of vXi1 SETCC result through bitops.
6084 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
6085 SDValue Src, const SDLoc &DL) {
6086 switch (Src.getOpcode()) {
6087 case ISD::SETCC:
6088 case ISD::FREEZE:
6089 case ISD::TRUNCATE:
6090 case ISD::BUILD_VECTOR:
6091 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6092 case ISD::AND:
6093 case ISD::XOR:
6094 case ISD::OR:
6095 return DAG.getNode(
6096 Src.getOpcode(), DL, SExtVT,
6097 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
6098 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
6099 case ISD::SELECT:
6100 case ISD::VSELECT:
6101 return DAG.getSelect(
6102 DL, SExtVT, Src.getOperand(0),
6103 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
6104 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
6105 }
6106 llvm_unreachable("Unexpected node type for vXi1 sign extension");
6107}
6108
6109static SDValue
6110 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
6111 TargetLowering::DAGCombinerInfo &DCI,
6112 const LoongArchSubtarget &Subtarget) {
6113 SDLoc DL(N);
6114 EVT VT = N->getValueType(0);
6115 SDValue Src = N->getOperand(0);
6116 EVT SrcVT = Src.getValueType();
6117
6118 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
6119 return SDValue();
6120
6121 bool UseLASX;
6122 unsigned Opc = ISD::DELETED_NODE;
6123 EVT CmpVT = Src.getOperand(0).getValueType();
6124 EVT EltVT = CmpVT.getVectorElementType();
6125
6126 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
6127 UseLASX = false;
6128 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
6129 CmpVT.getSizeInBits() == 256)
6130 UseLASX = true;
6131 else
6132 return SDValue();
6133
6134 SDValue SrcN1 = Src.getOperand(1);
6135 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
6136 default:
6137 break;
6138 case ISD::SETEQ:
6139 // x == 0 => not (vmsknez.b x)
6140 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6141 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
6142 break;
6143 case ISD::SETGT:
6144 // x > -1 => vmskgez.b x
6145 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
6146 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6147 break;
6148 case ISD::SETGE:
6149 // x >= 0 => vmskgez.b x
6150 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6151 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
6152 break;
6153 case ISD::SETLT:
6154 // x < 0 => vmskltz.{b,h,w,d} x
6155 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
6156 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6157 EltVT == MVT::i64))
6158 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6159 break;
6160 case ISD::SETLE:
6161 // x <= -1 => vmskltz.{b,h,w,d} x
6162 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
6163 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
6164 EltVT == MVT::i64))
6165 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6166 break;
6167 case ISD::SETNE:
6168 // x != 0 => vmsknez.b x
6169 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
6170 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
6171 break;
6172 }
6173
6174 if (Opc == ISD::DELETED_NODE)
6175 return SDValue();
6176
6177 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
6178 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
6179 V = DAG.getZExtOrTrunc(V, DL, T);
6180 return DAG.getBitcast(VT, V);
6181}
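// For example, (i16 (bitcast (v16i8 setcc X, 0, setlt))) becomes
// (trunc (VMSKLTZ X)): each i1 lane of the setcc is exactly the sign bit of
// the corresponding byte, which is the bit vmskltz.b collects.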
6182
6183 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
6184 TargetLowering::DAGCombinerInfo &DCI,
6185 const LoongArchSubtarget &Subtarget) {
6186 SDLoc DL(N);
6187 EVT VT = N->getValueType(0);
6188 SDValue Src = N->getOperand(0);
6189 EVT SrcVT = Src.getValueType();
6190 MVT GRLenVT = Subtarget.getGRLenVT();
6191
6192 if (!DCI.isBeforeLegalizeOps())
6193 return SDValue();
6194
6195 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
6196 return SDValue();
6197
6198 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
6199 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
6200 if (Res)
6201 return Res;
6202
6203 // Generate vXi1 using [X]VMSKLTZ
6204 MVT SExtVT;
6205 unsigned Opc;
6206 bool UseLASX = false;
6207 bool PropagateSExt = false;
6208
6209 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
6210 EVT CmpVT = Src.getOperand(0).getValueType();
6211 if (CmpVT.getSizeInBits() > 256)
6212 return SDValue();
6213 }
6214
6215 switch (SrcVT.getSimpleVT().SimpleTy) {
6216 default:
6217 return SDValue();
6218 case MVT::v2i1:
6219 SExtVT = MVT::v2i64;
6220 break;
6221 case MVT::v4i1:
6222 SExtVT = MVT::v4i32;
6223 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6224 SExtVT = MVT::v4i64;
6225 UseLASX = true;
6226 PropagateSExt = true;
6227 }
6228 break;
6229 case MVT::v8i1:
6230 SExtVT = MVT::v8i16;
6231 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6232 SExtVT = MVT::v8i32;
6233 UseLASX = true;
6234 PropagateSExt = true;
6235 }
6236 break;
6237 case MVT::v16i1:
6238 SExtVT = MVT::v16i8;
6239 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
6240 SExtVT = MVT::v16i16;
6241 UseLASX = true;
6242 PropagateSExt = true;
6243 }
6244 break;
6245 case MVT::v32i1:
6246 SExtVT = MVT::v32i8;
6247 UseLASX = true;
6248 break;
6249 };
6250 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
6251 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
6252
6253 SDValue V;
6254 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
6255 if (Src.getSimpleValueType() == MVT::v32i8) {
6256 SDValue Lo, Hi;
6257 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
6258 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
6259 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
6260 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
6261 DAG.getShiftAmountConstant(16, GRLenVT, DL));
6262 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
6263 } else if (UseLASX) {
6264 return SDValue();
6265 }
6266 }
6267
6268 if (!V) {
6269 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
6270 V = DAG.getNode(Opc, DL, GRLenVT, Src);
6271 }
6272
6273 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
6274 V = DAG.getZExtOrTrunc(V, DL, T);
6275 return DAG.getBitcast(VT, V);
6276}
6277
6278 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
6279 TargetLowering::DAGCombinerInfo &DCI,
6280 const LoongArchSubtarget &Subtarget) {
6281 MVT GRLenVT = Subtarget.getGRLenVT();
6282 EVT ValTy = N->getValueType(0);
6283 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
6284 ConstantSDNode *CN0, *CN1;
6285 SDLoc DL(N);
6286 unsigned ValBits = ValTy.getSizeInBits();
6287 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
6288 unsigned Shamt;
6289 bool SwapAndRetried = false;
6290
6291 // BSTRPICK requires the 32S feature.
6292 if (!Subtarget.has32S())
6293 return SDValue();
6294
6295 if (DCI.isBeforeLegalizeOps())
6296 return SDValue();
6297
6298 if (ValBits != 32 && ValBits != 64)
6299 return SDValue();
6300
6301Retry:
6302 // 1st pattern to match BSTRINS:
6303 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
6304 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
6305 // =>
6306 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6307 if (N0.getOpcode() == ISD::AND &&
6308 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6309 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6310 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
6311 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6312 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6313 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
6314 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6315 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6316 (MaskIdx0 + MaskLen0 <= ValBits)) {
6317 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
6318 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6319 N1.getOperand(0).getOperand(0),
6320 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6321 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6322 }
6323
6324 // 2nd pattern to match BSTRINS:
6325 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
6326 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
6327 // =>
6328 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
6329 if (N0.getOpcode() == ISD::AND &&
6330 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6331 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6332 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6333 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6334 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
6335 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6336 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
6337 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
6338 (MaskIdx0 + MaskLen0 <= ValBits)) {
6339 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
6340 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6341 N1.getOperand(0).getOperand(0),
6342 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
6343 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6344 }
6345
6346 // 3rd pattern to match BSTRINS:
6347 // R = or (and X, mask0), (and Y, mask1)
6348 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
6349 // =>
6350 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
6351 // where msb = lsb + size - 1
6352 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6353 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6354 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6355 (MaskIdx0 + MaskLen0 <= 64) &&
6356 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
6357 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6358 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
6359 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6360 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
6361 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
6362 DAG.getConstant(ValBits == 32
6363 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6364 : (MaskIdx0 + MaskLen0 - 1),
6365 DL, GRLenVT),
6366 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6367 }
6368
6369 // 4th pattern to match BSTRINS:
6370 // R = or (and X, mask), (shl Y, shamt)
6371 // where mask = (2**shamt - 1)
6372 // =>
6373 // R = BSTRINS X, Y, ValBits - 1, shamt
6374 // where ValBits = 32 or 64
6375 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
6376 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6377 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
6378 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6379 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
6380 (MaskIdx0 + MaskLen0 <= ValBits)) {
6381 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
6382 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6383 N1.getOperand(0),
6384 DAG.getConstant((ValBits - 1), DL, GRLenVT),
6385 DAG.getConstant(Shamt, DL, GRLenVT));
6386 }
6387
6388 // 5th pattern to match BSTRINS:
6389 // R = or (and X, mask), const
6390 // where ~mask = (2**size - 1) << lsb, mask & const = 0
6391 // =>
6392 // R = BSTRINS X, (const >> lsb), msb, lsb
6393 // where msb = lsb + size - 1
6394 if (N0.getOpcode() == ISD::AND &&
6395 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
6396 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
6397 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
6398 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
6399 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
6400 return DAG.getNode(
6401 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
6402 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
6403 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
6404 : (MaskIdx0 + MaskLen0 - 1),
6405 DL, GRLenVT),
6406 DAG.getConstant(MaskIdx0, DL, GRLenVT));
6407 }
6408
6409 // 6th pattern.
6410 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
6411 // by the incoming bits are known to be zero.
6412 // =>
6413 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
6414 //
6415 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
6416 // pattern is more common than the 1st. So we put the 1st before the 6th in
6417 // order to match as many nodes as possible.
6418 ConstantSDNode *CNMask, *CNShamt;
6419 unsigned MaskIdx, MaskLen;
6420 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
6421 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6422 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6423 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6424 CNShamt->getZExtValue() + MaskLen <= ValBits) {
6425 Shamt = CNShamt->getZExtValue();
6426 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
6427 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6428 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
6429 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6430 N1.getOperand(0).getOperand(0),
6431 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
6432 DAG.getConstant(Shamt, DL, GRLenVT));
6433 }
6434 }
6435
6436 // 7th pattern.
6437 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
6438 // overwritten by the incoming bits are known to be zero.
6439 // =>
6440 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
6441 //
6442 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
6443 // before the 7th in order to match as many nodes as possible.
6444 if (N1.getOpcode() == ISD::AND &&
6445 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6446 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
6447 N1.getOperand(0).getOpcode() == ISD::SHL &&
6448 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
6449 CNShamt->getZExtValue() == MaskIdx) {
6450 APInt ShMask(ValBits, CNMask->getZExtValue());
6451 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6452 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
6453 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6454 N1.getOperand(0).getOperand(0),
6455 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6456 DAG.getConstant(MaskIdx, DL, GRLenVT));
6457 }
6458 }
6459
6460 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
6461 if (!SwapAndRetried) {
6462 std::swap(N0, N1);
6463 SwapAndRetried = true;
6464 goto Retry;
6465 }
6466
6467 SwapAndRetried = false;
6468Retry2:
6469 // 8th pattern.
6470 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
6471 // the incoming bits are known to be zero.
6472 // =>
6473 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
6474 //
6475 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
6476 // we put it here in order to match as many nodes as possible or generate less
6477 // instructions.
6478 if (N1.getOpcode() == ISD::AND &&
6479 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
6480 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
6481 APInt ShMask(ValBits, CNMask->getZExtValue());
6482 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
6483 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
6484 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
6485 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
6486 N1->getOperand(0),
6487 DAG.getConstant(MaskIdx, DL, GRLenVT)),
6488 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
6489 DAG.getConstant(MaskIdx, DL, GRLenVT));
6490 }
6491 }
6492 // Swap N0/N1 and retry.
6493 if (!SwapAndRetried) {
6494 std::swap(N0, N1);
6495 SwapAndRetried = true;
6496 goto Retry2;
6497 }
6498
6499 return SDValue();
6500}
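// For example, on a 32-bit value the 1st pattern turns
//   (or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00))
// into (BSTRINS X, Y, 15, 8): mask1 == 0xff00 is a shifted mask with
// lsb == 8 and size == 8, mask0 == ~mask1, and msb == 8 + 8 - 1 == 15.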
6501
6502static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
6503 ExtType = ISD::NON_EXTLOAD;
6504
6505 switch (V.getNode()->getOpcode()) {
6506 case ISD::LOAD: {
6507 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
6508 if ((LoadNode->getMemoryVT() == MVT::i8) ||
6509 (LoadNode->getMemoryVT() == MVT::i16)) {
6510 ExtType = LoadNode->getExtensionType();
6511 return true;
6512 }
6513 return false;
6514 }
6515 case ISD::AssertSext: {
6516 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6517 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6518 ExtType = ISD::SEXTLOAD;
6519 return true;
6520 }
6521 return false;
6522 }
6523 case ISD::AssertZext: {
6524 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
6525 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
6526 ExtType = ISD::ZEXTLOAD;
6527 return true;
6528 }
6529 return false;
6530 }
6531 default:
6532 return false;
6533 }
6534
6535 return false;
6536}
6537
6538// Eliminate redundant truncation and zero-extension nodes.
6539// * Case 1:
6540// +------------+ +------------+ +------------+
6541// | Input1 | | Input2 | | CC |
6542// +------------+ +------------+ +------------+
6543// | | |
6544// V V +----+
6545// +------------+ +------------+ |
6546// | TRUNCATE | | TRUNCATE | |
6547// +------------+ +------------+ |
6548// | | |
6549// V V |
6550// +------------+ +------------+ |
6551// | ZERO_EXT | | ZERO_EXT | |
6552// +------------+ +------------+ |
6553// | | |
6554// | +-------------+ |
6555// V V | |
6556// +----------------+ | |
6557// | AND | | |
6558// +----------------+ | |
6559// | | |
6560// +---------------+ | |
6561// | | |
6562// V V V
6563// +-------------+
6564// | CMP |
6565// +-------------+
6566// * Case 2:
6567// +------------+ +------------+ +-------------+ +------------+ +------------+
6568// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
6569// +------------+ +------------+ +-------------+ +------------+ +------------+
6570// | | | | |
6571// V | | | |
6572// +------------+ | | | |
6573// | XOR |<---------------------+ | |
6574// +------------+ | | |
6575// | | | |
6576// V V +---------------+ |
6577// +------------+ +------------+ | |
6578// | TRUNCATE | | TRUNCATE | | +-------------------------+
6579// +------------+ +------------+ | |
6580// | | | |
6581// V V | |
6582// +------------+ +------------+ | |
6583// | ZERO_EXT | | ZERO_EXT | | |
6584// +------------+ +------------+ | |
6585// | | | |
6586// V V | |
6587// +----------------+ | |
6588// | AND | | |
6589// +----------------+ | |
6590// | | |
6591// +---------------+ | |
6592// | | |
6593// V V V
6594// +-------------+
6595// | CMP |
6596// +-------------+
6597 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
6598 TargetLowering::DAGCombinerInfo &DCI,
6599 const LoongArchSubtarget &Subtarget) {
6600 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
6601
6602 SDNode *AndNode = N->getOperand(0).getNode();
6603 if (AndNode->getOpcode() != ISD::AND)
6604 return SDValue();
6605
6606 SDValue AndInputValue2 = AndNode->getOperand(1);
6607 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
6608 return SDValue();
6609
6610 SDValue CmpInputValue = N->getOperand(1);
6611 SDValue AndInputValue1 = AndNode->getOperand(0);
6612 if (AndInputValue1.getOpcode() == ISD::XOR) {
6613 if (CC != ISD::SETEQ && CC != ISD::SETNE)
6614 return SDValue();
6615 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
6616 if (!CN || !CN->isAllOnes())
6617 return SDValue();
6618 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
6619 if (!CN || !CN->isZero())
6620 return SDValue();
6621 AndInputValue1 = AndInputValue1.getOperand(0);
6622 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
6623 return SDValue();
6624 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
6625 if (AndInputValue2 != CmpInputValue)
6626 return SDValue();
6627 } else {
6628 return SDValue();
6629 }
6630
6631 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
6632 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
6633 return SDValue();
6634
6635 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
6636 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
6637 return SDValue();
6638
6639 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
6640 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
6641 ISD::LoadExtType ExtType1;
6642 ISD::LoadExtType ExtType2;
6643
6644 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
6645 !checkValueWidth(TruncInputValue2, ExtType2))
6646 return SDValue();
6647
6648 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
6649 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
6650 return SDValue();
6651
6652 if ((ExtType2 != ISD::ZEXTLOAD) &&
6653 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
6654 return SDValue();
6655
6656 // These truncation and zero-extension nodes are not necessary, remove them.
6657 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
6658 TruncInputValue1, TruncInputValue2);
6659 SDValue NewSetCC =
6660 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
6661 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
6662 return SDValue(N, 0);
6663}
6664
6665// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
6666 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
6667 TargetLowering::DAGCombinerInfo &DCI,
6668 const LoongArchSubtarget &Subtarget) {
6669 if (DCI.isBeforeLegalizeOps())
6670 return SDValue();
6671
6672 SDValue Src = N->getOperand(0);
6673 if (Src.getOpcode() != LoongArchISD::REVB_2W)
6674 return SDValue();
6675
6676 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
6677 Src.getOperand(0));
6678}
6679
6680// Perform common combines for BR_CC and SELECT_CC conditions.
6681static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
6682 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
6683 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6684
6685 // Since an arithmetic right shift always preserves the sign bit, the
6686 // shift can be omitted.
6687 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
6688 // setge (sra X, N), 0 -> setge X, 0
6689 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
6690 LHS.getOpcode() == ISD::SRA) {
6691 LHS = LHS.getOperand(0);
6692 return true;
6693 }
6694
6695 if (!ISD::isIntEqualitySetCC(CCVal))
6696 return false;
6697
6698 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
6699 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
6700 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
6701 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
6702 // If we're looking for eq 0 instead of ne 0, we need to invert the
6703 // condition.
6704 bool Invert = CCVal == ISD::SETEQ;
6705 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6706 if (Invert)
6707 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6708
6709 RHS = LHS.getOperand(1);
6710 LHS = LHS.getOperand(0);
6711 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6712
6713 CC = DAG.getCondCode(CCVal);
6714 return true;
6715 }
6716
6717 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6718 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6719 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6720 SDValue LHS0 = LHS.getOperand(0);
6721 if (LHS0.getOpcode() == ISD::AND &&
6722 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6723 uint64_t Mask = LHS0.getConstantOperandVal(1);
6724 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6725 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6726 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6727 CC = DAG.getCondCode(CCVal);
6728
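// Move the tested bit into the sign-bit position so that the eq/ne test
// against zero becomes the ge/lt sign test chosen above.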
6729 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6730 LHS = LHS0.getOperand(0);
6731 if (ShAmt != 0)
6732 LHS =
6733 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6734 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6735 return true;
6736 }
6737 }
6738 }
6739
6740 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6741 // This can occur when legalizing some floating point comparisons.
6742 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6743 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6744 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6745 CC = DAG.getCondCode(CCVal);
6746 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6747 return true;
6748 }
6749
6750 return false;
6751}
6752
6753static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6754 TargetLowering::DAGCombinerInfo &DCI,
6755 const LoongArchSubtarget &Subtarget) {
6756 SDValue LHS = N->getOperand(1);
6757 SDValue RHS = N->getOperand(2);
6758 SDValue CC = N->getOperand(3);
6759 SDLoc DL(N);
6760
6761 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6762 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6763 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6764
6765 return SDValue();
6766}
6767
6768static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6769 TargetLowering::DAGCombinerInfo &DCI,
6770 const LoongArchSubtarget &Subtarget) {
6771 // Transform
6772 SDValue LHS = N->getOperand(0);
6773 SDValue RHS = N->getOperand(1);
6774 SDValue CC = N->getOperand(2);
6775 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6776 SDValue TrueV = N->getOperand(3);
6777 SDValue FalseV = N->getOperand(4);
6778 SDLoc DL(N);
6779 EVT VT = N->getValueType(0);
6780
6781 // If the True and False values are the same, we don't need a select_cc.
6782 if (TrueV == FalseV)
6783 return TrueV;
6784
6785 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6786 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6787 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6788 isNullConstant(RHS) &&
6789 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6790 if (CCVal == ISD::CondCode::SETGE)
6791 std::swap(TrueV, FalseV);
6792
6793 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6794 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6795 // Only handle simm12; values outside this range would have to be
6796 // materialized in a register anyway.
6797 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6798 isInt<12>(TrueSImm - FalseSImm)) {
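// SRA produces all-ones when LHS is negative and zero otherwise; ANDing
// with (TrueV - FalseV) and adding FalseV then selects between the two
// constants without a branch.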
6799 SDValue SRA =
6800 DAG.getNode(ISD::SRA, DL, VT, LHS,
6801 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6802 SDValue AND =
6803 DAG.getNode(ISD::AND, DL, VT, SRA,
6804 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6805 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6806 }
6807
6808 if (CCVal == ISD::CondCode::SETGE)
6809 std::swap(TrueV, FalseV);
6810 }
6811
6812 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6813 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6814 {LHS, RHS, CC, TrueV, FalseV});
6815
6816 return SDValue();
6817}
6818
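// Check that an intrinsic immediate operand fits in N bits (signed or
// unsigned); emit a diagnostic and return UNDEF if not, otherwise rebuild
// it as a GRLen-typed constant.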
6819template <unsigned N>
6820static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
6821 SelectionDAG &DAG,
6822 const LoongArchSubtarget &Subtarget,
6823 bool IsSigned = false) {
6824 SDLoc DL(Node);
6825 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6826 // Check the ImmArg.
6827 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6828 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6829 DAG.getContext()->emitError(Node->getOperationName(0) +
6830 ": argument out of range.");
6831 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6832 }
6833 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6834}
6835
6836template <unsigned N>
6837static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6838 SelectionDAG &DAG, bool IsSigned = false) {
6839 SDLoc DL(Node);
6840 EVT ResTy = Node->getValueType(0);
6841 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6842
6843 // Check the ImmArg.
6844 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6845 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6846 DAG.getContext()->emitError(Node->getOperationName(0) +
6847 ": argument out of range.");
6848 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6849 }
6850 return DAG.getConstant(
6851 APInt(ResTy.getScalarType().getSizeInBits(),
6852 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6853 DL, ResTy);
6854}
6855
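// Mask each shift amount in the vector operand down to the element bit
// width, matching the instructions' use of the amount modulo the element
// size.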
6856static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6857 SDLoc DL(Node);
6858 EVT ResTy = Node->getValueType(0);
6859 SDValue Vec = Node->getOperand(2);
6860 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6861 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6862}
6863
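// Lower [x]vbitclr: clear bit (Op2 % EltBits) of each element in Op1, i.e.
// AND with the inverse of (1 << (Op2 & (EltBits - 1))).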
6864static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6865 SDLoc DL(Node);
6866 EVT ResTy = Node->getValueType(0);
6867 SDValue One = DAG.getConstant(1, DL, ResTy);
6868 SDValue Bit =
6869 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6870
6871 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6872 DAG.getNOT(DL, Bit, ResTy));
6873}
6874
6875template <unsigned N>
6876static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6877 SDLoc DL(Node);
6878 EVT ResTy = Node->getValueType(0);
6879 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6880 // Check the unsigned ImmArg.
6881 if (!isUInt<N>(CImm->getZExtValue())) {
6882 DAG.getContext()->emitError(Node->getOperationName(0) +
6883 ": argument out of range.");
6884 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6885 }
6886
6887 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6888 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6889
6890 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6891}
6892
6893template <unsigned N>
6894static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6895 SDLoc DL(Node);
6896 EVT ResTy = Node->getValueType(0);
6897 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6898 // Check the unsigned ImmArg.
6899 if (!isUInt<N>(CImm->getZExtValue())) {
6900 DAG.getContext()->emitError(Node->getOperationName(0) +
6901 ": argument out of range.");
6902 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6903 }
6904
6905 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6906 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6907 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6908}
6909
6910template <unsigned N>
6911static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6912 SDLoc DL(Node);
6913 EVT ResTy = Node->getValueType(0);
6914 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6915 // Check the unsigned ImmArg.
6916 if (!isUInt<N>(CImm->getZExtValue())) {
6917 DAG.getContext()->emitError(Node->getOperationName(0) +
6918 ": argument out of range.");
6919 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6920 }
6921
6922 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6923 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6924 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6925}
6926
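// Lower [x]vpickve2gr: extract element Imm of the vector operand as a
// sign- or zero-extended scalar (selected by ResOp), diagnosing an
// out-of-range element index.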
6927template <unsigned W>
6928static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG,
6929 unsigned ResOp) {
6930 unsigned Imm = N->getConstantOperandVal(2);
6931 if (!isUInt<W>(Imm)) {
6932 const StringRef ErrorMsg = "argument out of range";
6933 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6934 return DAG.getUNDEF(N->getValueType(0));
6935 }
6936 SDLoc DL(N);
6937 SDValue Vec = N->getOperand(1);
6938 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6939 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6940 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6941}
6942
6943static SDValue
6944performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6945 TargetLowering::DAGCombinerInfo &DCI,
6946 const LoongArchSubtarget &Subtarget) {
6947 SDLoc DL(N);
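// Map element-wise LSX/LASX intrinsics onto equivalent generic ISD nodes so
// that later DAG combines and instruction selection see canonical operations.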
6948 switch (N->getConstantOperandVal(0)) {
6949 default:
6950 break;
6951 case Intrinsic::loongarch_lsx_vadd_b:
6952 case Intrinsic::loongarch_lsx_vadd_h:
6953 case Intrinsic::loongarch_lsx_vadd_w:
6954 case Intrinsic::loongarch_lsx_vadd_d:
6955 case Intrinsic::loongarch_lasx_xvadd_b:
6956 case Intrinsic::loongarch_lasx_xvadd_h:
6957 case Intrinsic::loongarch_lasx_xvadd_w:
6958 case Intrinsic::loongarch_lasx_xvadd_d:
6959 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6960 N->getOperand(2));
6961 case Intrinsic::loongarch_lsx_vaddi_bu:
6962 case Intrinsic::loongarch_lsx_vaddi_hu:
6963 case Intrinsic::loongarch_lsx_vaddi_wu:
6964 case Intrinsic::loongarch_lsx_vaddi_du:
6965 case Intrinsic::loongarch_lasx_xvaddi_bu:
6966 case Intrinsic::loongarch_lasx_xvaddi_hu:
6967 case Intrinsic::loongarch_lasx_xvaddi_wu:
6968 case Intrinsic::loongarch_lasx_xvaddi_du:
6969 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6970 lowerVectorSplatImm<5>(N, 2, DAG));
6971 case Intrinsic::loongarch_lsx_vsub_b:
6972 case Intrinsic::loongarch_lsx_vsub_h:
6973 case Intrinsic::loongarch_lsx_vsub_w:
6974 case Intrinsic::loongarch_lsx_vsub_d:
6975 case Intrinsic::loongarch_lasx_xvsub_b:
6976 case Intrinsic::loongarch_lasx_xvsub_h:
6977 case Intrinsic::loongarch_lasx_xvsub_w:
6978 case Intrinsic::loongarch_lasx_xvsub_d:
6979 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6980 N->getOperand(2));
6981 case Intrinsic::loongarch_lsx_vsubi_bu:
6982 case Intrinsic::loongarch_lsx_vsubi_hu:
6983 case Intrinsic::loongarch_lsx_vsubi_wu:
6984 case Intrinsic::loongarch_lsx_vsubi_du:
6985 case Intrinsic::loongarch_lasx_xvsubi_bu:
6986 case Intrinsic::loongarch_lasx_xvsubi_hu:
6987 case Intrinsic::loongarch_lasx_xvsubi_wu:
6988 case Intrinsic::loongarch_lasx_xvsubi_du:
6989 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6990 lowerVectorSplatImm<5>(N, 2, DAG));
6991 case Intrinsic::loongarch_lsx_vneg_b:
6992 case Intrinsic::loongarch_lsx_vneg_h:
6993 case Intrinsic::loongarch_lsx_vneg_w:
6994 case Intrinsic::loongarch_lsx_vneg_d:
6995 case Intrinsic::loongarch_lasx_xvneg_b:
6996 case Intrinsic::loongarch_lasx_xvneg_h:
6997 case Intrinsic::loongarch_lasx_xvneg_w:
6998 case Intrinsic::loongarch_lasx_xvneg_d:
6999 return DAG.getNode(
7000 ISD::SUB, DL, N->getValueType(0),
7001 DAG.getConstant(
7002 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
7003 /*isSigned=*/true),
7004 SDLoc(N), N->getValueType(0)),
7005 N->getOperand(1));
7006 case Intrinsic::loongarch_lsx_vmax_b:
7007 case Intrinsic::loongarch_lsx_vmax_h:
7008 case Intrinsic::loongarch_lsx_vmax_w:
7009 case Intrinsic::loongarch_lsx_vmax_d:
7010 case Intrinsic::loongarch_lasx_xvmax_b:
7011 case Intrinsic::loongarch_lasx_xvmax_h:
7012 case Intrinsic::loongarch_lasx_xvmax_w:
7013 case Intrinsic::loongarch_lasx_xvmax_d:
7014 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7015 N->getOperand(2));
7016 case Intrinsic::loongarch_lsx_vmax_bu:
7017 case Intrinsic::loongarch_lsx_vmax_hu:
7018 case Intrinsic::loongarch_lsx_vmax_wu:
7019 case Intrinsic::loongarch_lsx_vmax_du:
7020 case Intrinsic::loongarch_lasx_xvmax_bu:
7021 case Intrinsic::loongarch_lasx_xvmax_hu:
7022 case Intrinsic::loongarch_lasx_xvmax_wu:
7023 case Intrinsic::loongarch_lasx_xvmax_du:
7024 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7025 N->getOperand(2));
7026 case Intrinsic::loongarch_lsx_vmaxi_b:
7027 case Intrinsic::loongarch_lsx_vmaxi_h:
7028 case Intrinsic::loongarch_lsx_vmaxi_w:
7029 case Intrinsic::loongarch_lsx_vmaxi_d:
7030 case Intrinsic::loongarch_lasx_xvmaxi_b:
7031 case Intrinsic::loongarch_lasx_xvmaxi_h:
7032 case Intrinsic::loongarch_lasx_xvmaxi_w:
7033 case Intrinsic::loongarch_lasx_xvmaxi_d:
7034 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
7035 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7036 case Intrinsic::loongarch_lsx_vmaxi_bu:
7037 case Intrinsic::loongarch_lsx_vmaxi_hu:
7038 case Intrinsic::loongarch_lsx_vmaxi_wu:
7039 case Intrinsic::loongarch_lsx_vmaxi_du:
7040 case Intrinsic::loongarch_lasx_xvmaxi_bu:
7041 case Intrinsic::loongarch_lasx_xvmaxi_hu:
7042 case Intrinsic::loongarch_lasx_xvmaxi_wu:
7043 case Intrinsic::loongarch_lasx_xvmaxi_du:
7044 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
7045 lowerVectorSplatImm<5>(N, 2, DAG));
7046 case Intrinsic::loongarch_lsx_vmin_b:
7047 case Intrinsic::loongarch_lsx_vmin_h:
7048 case Intrinsic::loongarch_lsx_vmin_w:
7049 case Intrinsic::loongarch_lsx_vmin_d:
7050 case Intrinsic::loongarch_lasx_xvmin_b:
7051 case Intrinsic::loongarch_lasx_xvmin_h:
7052 case Intrinsic::loongarch_lasx_xvmin_w:
7053 case Intrinsic::loongarch_lasx_xvmin_d:
7054 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7055 N->getOperand(2));
7056 case Intrinsic::loongarch_lsx_vmin_bu:
7057 case Intrinsic::loongarch_lsx_vmin_hu:
7058 case Intrinsic::loongarch_lsx_vmin_wu:
7059 case Intrinsic::loongarch_lsx_vmin_du:
7060 case Intrinsic::loongarch_lasx_xvmin_bu:
7061 case Intrinsic::loongarch_lasx_xvmin_hu:
7062 case Intrinsic::loongarch_lasx_xvmin_wu:
7063 case Intrinsic::loongarch_lasx_xvmin_du:
7064 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7065 N->getOperand(2));
7066 case Intrinsic::loongarch_lsx_vmini_b:
7067 case Intrinsic::loongarch_lsx_vmini_h:
7068 case Intrinsic::loongarch_lsx_vmini_w:
7069 case Intrinsic::loongarch_lsx_vmini_d:
7070 case Intrinsic::loongarch_lasx_xvmini_b:
7071 case Intrinsic::loongarch_lasx_xvmini_h:
7072 case Intrinsic::loongarch_lasx_xvmini_w:
7073 case Intrinsic::loongarch_lasx_xvmini_d:
7074 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
7075 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
7076 case Intrinsic::loongarch_lsx_vmini_bu:
7077 case Intrinsic::loongarch_lsx_vmini_hu:
7078 case Intrinsic::loongarch_lsx_vmini_wu:
7079 case Intrinsic::loongarch_lsx_vmini_du:
7080 case Intrinsic::loongarch_lasx_xvmini_bu:
7081 case Intrinsic::loongarch_lasx_xvmini_hu:
7082 case Intrinsic::loongarch_lasx_xvmini_wu:
7083 case Intrinsic::loongarch_lasx_xvmini_du:
7084 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
7085 lowerVectorSplatImm<5>(N, 2, DAG));
7086 case Intrinsic::loongarch_lsx_vmul_b:
7087 case Intrinsic::loongarch_lsx_vmul_h:
7088 case Intrinsic::loongarch_lsx_vmul_w:
7089 case Intrinsic::loongarch_lsx_vmul_d:
7090 case Intrinsic::loongarch_lasx_xvmul_b:
7091 case Intrinsic::loongarch_lasx_xvmul_h:
7092 case Intrinsic::loongarch_lasx_xvmul_w:
7093 case Intrinsic::loongarch_lasx_xvmul_d:
7094 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
7095 N->getOperand(2));
7096 case Intrinsic::loongarch_lsx_vmadd_b:
7097 case Intrinsic::loongarch_lsx_vmadd_h:
7098 case Intrinsic::loongarch_lsx_vmadd_w:
7099 case Intrinsic::loongarch_lsx_vmadd_d:
7100 case Intrinsic::loongarch_lasx_xvmadd_b:
7101 case Intrinsic::loongarch_lasx_xvmadd_h:
7102 case Intrinsic::loongarch_lasx_xvmadd_w:
7103 case Intrinsic::loongarch_lasx_xvmadd_d: {
7104 EVT ResTy = N->getValueType(0);
7105 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
7106 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7107 N->getOperand(3)));
7108 }
7109 case Intrinsic::loongarch_lsx_vmsub_b:
7110 case Intrinsic::loongarch_lsx_vmsub_h:
7111 case Intrinsic::loongarch_lsx_vmsub_w:
7112 case Intrinsic::loongarch_lsx_vmsub_d:
7113 case Intrinsic::loongarch_lasx_xvmsub_b:
7114 case Intrinsic::loongarch_lasx_xvmsub_h:
7115 case Intrinsic::loongarch_lasx_xvmsub_w:
7116 case Intrinsic::loongarch_lasx_xvmsub_d: {
7117 EVT ResTy = N->getValueType(0);
7118 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
7119 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
7120 N->getOperand(3)));
7121 }
7122 case Intrinsic::loongarch_lsx_vdiv_b:
7123 case Intrinsic::loongarch_lsx_vdiv_h:
7124 case Intrinsic::loongarch_lsx_vdiv_w:
7125 case Intrinsic::loongarch_lsx_vdiv_d:
7126 case Intrinsic::loongarch_lasx_xvdiv_b:
7127 case Intrinsic::loongarch_lasx_xvdiv_h:
7128 case Intrinsic::loongarch_lasx_xvdiv_w:
7129 case Intrinsic::loongarch_lasx_xvdiv_d:
7130 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
7131 N->getOperand(2));
7132 case Intrinsic::loongarch_lsx_vdiv_bu:
7133 case Intrinsic::loongarch_lsx_vdiv_hu:
7134 case Intrinsic::loongarch_lsx_vdiv_wu:
7135 case Intrinsic::loongarch_lsx_vdiv_du:
7136 case Intrinsic::loongarch_lasx_xvdiv_bu:
7137 case Intrinsic::loongarch_lasx_xvdiv_hu:
7138 case Intrinsic::loongarch_lasx_xvdiv_wu:
7139 case Intrinsic::loongarch_lasx_xvdiv_du:
7140 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
7141 N->getOperand(2));
7142 case Intrinsic::loongarch_lsx_vmod_b:
7143 case Intrinsic::loongarch_lsx_vmod_h:
7144 case Intrinsic::loongarch_lsx_vmod_w:
7145 case Intrinsic::loongarch_lsx_vmod_d:
7146 case Intrinsic::loongarch_lasx_xvmod_b:
7147 case Intrinsic::loongarch_lasx_xvmod_h:
7148 case Intrinsic::loongarch_lasx_xvmod_w:
7149 case Intrinsic::loongarch_lasx_xvmod_d:
7150 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
7151 N->getOperand(2));
7152 case Intrinsic::loongarch_lsx_vmod_bu:
7153 case Intrinsic::loongarch_lsx_vmod_hu:
7154 case Intrinsic::loongarch_lsx_vmod_wu:
7155 case Intrinsic::loongarch_lsx_vmod_du:
7156 case Intrinsic::loongarch_lasx_xvmod_bu:
7157 case Intrinsic::loongarch_lasx_xvmod_hu:
7158 case Intrinsic::loongarch_lasx_xvmod_wu:
7159 case Intrinsic::loongarch_lasx_xvmod_du:
7160 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
7161 N->getOperand(2));
7162 case Intrinsic::loongarch_lsx_vand_v:
7163 case Intrinsic::loongarch_lasx_xvand_v:
7164 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7165 N->getOperand(2));
7166 case Intrinsic::loongarch_lsx_vor_v:
7167 case Intrinsic::loongarch_lasx_xvor_v:
7168 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7169 N->getOperand(2));
7170 case Intrinsic::loongarch_lsx_vxor_v:
7171 case Intrinsic::loongarch_lasx_xvxor_v:
7172 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7173 N->getOperand(2));
7174 case Intrinsic::loongarch_lsx_vnor_v:
7175 case Intrinsic::loongarch_lasx_xvnor_v: {
7176 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7177 N->getOperand(2));
7178 return DAG.getNOT(DL, Res, Res->getValueType(0));
7179 }
7180 case Intrinsic::loongarch_lsx_vandi_b:
7181 case Intrinsic::loongarch_lasx_xvandi_b:
7182 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
7183 lowerVectorSplatImm<8>(N, 2, DAG));
7184 case Intrinsic::loongarch_lsx_vori_b:
7185 case Intrinsic::loongarch_lasx_xvori_b:
7186 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
7187 lowerVectorSplatImm<8>(N, 2, DAG));
7188 case Intrinsic::loongarch_lsx_vxori_b:
7189 case Intrinsic::loongarch_lasx_xvxori_b:
7190 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
7191 lowerVectorSplatImm<8>(N, 2, DAG));
7192 case Intrinsic::loongarch_lsx_vsll_b:
7193 case Intrinsic::loongarch_lsx_vsll_h:
7194 case Intrinsic::loongarch_lsx_vsll_w:
7195 case Intrinsic::loongarch_lsx_vsll_d:
7196 case Intrinsic::loongarch_lasx_xvsll_b:
7197 case Intrinsic::loongarch_lasx_xvsll_h:
7198 case Intrinsic::loongarch_lasx_xvsll_w:
7199 case Intrinsic::loongarch_lasx_xvsll_d:
7200 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7201 truncateVecElts(N, DAG));
7202 case Intrinsic::loongarch_lsx_vslli_b:
7203 case Intrinsic::loongarch_lasx_xvslli_b:
7204 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7205 lowerVectorSplatImm<3>(N, 2, DAG));
7206 case Intrinsic::loongarch_lsx_vslli_h:
7207 case Intrinsic::loongarch_lasx_xvslli_h:
7208 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7209 lowerVectorSplatImm<4>(N, 2, DAG));
7210 case Intrinsic::loongarch_lsx_vslli_w:
7211 case Intrinsic::loongarch_lasx_xvslli_w:
7212 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7213 lowerVectorSplatImm<5>(N, 2, DAG));
7214 case Intrinsic::loongarch_lsx_vslli_d:
7215 case Intrinsic::loongarch_lasx_xvslli_d:
7216 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
7217 lowerVectorSplatImm<6>(N, 2, DAG));
7218 case Intrinsic::loongarch_lsx_vsrl_b:
7219 case Intrinsic::loongarch_lsx_vsrl_h:
7220 case Intrinsic::loongarch_lsx_vsrl_w:
7221 case Intrinsic::loongarch_lsx_vsrl_d:
7222 case Intrinsic::loongarch_lasx_xvsrl_b:
7223 case Intrinsic::loongarch_lasx_xvsrl_h:
7224 case Intrinsic::loongarch_lasx_xvsrl_w:
7225 case Intrinsic::loongarch_lasx_xvsrl_d:
7226 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7227 truncateVecElts(N, DAG));
7228 case Intrinsic::loongarch_lsx_vsrli_b:
7229 case Intrinsic::loongarch_lasx_xvsrli_b:
7230 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7231 lowerVectorSplatImm<3>(N, 2, DAG));
7232 case Intrinsic::loongarch_lsx_vsrli_h:
7233 case Intrinsic::loongarch_lasx_xvsrli_h:
7234 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7235 lowerVectorSplatImm<4>(N, 2, DAG));
7236 case Intrinsic::loongarch_lsx_vsrli_w:
7237 case Intrinsic::loongarch_lasx_xvsrli_w:
7238 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7239 lowerVectorSplatImm<5>(N, 2, DAG));
7240 case Intrinsic::loongarch_lsx_vsrli_d:
7241 case Intrinsic::loongarch_lasx_xvsrli_d:
7242 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
7243 lowerVectorSplatImm<6>(N, 2, DAG));
7244 case Intrinsic::loongarch_lsx_vsra_b:
7245 case Intrinsic::loongarch_lsx_vsra_h:
7246 case Intrinsic::loongarch_lsx_vsra_w:
7247 case Intrinsic::loongarch_lsx_vsra_d:
7248 case Intrinsic::loongarch_lasx_xvsra_b:
7249 case Intrinsic::loongarch_lasx_xvsra_h:
7250 case Intrinsic::loongarch_lasx_xvsra_w:
7251 case Intrinsic::loongarch_lasx_xvsra_d:
7252 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7253 truncateVecElts(N, DAG));
7254 case Intrinsic::loongarch_lsx_vsrai_b:
7255 case Intrinsic::loongarch_lasx_xvsrai_b:
7256 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7257 lowerVectorSplatImm<3>(N, 2, DAG));
7258 case Intrinsic::loongarch_lsx_vsrai_h:
7259 case Intrinsic::loongarch_lasx_xvsrai_h:
7260 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7261 lowerVectorSplatImm<4>(N, 2, DAG));
7262 case Intrinsic::loongarch_lsx_vsrai_w:
7263 case Intrinsic::loongarch_lasx_xvsrai_w:
7264 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7265 lowerVectorSplatImm<5>(N, 2, DAG));
7266 case Intrinsic::loongarch_lsx_vsrai_d:
7267 case Intrinsic::loongarch_lasx_xvsrai_d:
7268 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
7269 lowerVectorSplatImm<6>(N, 2, DAG));
7270 case Intrinsic::loongarch_lsx_vclz_b:
7271 case Intrinsic::loongarch_lsx_vclz_h:
7272 case Intrinsic::loongarch_lsx_vclz_w:
7273 case Intrinsic::loongarch_lsx_vclz_d:
7274 case Intrinsic::loongarch_lasx_xvclz_b:
7275 case Intrinsic::loongarch_lasx_xvclz_h:
7276 case Intrinsic::loongarch_lasx_xvclz_w:
7277 case Intrinsic::loongarch_lasx_xvclz_d:
7278 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
7279 case Intrinsic::loongarch_lsx_vpcnt_b:
7280 case Intrinsic::loongarch_lsx_vpcnt_h:
7281 case Intrinsic::loongarch_lsx_vpcnt_w:
7282 case Intrinsic::loongarch_lsx_vpcnt_d:
7283 case Intrinsic::loongarch_lasx_xvpcnt_b:
7284 case Intrinsic::loongarch_lasx_xvpcnt_h:
7285 case Intrinsic::loongarch_lasx_xvpcnt_w:
7286 case Intrinsic::loongarch_lasx_xvpcnt_d:
7287 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
7288 case Intrinsic::loongarch_lsx_vbitclr_b:
7289 case Intrinsic::loongarch_lsx_vbitclr_h:
7290 case Intrinsic::loongarch_lsx_vbitclr_w:
7291 case Intrinsic::loongarch_lsx_vbitclr_d:
7292 case Intrinsic::loongarch_lasx_xvbitclr_b:
7293 case Intrinsic::loongarch_lasx_xvbitclr_h:
7294 case Intrinsic::loongarch_lasx_xvbitclr_w:
7295 case Intrinsic::loongarch_lasx_xvbitclr_d:
7296 return lowerVectorBitClear(N, DAG);
7297 case Intrinsic::loongarch_lsx_vbitclri_b:
7298 case Intrinsic::loongarch_lasx_xvbitclri_b:
7299 return lowerVectorBitClearImm<3>(N, DAG);
7300 case Intrinsic::loongarch_lsx_vbitclri_h:
7301 case Intrinsic::loongarch_lasx_xvbitclri_h:
7302 return lowerVectorBitClearImm<4>(N, DAG);
7303 case Intrinsic::loongarch_lsx_vbitclri_w:
7304 case Intrinsic::loongarch_lasx_xvbitclri_w:
7305 return lowerVectorBitClearImm<5>(N, DAG);
7306 case Intrinsic::loongarch_lsx_vbitclri_d:
7307 case Intrinsic::loongarch_lasx_xvbitclri_d:
7308 return lowerVectorBitClearImm<6>(N, DAG);
7309 case Intrinsic::loongarch_lsx_vbitset_b:
7310 case Intrinsic::loongarch_lsx_vbitset_h:
7311 case Intrinsic::loongarch_lsx_vbitset_w:
7312 case Intrinsic::loongarch_lsx_vbitset_d:
7313 case Intrinsic::loongarch_lasx_xvbitset_b:
7314 case Intrinsic::loongarch_lasx_xvbitset_h:
7315 case Intrinsic::loongarch_lasx_xvbitset_w:
7316 case Intrinsic::loongarch_lasx_xvbitset_d: {
7317 EVT VecTy = N->getValueType(0);
7318 SDValue One = DAG.getConstant(1, DL, VecTy);
7319 return DAG.getNode(
7320 ISD::OR, DL, VecTy, N->getOperand(1),
7321 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7322 }
7323 case Intrinsic::loongarch_lsx_vbitseti_b:
7324 case Intrinsic::loongarch_lasx_xvbitseti_b:
7325 return lowerVectorBitSetImm<3>(N, DAG);
7326 case Intrinsic::loongarch_lsx_vbitseti_h:
7327 case Intrinsic::loongarch_lasx_xvbitseti_h:
7328 return lowerVectorBitSetImm<4>(N, DAG);
7329 case Intrinsic::loongarch_lsx_vbitseti_w:
7330 case Intrinsic::loongarch_lasx_xvbitseti_w:
7331 return lowerVectorBitSetImm<5>(N, DAG);
7332 case Intrinsic::loongarch_lsx_vbitseti_d:
7333 case Intrinsic::loongarch_lasx_xvbitseti_d:
7334 return lowerVectorBitSetImm<6>(N, DAG);
7335 case Intrinsic::loongarch_lsx_vbitrev_b:
7336 case Intrinsic::loongarch_lsx_vbitrev_h:
7337 case Intrinsic::loongarch_lsx_vbitrev_w:
7338 case Intrinsic::loongarch_lsx_vbitrev_d:
7339 case Intrinsic::loongarch_lasx_xvbitrev_b:
7340 case Intrinsic::loongarch_lasx_xvbitrev_h:
7341 case Intrinsic::loongarch_lasx_xvbitrev_w:
7342 case Intrinsic::loongarch_lasx_xvbitrev_d: {
7343 EVT VecTy = N->getValueType(0);
7344 SDValue One = DAG.getConstant(1, DL, VecTy);
7345 return DAG.getNode(
7346 ISD::XOR, DL, VecTy, N->getOperand(1),
7347 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
7348 }
7349 case Intrinsic::loongarch_lsx_vbitrevi_b:
7350 case Intrinsic::loongarch_lasx_xvbitrevi_b:
7351 return lowerVectorBitRevImm<3>(N, DAG);
7352 case Intrinsic::loongarch_lsx_vbitrevi_h:
7353 case Intrinsic::loongarch_lasx_xvbitrevi_h:
7354 return lowerVectorBitRevImm<4>(N, DAG);
7355 case Intrinsic::loongarch_lsx_vbitrevi_w:
7356 case Intrinsic::loongarch_lasx_xvbitrevi_w:
7357 return lowerVectorBitRevImm<5>(N, DAG);
7358 case Intrinsic::loongarch_lsx_vbitrevi_d:
7359 case Intrinsic::loongarch_lasx_xvbitrevi_d:
7360 return lowerVectorBitRevImm<6>(N, DAG);
7361 case Intrinsic::loongarch_lsx_vfadd_s:
7362 case Intrinsic::loongarch_lsx_vfadd_d:
7363 case Intrinsic::loongarch_lasx_xvfadd_s:
7364 case Intrinsic::loongarch_lasx_xvfadd_d:
7365 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
7366 N->getOperand(2));
7367 case Intrinsic::loongarch_lsx_vfsub_s:
7368 case Intrinsic::loongarch_lsx_vfsub_d:
7369 case Intrinsic::loongarch_lasx_xvfsub_s:
7370 case Intrinsic::loongarch_lasx_xvfsub_d:
7371 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
7372 N->getOperand(2));
7373 case Intrinsic::loongarch_lsx_vfmul_s:
7374 case Intrinsic::loongarch_lsx_vfmul_d:
7375 case Intrinsic::loongarch_lasx_xvfmul_s:
7376 case Intrinsic::loongarch_lasx_xvfmul_d:
7377 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
7378 N->getOperand(2));
7379 case Intrinsic::loongarch_lsx_vfdiv_s:
7380 case Intrinsic::loongarch_lsx_vfdiv_d:
7381 case Intrinsic::loongarch_lasx_xvfdiv_s:
7382 case Intrinsic::loongarch_lasx_xvfdiv_d:
7383 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
7384 N->getOperand(2));
7385 case Intrinsic::loongarch_lsx_vfmadd_s:
7386 case Intrinsic::loongarch_lsx_vfmadd_d:
7387 case Intrinsic::loongarch_lasx_xvfmadd_s:
7388 case Intrinsic::loongarch_lasx_xvfmadd_d:
7389 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
7390 N->getOperand(2), N->getOperand(3));
7391 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
7392 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7393 N->getOperand(1), N->getOperand(2),
7394 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
7395 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
7396 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
7397 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7398 N->getOperand(1), N->getOperand(2),
7399 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
7400 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
7401 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
7402 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7403 N->getOperand(1), N->getOperand(2),
7404 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
7405 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
7406 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
7407 N->getOperand(1), N->getOperand(2),
7408 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
7409 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
7410 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
7411 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
7412 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
7413 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
7414 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
7415 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
7416 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
7417 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
7418 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7419 N->getOperand(1)));
7420 case Intrinsic::loongarch_lsx_vreplve_b:
7421 case Intrinsic::loongarch_lsx_vreplve_h:
7422 case Intrinsic::loongarch_lsx_vreplve_w:
7423 case Intrinsic::loongarch_lsx_vreplve_d:
7424 case Intrinsic::loongarch_lasx_xvreplve_b:
7425 case Intrinsic::loongarch_lasx_xvreplve_h:
7426 case Intrinsic::loongarch_lasx_xvreplve_w:
7427 case Intrinsic::loongarch_lasx_xvreplve_d:
7428 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
7429 N->getOperand(1),
7430 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
7431 N->getOperand(2)));
7432 case Intrinsic::loongarch_lsx_vpickve2gr_b:
7433 if (!Subtarget.is64Bit())
7434 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7435 break;
7436 case Intrinsic::loongarch_lsx_vpickve2gr_h:
7437 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
7438 if (!Subtarget.is64Bit())
7439 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7440 break;
7441 case Intrinsic::loongarch_lsx_vpickve2gr_w:
7442 if (!Subtarget.is64Bit())
7443 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
7444 break;
7445 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
7446 if (!Subtarget.is64Bit())
7447 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7448 break;
7449 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
7450 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
7451 if (!Subtarget.is64Bit())
7452 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7453 break;
7454 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
7455 if (!Subtarget.is64Bit())
7456 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
7457 break;
7458 case Intrinsic::loongarch_lsx_bz_b:
7459 case Intrinsic::loongarch_lsx_bz_h:
7460 case Intrinsic::loongarch_lsx_bz_w:
7461 case Intrinsic::loongarch_lsx_bz_d:
7462 case Intrinsic::loongarch_lasx_xbz_b:
7463 case Intrinsic::loongarch_lasx_xbz_h:
7464 case Intrinsic::loongarch_lasx_xbz_w:
7465 case Intrinsic::loongarch_lasx_xbz_d:
7466 if (!Subtarget.is64Bit())
7467 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
7468 N->getOperand(1));
7469 break;
7470 case Intrinsic::loongarch_lsx_bz_v:
7471 case Intrinsic::loongarch_lasx_xbz_v:
7472 if (!Subtarget.is64Bit())
7473 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
7474 N->getOperand(1));
7475 break;
7476 case Intrinsic::loongarch_lsx_bnz_b:
7477 case Intrinsic::loongarch_lsx_bnz_h:
7478 case Intrinsic::loongarch_lsx_bnz_w:
7479 case Intrinsic::loongarch_lsx_bnz_d:
7480 case Intrinsic::loongarch_lasx_xbnz_b:
7481 case Intrinsic::loongarch_lasx_xbnz_h:
7482 case Intrinsic::loongarch_lasx_xbnz_w:
7483 case Intrinsic::loongarch_lasx_xbnz_d:
7484 if (!Subtarget.is64Bit())
7485 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
7486 N->getOperand(1));
7487 break;
7488 case Intrinsic::loongarch_lsx_bnz_v:
7489 case Intrinsic::loongarch_lasx_xbnz_v:
7490 if (!Subtarget.is64Bit())
7491 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
7492 N->getOperand(1));
7493 break;
7494 case Intrinsic::loongarch_lasx_concat_128_s:
7495 case Intrinsic::loongarch_lasx_concat_128_d:
7496 case Intrinsic::loongarch_lasx_concat_128:
7497 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
7498 N->getOperand(1), N->getOperand(2));
7499 }
7500 return SDValue();
7501}
7502
7503static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
7504 TargetLowering::DAGCombinerInfo &DCI,
7505 const LoongArchSubtarget &Subtarget) {
7506 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
7507 // conversion is unnecessary and can be replaced with the
7508 // MOVFR2GR_S_LA64 operand.
7509 SDValue Op0 = N->getOperand(0);
7510 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
7511 return Op0.getOperand(0);
7512 return SDValue();
7513}
7514
7515static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
7516 TargetLowering::DAGCombinerInfo &DCI,
7517 const LoongArchSubtarget &Subtarget) {
7518 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
7519 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
7520 // operand.
7521 SDValue Op0 = N->getOperand(0);
7522 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
7523 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
7524 "Unexpected value type!");
7525 return Op0.getOperand(0);
7526 }
7527 return SDValue();
7528}
7529
7530static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
7531 TargetLowering::DAGCombinerInfo &DCI,
7532 const LoongArchSubtarget &Subtarget) {
7533 MVT VT = N->getSimpleValueType(0);
7534 unsigned NumBits = VT.getScalarSizeInBits();
7535
7536 // Simplify the inputs.
7537 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7538 APInt DemandedMask(APInt::getAllOnes(NumBits));
7539 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
7540 return SDValue(N, 0);
7541
7542 return SDValue();
7543}
7544
7545static SDValue
7546performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
7547 TargetLowering::DAGCombinerInfo &DCI,
7548 const LoongArchSubtarget &Subtarget) {
7549 SDValue Op0 = N->getOperand(0);
7550 SDLoc DL(N);
7551
7552 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
7553 // redundant. Instead, use BuildPairF64's operands directly.
7554 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
7555 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
7556
7557 if (Op0->isUndef()) {
7558 SDValue Lo = DAG.getUNDEF(MVT::i32);
7559 SDValue Hi = DAG.getUNDEF(MVT::i32);
7560 return DCI.CombineTo(N, Lo, Hi);
7561 }
7562
7563 // It's cheaper to materialise two 32-bit integers than to load a double
7564 // from the constant pool and transfer it to integer registers through the
7565 // stack.
7566 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
7567 APInt V = C->getValueAPF().bitcastToAPInt();
7568 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
7569 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
7570 return DCI.CombineTo(N, Lo, Hi);
7571 }
7572
7573 return SDValue();
7574}
7575
7576/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
7577static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG,
7578 TargetLowering::DAGCombinerInfo &DCI,
7579 const LoongArchSubtarget &Subtarget) {
7580 SDValue N0 = N->getOperand(0);
7581 SDValue N1 = N->getOperand(1);
7582 MVT VT = N->getSimpleValueType(0);
7583 SDLoc DL(N);
7584
7585 // VANDN(undef, x) -> 0
7586 // VANDN(x, undef) -> 0
7587 if (N0.isUndef() || N1.isUndef())
7588 return DAG.getConstant(0, DL, VT);
7589
7590 // VANDN(0, x) -> x
7591 if (ISD::isBuildVectorAllZeros(N0.getNode()))
7592 return N1;
7593
7594 // VANDN(x, 0) -> 0
7595 if (ISD::isBuildVectorAllZeros(N1.getNode()))
7596 return DAG.getConstant(0, DL, VT);
7597
7598 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
7599 if (ISD::isBuildVectorAllOnes(N1.getNode()))
7600 return DAG.getNOT(DL, N0, VT);
7601
7602 // Turn VANDN back to AND if input is inverted.
7603 if (SDValue Not = isNOT(N0, DAG))
7604 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
7605
7606 // Folds for better commutativity:
7607 if (N1->hasOneUse()) {
7608 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
7609 if (SDValue Not = isNOT(N1, DAG))
7610 return DAG.getNOT(
7611 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
7612
7613 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
7614 // -> NOT(OR(x, SplatVector(-Imm))
7615 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
7616 // gain benefits.
7617 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
7618 N1.getOpcode() == ISD::BUILD_VECTOR) {
7619 if (SDValue SplatValue =
7620 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
7621 if (!N1->isOnlyUserOf(SplatValue.getNode()))
7622 return SDValue();
7623
7624 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
7625 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
7626 SDValue Not =
7627 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
7628 return DAG.getNOT(
7629 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
7630 VT);
7631 }
7632 }
7633 }
7634 }
7635
7636 return SDValue();
7637}
7638
7639static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
7640 TargetLowering::DAGCombinerInfo &DCI,
7641 const LoongArchSubtarget &Subtarget) {
7642 SDLoc DL(N);
7643 EVT VT = N->getValueType(0);
7644
7645 if (VT != MVT::f32 && VT != MVT::f64)
7646 return SDValue();
7647 if (VT == MVT::f32 && !Subtarget.hasBasicF())
7648 return SDValue();
7649 if (VT == MVT::f64 && !Subtarget.hasBasicD())
7650 return SDValue();
7651
7652 // Only optimize when the source and destination types have the same width.
7653 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
7654 return SDValue();
7655
7656 SDValue Src = N->getOperand(0);
7657 // If the result of an integer load is only used by an integer-to-float
7658 // conversion, use an FP load instead. This eliminates an integer-to-float move
7659 // (movgr2fr) instruction.
7660 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
7661 // Do not change the width of a volatile load. This condition check is
7662 // inspired by AArch64.
7663 !cast<LoadSDNode>(Src)->isVolatile()) {
7664 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
7665 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
7666 LN0->getPointerInfo(), LN0->getAlign(),
7667 LN0->getMemOperand()->getFlags());
7668
7669 // Make sure successors of the original load stay after it by updating them
7670 // to use the new Chain.
7671 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7672 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7673 }
7674
7675 return SDValue();
7676}
7677
7678// Try to widen AND, OR and XOR nodes to VT in order to remove casts around
7679// logical operations, like in the example below.
7680// or (and (truncate x, truncate y)),
7681// (xor (truncate z, build_vector (constants)))
7682// Given a target type \p VT, we generate
7683// or (and x, y), (xor z, zext(build_vector (constants)))
7684// given x, y and z of type \p VT. We can do so if each operand is either a
7685// truncate from VT, a vector of constants that can be folded, something that
7686// can be recursively promoted, or an existing extension we can extend further.
7687static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT,
7688 SelectionDAG &DAG,
7689 const LoongArchSubtarget &Subtarget,
7690 unsigned Depth) {
7691 // Limit recursion to avoid excessive compile times.
7692 if (Depth >= SelectionDAG::MaxRecursionDepth)
7693 return SDValue();
7694
7695 if (!ISD::isBitwiseLogicOp(N.getOpcode()))
7696 return SDValue();
7697
7698 SDValue N0 = N.getOperand(0);
7699 SDValue N1 = N.getOperand(1);
7700
7701 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7702 if (!TLI.isOperationLegalOrPromote(N.getOpcode(), VT))
7703 return SDValue();
7704
7705 if (SDValue NN0 =
7706 PromoteMaskArithmetic(N0, DL, VT, DAG, Subtarget, Depth + 1))
7707 N0 = NN0;
7708 else {
7709 // The left side has to be a 'trunc'.
7710 bool LHSTrunc = N0.getOpcode() == ISD::TRUNCATE &&
7711 N0.getOperand(0).getValueType() == VT;
7712 if (LHSTrunc)
7713 N0 = N0.getOperand(0);
7714 else
7715 return SDValue();
7716 }
7717
7718 if (SDValue NN1 =
7719 PromoteMaskArithmetic(N1, DL, VT, DAG, Subtarget, Depth + 1))
7720 N1 = NN1;
7721 else {
7722 // The right side has to be a 'trunc', a (foldable) constant or an
7723 // existing extension we can extend further.
7724 bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
7725 N1.getOperand(0).getValueType() == VT;
7726 if (RHSTrunc)
7727 N1 = N1.getOperand(0);
7728 else if (ISD::isExtVecInRegOpcode(N1.getOpcode()) && VT.is256BitVector() &&
7729 Subtarget.hasExtLASX() && N1.hasOneUse())
7730 N1 = DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0));
7731 // On 32-bit platform, i64 is an illegal integer scalar type, and
7732 // FoldConstantArithmetic will fail for v4i64. This may be optimized in the
7733 // future.
7734 else if (SDValue Cst =
7735 DAG.FoldConstantArithmetic(ISD::ZERO_EXTEND, DL, VT, {N1}))
7736 N1 = Cst;
7737 else
7738 return SDValue();
7739 }
7740
7741 return DAG.getNode(N.getOpcode(), DL, VT, N0, N1);
7742}
7743
7744// On LASX the type v4i1/v8i1/v16i1 may be legalized to v4i32/v8i16/v16i8, which
7745// are LSX-sized registers. In most cases we actually compare or select LASX-sized
7746// registers and mixing the two types creates horrible code. This method
7747// optimizes some of the transition sequences.
7748static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL,
7749 SelectionDAG &DAG,
7750 const LoongArchSubtarget &Subtarget) {
7751 EVT VT = N.getValueType();
7752 assert(VT.isVector() && "Expected vector type");
7753 assert((N.getOpcode() == ISD::ANY_EXTEND ||
7754 N.getOpcode() == ISD::ZERO_EXTEND ||
7755 N.getOpcode() == ISD::SIGN_EXTEND) &&
7756 "Invalid Node");
7757
7758 if (!Subtarget.hasExtLASX() || !VT.is256BitVector())
7759 return SDValue();
7760
7761 SDValue Narrow = N.getOperand(0);
7762 EVT NarrowVT = Narrow.getValueType();
7763
7764 // Generate the wide operation.
7765 SDValue Op = PromoteMaskArithmetic(Narrow, DL, VT, DAG, Subtarget, 0);
7766 if (!Op)
7767 return SDValue();
7768 switch (N.getOpcode()) {
7769 default:
7770 llvm_unreachable("Unexpected opcode");
7771 case ISD::ANY_EXTEND:
7772 return Op;
7773 case ISD::ZERO_EXTEND:
7774 return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
7775 case ISD::SIGN_EXTEND:
7776 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
7777 DAG.getValueType(NarrowVT));
7778 }
7779}
7780
7781static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG,
7782 TargetLowering::DAGCombinerInfo &DCI,
7783 const LoongArchSubtarget &Subtarget) {
7784 EVT VT = N->getValueType(0);
7785 SDLoc DL(N);
7786
7787 if (VT.isVector())
7788 if (SDValue R = PromoteMaskArithmetic(SDValue(N, 0), DL, DAG, Subtarget))
7789 return R;
7790
7791 return SDValue();
7792}
7793
7794static SDValue
7795performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
7796 TargetLowering::DAGCombinerInfo &DCI,
7797 const LoongArchSubtarget &Subtarget) {
7798 SDLoc DL(N);
7799 EVT VT = N->getValueType(0);
7800
7801 if (VT.isVector() && N->getNumOperands() == 2)
7802 if (SDValue R = combineFP_ROUND(SDValue(N, 0), DL, DAG, Subtarget))
7803 return R;
7804
7805 return SDValue();
7806}
7807
7808static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG,
7809 TargetLowering::DAGCombinerInfo &DCI,
7810 const LoongArchSubtarget &Subtarget) {
7811 if (DCI.isBeforeLegalizeOps())
7812 return SDValue();
7813
7814 EVT VT = N->getValueType(0);
7815 if (!VT.isVector())
7816 return SDValue();
7817
7818 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
7819 return SDValue();
7820
7821 EVT EltVT = VT.getVectorElementType();
7822 if (!EltVT.isInteger())
7823 return SDValue();
7824
7825 SDValue Cond = N->getOperand(0);
7826 SDValue TrueVal = N->getOperand(1);
7827 SDValue FalseVal = N->getOperand(2);
7828
7829 // match:
7830 //
7831 // vselect (setcc shift, 0, seteq),
7832 // x,
7833 // rounded_shift
7834
7835 if (Cond.getOpcode() != ISD::SETCC)
7836 return SDValue();
7837
7838 if (!ISD::isConstantSplatVectorAllZeros(Cond.getOperand(1).getNode()))
7839 return SDValue();
7840
7841 auto *CC = cast<CondCodeSDNode>(Cond.getOperand(2));
7842 if (CC->get() != ISD::SETEQ)
7843 return SDValue();
7844
7845 SDValue Shift = Cond.getOperand(0);
7846
7847 // True branch must be original value:
7848 //
7849 // vselect cond, x, ...
7850
7851 SDValue X = TrueVal;
7852
7853 // Now match rounded shift pattern:
7854 //
7855 // add
7856 // (and
7857 // (srl X, shift-1)
7858 // 1)
7859 // (srl/sra X, shift)
7860
7861 if (FalseVal.getOpcode() != ISD::ADD)
7862 return SDValue();
7863
7864 SDValue Add0 = FalseVal.getOperand(0);
7865 SDValue Add1 = FalseVal.getOperand(1);
7866 SDValue And;
7867 SDValue Shr;
7868
7869 if (Add0.getOpcode() == ISD::AND) {
7870 And = Add0;
7871 Shr = Add1;
7872 } else if (Add1.getOpcode() == ISD::AND) {
7873 And = Add1;
7874 Shr = Add0;
7875 } else {
7876 return SDValue();
7877 }
7878
7879 // match:
7880 //
7881 // srl/sra X, shift
7882
7883 if (Shr.getOpcode() != ISD::SRL && Shr.getOpcode() != ISD::SRA)
7884 return SDValue();
7885
7886 if (Shr.getOperand(0) != X)
7887 return SDValue();
7888
7889 if (Shr.getOperand(1) != Shift)
7890 return SDValue();
7891
7892 // match:
7893 //
7894 // and
7895 // (srl X, shift-1)
7896 // 1
7897
7898 SDValue Srl = And.getOperand(0);
7899 SDValue One = And.getOperand(1);
7900 APInt SplatVal;
7901
7902 if (Srl.getOpcode() != ISD::SRL)
7903 return SDValue();
7904
7905 One = peekThroughBitcasts(One);
7906 if (!isConstantSplatVector(One, SplatVal, EltVT.getSizeInBits()))
7907 return SDValue();
7908
7909 if (SplatVal != 1)
7910 return SDValue();
7911
7912 if (Srl.getOperand(0) != X)
7913 return SDValue();
7914
7915 // match:
7916 //
7917 // shift-1
7918
7919 SDValue ShiftMinus1 = Srl.getOperand(1);
7920
7921 if (ShiftMinus1.getOpcode() != ISD::ADD)
7922 return SDValue();
7923
7924 if (ShiftMinus1.getOperand(0) != Shift)
7925 return SDValue();
7926
7927 if (!ISD::isConstantSplatVectorAllOnes(ShiftMinus1.getOperand(1).getNode()))
7928 return SDValue();
7929
7930 // We matched a rounded right shift pattern and can lower it
7931 // to a single vector rounded shift instruction.
7932
7933 SDLoc DL(N);
7934 return DAG.getNode(Shr.getOpcode() == ISD::SRL ? LoongArchISD::VSRLR
7935 : LoongArchISD::VSRAR,
7936 DL, VT, X, Shift);
7937}
7938
7939SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
7940 DAGCombinerInfo &DCI) const {
7941 SelectionDAG &DAG = DCI.DAG;
7942 switch (N->getOpcode()) {
7943 default:
7944 break;
7945 case ISD::ADD:
7946 return performADDCombine(N, DAG, DCI, Subtarget);
7947 case ISD::AND:
7948 return performANDCombine(N, DAG, DCI, Subtarget);
7949 case ISD::OR:
7950 return performORCombine(N, DAG, DCI, Subtarget);
7951 case ISD::SETCC:
7952 return performSETCCCombine(N, DAG, DCI, Subtarget);
7953 case ISD::SRL:
7954 return performSRLCombine(N, DAG, DCI, Subtarget);
7955 case ISD::BITCAST:
7956 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7957 case ISD::ANY_EXTEND:
7958 case ISD::ZERO_EXTEND:
7959 case ISD::SIGN_EXTEND:
7960 return performEXTENDCombine(N, DAG, DCI, Subtarget);
7961 case ISD::SINT_TO_FP:
7962 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7963 case LoongArchISD::BITREV_W:
7964 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7965 case LoongArchISD::BR_CC:
7966 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7967 case LoongArchISD::SELECT_CC:
7968 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7969 case ISD::INTRINSIC_WO_CHAIN:
7970 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7971 case LoongArchISD::MOVGR2FR_W_LA64:
7972 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7973 case LoongArchISD::MOVFR2GR_S_LA64:
7974 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7975 case LoongArchISD::VMSKLTZ:
7976 case LoongArchISD::XVMSKLTZ:
7977 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7978 case LoongArchISD::SPLIT_PAIR_F64:
7979 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7980 case LoongArchISD::VANDN:
7981 return performVANDNCombine(N, DAG, DCI, Subtarget);
7982 case ISD::CONCAT_VECTORS:
7983 return performCONCAT_VECTORSCombine(N, DAG, DCI, Subtarget);
7984 case ISD::VSELECT:
7985 return performVSELECTCombine(N, DAG, DCI, Subtarget);
7986 case LoongArchISD::VPACKEV:
7987 case LoongArchISD::VPERMI:
7988 if (SDValue Result =
7989 combineFP_ROUND(SDValue(N, 0), SDLoc(N), DAG, Subtarget))
7990 return Result;
7991 }
7992 return SDValue();
7993}
7994
7995static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
7996 MachineBasicBlock *MBB) {
7997 if (!ZeroDivCheck)
7998 return MBB;
7999
8000 // Build instructions:
8001 // MBB:
8002 // div(or mod) $dst, $dividend, $divisor
8003 // bne $divisor, $zero, SinkMBB
8004 // BreakMBB:
8005 // break 7 // BRK_DIVZERO
8006 // SinkMBB:
8007 // fallthrough
8008 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
8009 MachineFunction::iterator It = ++MBB->getIterator();
8010 MachineFunction *MF = MBB->getParent();
8011 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8012 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
8013 MF->insert(It, BreakMBB);
8014 MF->insert(It, SinkMBB);
8015
8016 // Transfer the remainder of MBB and its successor edges to SinkMBB.
8017 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
8018 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
8019
8020 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
8021 DebugLoc DL = MI.getDebugLoc();
8022 MachineOperand &Divisor = MI.getOperand(2);
8023 Register DivisorReg = Divisor.getReg();
8024
8025 // MBB:
8026 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
8027 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
8028 .addReg(LoongArch::R0)
8029 .addMBB(SinkMBB);
8030 MBB->addSuccessor(BreakMBB);
8031 MBB->addSuccessor(SinkMBB);
8032
8033 // BreakMBB:
8034 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
8035 // definition of BRK_DIVZERO.
8036 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
8037 BreakMBB->addSuccessor(SinkMBB);
8038
8039 // Clear Divisor's kill flag.
8040 Divisor.setIsKill(false);
8041
8042 return SinkMBB;
8043}
8044
8045static MachineBasicBlock *
8046emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
8047 const LoongArchSubtarget &Subtarget) {
8048 unsigned CondOpc;
8049 switch (MI.getOpcode()) {
8050 default:
8051 llvm_unreachable("Unexpected opcode");
8052 case LoongArch::PseudoVBZ:
8053 CondOpc = LoongArch::VSETEQZ_V;
8054 break;
8055 case LoongArch::PseudoVBZ_B:
8056 CondOpc = LoongArch::VSETANYEQZ_B;
8057 break;
8058 case LoongArch::PseudoVBZ_H:
8059 CondOpc = LoongArch::VSETANYEQZ_H;
8060 break;
8061 case LoongArch::PseudoVBZ_W:
8062 CondOpc = LoongArch::VSETANYEQZ_W;
8063 break;
8064 case LoongArch::PseudoVBZ_D:
8065 CondOpc = LoongArch::VSETANYEQZ_D;
8066 break;
8067 case LoongArch::PseudoVBNZ:
8068 CondOpc = LoongArch::VSETNEZ_V;
8069 break;
8070 case LoongArch::PseudoVBNZ_B:
8071 CondOpc = LoongArch::VSETALLNEZ_B;
8072 break;
8073 case LoongArch::PseudoVBNZ_H:
8074 CondOpc = LoongArch::VSETALLNEZ_H;
8075 break;
8076 case LoongArch::PseudoVBNZ_W:
8077 CondOpc = LoongArch::VSETALLNEZ_W;
8078 break;
8079 case LoongArch::PseudoVBNZ_D:
8080 CondOpc = LoongArch::VSETALLNEZ_D;
8081 break;
8082 case LoongArch::PseudoXVBZ:
8083 CondOpc = LoongArch::XVSETEQZ_V;
8084 break;
8085 case LoongArch::PseudoXVBZ_B:
8086 CondOpc = LoongArch::XVSETANYEQZ_B;
8087 break;
8088 case LoongArch::PseudoXVBZ_H:
8089 CondOpc = LoongArch::XVSETANYEQZ_H;
8090 break;
8091 case LoongArch::PseudoXVBZ_W:
8092 CondOpc = LoongArch::XVSETANYEQZ_W;
8093 break;
8094 case LoongArch::PseudoXVBZ_D:
8095 CondOpc = LoongArch::XVSETANYEQZ_D;
8096 break;
8097 case LoongArch::PseudoXVBNZ:
8098 CondOpc = LoongArch::XVSETNEZ_V;
8099 break;
8100 case LoongArch::PseudoXVBNZ_B:
8101 CondOpc = LoongArch::XVSETALLNEZ_B;
8102 break;
8103 case LoongArch::PseudoXVBNZ_H:
8104 CondOpc = LoongArch::XVSETALLNEZ_H;
8105 break;
8106 case LoongArch::PseudoXVBNZ_W:
8107 CondOpc = LoongArch::XVSETALLNEZ_W;
8108 break;
8109 case LoongArch::PseudoXVBNZ_D:
8110 CondOpc = LoongArch::XVSETALLNEZ_D;
8111 break;
8112 }
8113
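// Expand the pseudo into a diamond: compute the vector condition into an FCC
// register, branch on it, materialize 0 in FalseBB and 1 in TrueBB, and merge
// the results with a PHI in SinkBB.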
8114 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8115 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8116 DebugLoc DL = MI.getDebugLoc();
8117 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8118 MachineFunction::iterator It = ++BB->getIterator();
8119
8120 MachineFunction *F = BB->getParent();
8121 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
8122 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
8123 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
8124
8125 F->insert(It, FalseBB);
8126 F->insert(It, TrueBB);
8127 F->insert(It, SinkBB);
8128
8129 // Transfer the remainder of MBB and its successor edges to Sink.
8130 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
8131 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
8132
8133 // Insert the real instruction to BB.
8134 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
8135 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
8136
8137 // Insert branch.
8138 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
8139 BB->addSuccessor(FalseBB);
8140 BB->addSuccessor(TrueBB);
8141
8142 // FalseBB.
8143 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8144 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
8145 .addReg(LoongArch::R0)
8146 .addImm(0);
8147 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
8148 FalseBB->addSuccessor(SinkBB);
8149
8150 // TrueBB.
8151 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8152 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
8153 .addReg(LoongArch::R0)
8154 .addImm(1);
8155 TrueBB->addSuccessor(SinkBB);
8156
8157 // SinkBB: merge the results.
8158 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
8159 MI.getOperand(0).getReg())
8160 .addReg(RD1)
8161 .addMBB(FalseBB)
8162 .addReg(RD2)
8163 .addMBB(TrueBB);
8164
8165 // The pseudo instruction is gone now.
8166 MI.eraseFromParent();
8167 return SinkBB;
8168}
8169
8170static MachineBasicBlock *
8171emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
8172 const LoongArchSubtarget &Subtarget) {
8173 unsigned InsOp;
8174 unsigned BroadcastOp;
8175 unsigned HalfSize;
8176 switch (MI.getOpcode()) {
8177 default:
8178 llvm_unreachable("Unexpected opcode");
8179 case LoongArch::PseudoXVINSGR2VR_B:
8180 HalfSize = 16;
8181 BroadcastOp = LoongArch::XVREPLGR2VR_B;
8182 InsOp = LoongArch::XVEXTRINS_B;
8183 break;
8184 case LoongArch::PseudoXVINSGR2VR_H:
8185 HalfSize = 8;
8186 BroadcastOp = LoongArch::XVREPLGR2VR_H;
8187 InsOp = LoongArch::XVEXTRINS_H;
8188 break;
8189 }
8190 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8191 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
8192 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
8193 DebugLoc DL = MI.getDebugLoc();
8194 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8195 // XDst = vector_insert XSrc, Elt, Idx
8196 Register XDst = MI.getOperand(0).getReg();
8197 Register XSrc = MI.getOperand(1).getReg();
8198 Register Elt = MI.getOperand(2).getReg();
8199 unsigned Idx = MI.getOperand(3).getImm();
8200
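// If the 256-bit source is an IMPLICIT_DEF and the index lies in the low
// half, insert via the 128-bit subregister; otherwise broadcast the element
// and merge it into place with XVPERMI_Q and XVEXTRINS.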
8201 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
8202 Idx < HalfSize) {
8203 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
8204 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
8205
8206 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
8207 .addReg(XSrc, {}, LoongArch::sub_128);
8208 BuildMI(*BB, MI, DL,
8209 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
8210 : LoongArch::VINSGR2VR_B),
8211 ScratchSubReg2)
8212 .addReg(ScratchSubReg1)
8213 .addReg(Elt)
8214 .addImm(Idx);
8215
8216 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
8217 .addReg(ScratchSubReg2)
8218 .addImm(LoongArch::sub_128);
8219 } else {
8220 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8221 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8222
8223 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
8224
8225 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
8226 .addReg(ScratchReg1)
8227 .addReg(XSrc)
8228 .addImm(Idx >= HalfSize ? 48 : 18);
8229
8230 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
8231 .addReg(XSrc)
8232 .addReg(ScratchReg2)
8233 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
8234 }
8235
8236 MI.eraseFromParent();
8237 return BB;
8238}
8239
8240static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
8241 MachineBasicBlock *BB,
8242 const LoongArchSubtarget &Subtarget) {
8243 assert(Subtarget.hasExtLSX());
8244 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8245 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8246 DebugLoc DL = MI.getDebugLoc();
8247 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8248 Register Dst = MI.getOperand(0).getReg();
8249 Register Src = MI.getOperand(1).getReg();
8250
8251 unsigned BroadcastOp, CTOp, PickOp;
8252 switch (MI.getOpcode()) {
8253 default:
8254 llvm_unreachable("Unexpected opcode");
8255 case LoongArch::PseudoCTPOP_B:
8256 BroadcastOp = LoongArch::VREPLGR2VR_B;
8257 CTOp = LoongArch::VPCNT_B;
8258 PickOp = LoongArch::VPICKVE2GR_B;
8259 break;
8260 case LoongArch::PseudoCTPOP_H:
8261 case LoongArch::PseudoCTPOP_H_LA32:
8262 BroadcastOp = LoongArch::VREPLGR2VR_H;
8263 CTOp = LoongArch::VPCNT_H;
8264 PickOp = LoongArch::VPICKVE2GR_H;
8265 break;
8266 case LoongArch::PseudoCTPOP_W:
8267 case LoongArch::PseudoCTPOP_W_LA32:
8268 BroadcastOp = LoongArch::VREPLGR2VR_W;
8269 CTOp = LoongArch::VPCNT_W;
8270 PickOp = LoongArch::VPICKVE2GR_W;
8271 break;
8272 case LoongArch::PseudoCTPOP_D:
8273 BroadcastOp = LoongArch::VREPLGR2VR_D;
8274 CTOp = LoongArch::VPCNT_D;
8275 PickOp = LoongArch::VPICKVE2GR_D;
8276 break;
8277 }
8278
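// Scalar ctpop is emulated through the vector unit: broadcast the GPR,
// count bits per element with vpcnt, then read the result back from lane 0.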
8279 Register ScratchReg1 = MRI.createVirtualRegister(RC);
8280 Register ScratchReg2 = MRI.createVirtualRegister(RC);
8281 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Src);
8282 BuildMI(*BB, MI, DL, TII->get(CTOp), ScratchReg2).addReg(ScratchReg1);
8283 BuildMI(*BB, MI, DL, TII->get(PickOp), Dst).addReg(ScratchReg2).addImm(0);
8284
8285 MI.eraseFromParent();
8286 return BB;
8287}
8288
8289static MachineBasicBlock *
8290 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
8291 const LoongArchSubtarget &Subtarget) {
8292 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8293 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
8294 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8295   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8296 Register Dst = MI.getOperand(0).getReg();
8297 Register Src = MI.getOperand(1).getReg();
8298 DebugLoc DL = MI.getDebugLoc();
8299 unsigned EleBits = 8;
8300 unsigned NotOpc = 0;
8301 unsigned MskOpc;
8302
8303 switch (MI.getOpcode()) {
8304 default:
8305 llvm_unreachable("Unexpected opcode");
8306 case LoongArch::PseudoVMSKLTZ_B:
8307 MskOpc = LoongArch::VMSKLTZ_B;
8308 break;
8309 case LoongArch::PseudoVMSKLTZ_H:
8310 MskOpc = LoongArch::VMSKLTZ_H;
8311 EleBits = 16;
8312 break;
8313 case LoongArch::PseudoVMSKLTZ_W:
8314 MskOpc = LoongArch::VMSKLTZ_W;
8315 EleBits = 32;
8316 break;
8317 case LoongArch::PseudoVMSKLTZ_D:
8318 MskOpc = LoongArch::VMSKLTZ_D;
8319 EleBits = 64;
8320 break;
8321 case LoongArch::PseudoVMSKGEZ_B:
8322 MskOpc = LoongArch::VMSKGEZ_B;
8323 break;
8324 case LoongArch::PseudoVMSKEQZ_B:
8325 MskOpc = LoongArch::VMSKNZ_B;
8326 NotOpc = LoongArch::VNOR_V;
8327 break;
8328 case LoongArch::PseudoVMSKNEZ_B:
8329 MskOpc = LoongArch::VMSKNZ_B;
8330 break;
8331 case LoongArch::PseudoXVMSKLTZ_B:
8332 MskOpc = LoongArch::XVMSKLTZ_B;
8333 RC = &LoongArch::LASX256RegClass;
8334 break;
8335 case LoongArch::PseudoXVMSKLTZ_H:
8336 MskOpc = LoongArch::XVMSKLTZ_H;
8337 RC = &LoongArch::LASX256RegClass;
8338 EleBits = 16;
8339 break;
8340 case LoongArch::PseudoXVMSKLTZ_W:
8341 MskOpc = LoongArch::XVMSKLTZ_W;
8342 RC = &LoongArch::LASX256RegClass;
8343 EleBits = 32;
8344 break;
8345 case LoongArch::PseudoXVMSKLTZ_D:
8346 MskOpc = LoongArch::XVMSKLTZ_D;
8347 RC = &LoongArch::LASX256RegClass;
8348 EleBits = 64;
8349 break;
8350 case LoongArch::PseudoXVMSKGEZ_B:
8351 MskOpc = LoongArch::XVMSKGEZ_B;
8352 RC = &LoongArch::LASX256RegClass;
8353 break;
8354 case LoongArch::PseudoXVMSKEQZ_B:
8355 MskOpc = LoongArch::XVMSKNZ_B;
8356 NotOpc = LoongArch::XVNOR_V;
8357 RC = &LoongArch::LASX256RegClass;
8358 break;
8359 case LoongArch::PseudoXVMSKNEZ_B:
8360 MskOpc = LoongArch::XVMSKNZ_B;
8361 RC = &LoongArch::LASX256RegClass;
8362 break;
8363 }
8364
8365 Register Msk = MRI.createVirtualRegister(RC);
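  // Note (editorial): there is no direct [x]vmskeqz instruction, so the
  // "equal to zero" mask is built as the complement of [x]vmsknz, using a
  // [x]vnor.v of the mask with itself (vnor of a value with itself is a
  // bitwise NOT).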
8366 if (NotOpc) {
8367 Register Tmp = MRI.createVirtualRegister(RC);
8368 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
8369 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
8370 .addReg(Tmp, RegState::Kill)
8371 .addReg(Tmp, RegState::Kill);
8372 } else {
8373 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
8374 }
8375
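  // Note (editorial): an LSX mask fits in a single vpickve2gr read. LASX
  // mask instructions produce one mask per 128-bit lane, so the two lane
  // masks are read out separately and combined with bstrins: Lo supplies
  // bits [128/EleBits-1:0] and Hi is inserted at [256/EleBits-1:128/EleBits].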
8376 if (TRI->getRegSizeInBits(*RC) > 128) {
8377 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8378 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
8379 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
8380 .addReg(Msk)
8381 .addImm(0);
8382 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
8383 .addReg(Msk, RegState::Kill)
8384 .addImm(4);
8385 BuildMI(*BB, MI, DL,
8386 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
8387 : LoongArch::BSTRINS_W),
8388 Dst)
8389         .addReg(Lo, RegState::Kill)
8390         .addReg(Hi, RegState::Kill)
8391 .addImm(256 / EleBits - 1)
8392 .addImm(128 / EleBits);
8393 } else {
8394 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
8395 .addReg(Msk, RegState::Kill)
8396 .addImm(0);
8397 }
8398
8399 MI.eraseFromParent();
8400 return BB;
8401}
8402
8403static MachineBasicBlock *
8404 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
8405 const LoongArchSubtarget &Subtarget) {
8406 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
8407 "Unexpected instruction");
8408
8409 MachineFunction &MF = *BB->getParent();
8410 DebugLoc DL = MI.getDebugLoc();
8411   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
8412 Register LoReg = MI.getOperand(0).getReg();
8413 Register HiReg = MI.getOperand(1).getReg();
8414 Register SrcReg = MI.getOperand(2).getReg();
8415
8416 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
8417 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
8418 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
8419 MI.eraseFromParent(); // The pseudo instruction is gone now.
8420 return BB;
8421}
8422
8423static MachineBasicBlock *
8424 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
8425 const LoongArchSubtarget &Subtarget) {
8426 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
8427 "Unexpected instruction");
8428
8429 MachineFunction &MF = *BB->getParent();
8430 DebugLoc DL = MI.getDebugLoc();
8431   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
8432   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
8433 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
8434 Register DstReg = MI.getOperand(0).getReg();
8435 Register LoReg = MI.getOperand(1).getReg();
8436 Register HiReg = MI.getOperand(2).getReg();
8437
8438 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
8439 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
8440 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
8441 .addReg(TmpReg, RegState::Kill)
8442 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
8443 MI.eraseFromParent(); // The pseudo instruction is gone now.
8444 return BB;
8445}
8446
8447 static bool isSelectPseudo(MachineInstr &MI) {
8448 switch (MI.getOpcode()) {
8449 default:
8450 return false;
8451 case LoongArch::Select_GPR_Using_CC_GPR:
8452 return true;
8453 }
8454}
8455
8456static MachineBasicBlock *
8457 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
8458 const LoongArchSubtarget &Subtarget) {
8459 // To "insert" Select_* instructions, we actually have to insert the triangle
8460 // control-flow pattern. The incoming instructions know the destination vreg
8461 // to set, the condition code register to branch on, the true/false values to
8462 // select between, and the condcode to use to select the appropriate branch.
8463 //
8464 // We produce the following control flow:
8465 // HeadMBB
8466 // | \
8467 // | IfFalseMBB
8468 // | /
8469 // TailMBB
8470 //
8471 // When we find a sequence of selects we attempt to optimize their emission
8472 // by sharing the control flow. Currently we only handle cases where we have
8473 // multiple selects with the exact same condition (same LHS, RHS and CC).
8474 // The selects may be interleaved with other instructions if the other
8475 // instructions meet some requirements we deem safe:
8476 // - They are not pseudo instructions.
8477 // - They are debug instructions, or otherwise:
8478 // - They do not have side-effects, do not access memory, and their inputs
8479 //   do not depend on the results of the select pseudo-instructions.
8480 // The TrueV/FalseV operands of the selects cannot depend on the result of
8481 // previous selects in the sequence.
8482 // These conditions could be further relaxed. See the X86 target for a
8483 // related approach and more information.
8484
8485 Register LHS = MI.getOperand(1).getReg();
8486 Register RHS;
8487 if (MI.getOperand(2).isReg())
8488 RHS = MI.getOperand(2).getReg();
8489 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
8490
8491 SmallVector<MachineInstr *, 4> SelectDebugValues;
8492 SmallSet<Register, 4> SelectDests;
8493 SelectDests.insert(MI.getOperand(0).getReg());
8494
8495 MachineInstr *LastSelectPseudo = &MI;
8496 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
8497 SequenceMBBI != E; ++SequenceMBBI) {
8498 if (SequenceMBBI->isDebugInstr())
8499 continue;
8500 if (isSelectPseudo(*SequenceMBBI)) {
8501 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
8502 !SequenceMBBI->getOperand(2).isReg() ||
8503 SequenceMBBI->getOperand(2).getReg() != RHS ||
8504 SequenceMBBI->getOperand(3).getImm() != CC ||
8505 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
8506 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
8507 break;
8508 LastSelectPseudo = &*SequenceMBBI;
8509 SequenceMBBI->collectDebugValues(SelectDebugValues);
8510 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
8511 continue;
8512 }
8513 if (SequenceMBBI->hasUnmodeledSideEffects() ||
8514 SequenceMBBI->mayLoadOrStore() ||
8515 SequenceMBBI->usesCustomInsertionHook())
8516 break;
8517 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
8518 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
8519 }))
8520 break;
8521 }
8522
8523 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
8524 const BasicBlock *LLVM_BB = BB->getBasicBlock();
8525 DebugLoc DL = MI.getDebugLoc();
8526   MachineFunction::iterator I = ++BB->getIterator();
8527
8528 MachineBasicBlock *HeadMBB = BB;
8529 MachineFunction *F = BB->getParent();
8530 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
8531 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
8532
8533 F->insert(I, IfFalseMBB);
8534 F->insert(I, TailMBB);
8535
8536 // Set the call frame size on entry to the new basic blocks.
8537 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
8538 IfFalseMBB->setCallFrameSize(CallFrameSize);
8539 TailMBB->setCallFrameSize(CallFrameSize);
8540
8541 // Transfer debug instructions associated with the selects to TailMBB.
8542 for (MachineInstr *DebugInstr : SelectDebugValues) {
8543 TailMBB->push_back(DebugInstr->removeFromParent());
8544 }
8545
8546 // Move all instructions after the sequence to TailMBB.
8547 TailMBB->splice(TailMBB->end(), HeadMBB,
8548 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
8549 // Update machine-CFG edges by transferring all successors of the current
8550 // block to the new block which will contain the Phi nodes for the selects.
8551 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
8552 // Set the successors for HeadMBB.
8553 HeadMBB->addSuccessor(IfFalseMBB);
8554 HeadMBB->addSuccessor(TailMBB);
8555
8556 // Insert appropriate branch.
8557 if (MI.getOperand(2).isImm())
8558 BuildMI(HeadMBB, DL, TII.get(CC))
8559 .addReg(LHS)
8560 .addImm(MI.getOperand(2).getImm())
8561 .addMBB(TailMBB);
8562 else
8563 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
8564
8565 // IfFalseMBB just falls through to TailMBB.
8566 IfFalseMBB->addSuccessor(TailMBB);
8567
8568 // Create PHIs for all of the select pseudo-instructions.
8569 auto SelectMBBI = MI.getIterator();
8570 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
8571 auto InsertionPoint = TailMBB->begin();
8572 while (SelectMBBI != SelectEnd) {
8573 auto Next = std::next(SelectMBBI);
8574 if (isSelectPseudo(*SelectMBBI)) {
8575 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
8576 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
8577 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
8578 .addReg(SelectMBBI->getOperand(4).getReg())
8579 .addMBB(HeadMBB)
8580 .addReg(SelectMBBI->getOperand(5).getReg())
8581 .addMBB(IfFalseMBB);
8582 SelectMBBI->eraseFromParent();
8583 }
8584 SelectMBBI = Next;
8585 }
8586
8587 F->getProperties().resetNoPHIs();
8588 return TailMBB;
8589}
8590
8591MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
8592 MachineInstr &MI, MachineBasicBlock *BB) const {
8593 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
8594 DebugLoc DL = MI.getDebugLoc();
8595
8596 switch (MI.getOpcode()) {
8597 default:
8598 llvm_unreachable("Unexpected instr type to insert");
8599 case LoongArch::DIV_W:
8600 case LoongArch::DIV_WU:
8601 case LoongArch::MOD_W:
8602 case LoongArch::MOD_WU:
8603 case LoongArch::DIV_D:
8604 case LoongArch::DIV_DU:
8605 case LoongArch::MOD_D:
8606 case LoongArch::MOD_DU:
8607 return insertDivByZeroTrap(MI, BB);
8608 break;
8609 case LoongArch::WRFCSR: {
8610 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
8611 LoongArch::FCSR0 + MI.getOperand(0).getImm())
8612 .addReg(MI.getOperand(1).getReg());
8613 MI.eraseFromParent();
8614 return BB;
8615 }
8616 case LoongArch::RDFCSR: {
8617 MachineInstr *ReadFCSR =
8618 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
8619 MI.getOperand(0).getReg())
8620 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
8621 ReadFCSR->getOperand(1).setIsUndef();
8622 MI.eraseFromParent();
8623 return BB;
8624 }
8625 case LoongArch::Select_GPR_Using_CC_GPR:
8626 return emitSelectPseudo(MI, BB, Subtarget);
8627 case LoongArch::BuildPairF64Pseudo:
8628 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
8629 case LoongArch::SplitPairF64Pseudo:
8630 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
8631 case LoongArch::PseudoVBZ:
8632 case LoongArch::PseudoVBZ_B:
8633 case LoongArch::PseudoVBZ_H:
8634 case LoongArch::PseudoVBZ_W:
8635 case LoongArch::PseudoVBZ_D:
8636 case LoongArch::PseudoVBNZ:
8637 case LoongArch::PseudoVBNZ_B:
8638 case LoongArch::PseudoVBNZ_H:
8639 case LoongArch::PseudoVBNZ_W:
8640 case LoongArch::PseudoVBNZ_D:
8641 case LoongArch::PseudoXVBZ:
8642 case LoongArch::PseudoXVBZ_B:
8643 case LoongArch::PseudoXVBZ_H:
8644 case LoongArch::PseudoXVBZ_W:
8645 case LoongArch::PseudoXVBZ_D:
8646 case LoongArch::PseudoXVBNZ:
8647 case LoongArch::PseudoXVBNZ_B:
8648 case LoongArch::PseudoXVBNZ_H:
8649 case LoongArch::PseudoXVBNZ_W:
8650 case LoongArch::PseudoXVBNZ_D:
8651 return emitVecCondBranchPseudo(MI, BB, Subtarget);
8652 case LoongArch::PseudoXVINSGR2VR_B:
8653 case LoongArch::PseudoXVINSGR2VR_H:
8654 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
8655 case LoongArch::PseudoCTPOP_B:
8656 case LoongArch::PseudoCTPOP_H:
8657 case LoongArch::PseudoCTPOP_W:
8658 case LoongArch::PseudoCTPOP_D:
8659 case LoongArch::PseudoCTPOP_H_LA32:
8660 case LoongArch::PseudoCTPOP_W_LA32:
8661 return emitPseudoCTPOP(MI, BB, Subtarget);
8662 case LoongArch::PseudoVMSKLTZ_B:
8663 case LoongArch::PseudoVMSKLTZ_H:
8664 case LoongArch::PseudoVMSKLTZ_W:
8665 case LoongArch::PseudoVMSKLTZ_D:
8666 case LoongArch::PseudoVMSKGEZ_B:
8667 case LoongArch::PseudoVMSKEQZ_B:
8668 case LoongArch::PseudoVMSKNEZ_B:
8669 case LoongArch::PseudoXVMSKLTZ_B:
8670 case LoongArch::PseudoXVMSKLTZ_H:
8671 case LoongArch::PseudoXVMSKLTZ_W:
8672 case LoongArch::PseudoXVMSKLTZ_D:
8673 case LoongArch::PseudoXVMSKGEZ_B:
8674 case LoongArch::PseudoXVMSKEQZ_B:
8675 case LoongArch::PseudoXVMSKNEZ_B:
8676 return emitPseudoVMSKCOND(MI, BB, Subtarget);
8677 case TargetOpcode::STATEPOINT:
8678 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
8679     // while the BL call instruction (to which the statepoint is lowered at
8680     // the end) has an implicit def. This def is early-clobber as it is set at
8681 // the moment of the call and earlier than any use is read.
8682 // Add this implicit dead def here as a workaround.
8683 MI.addOperand(*MI.getMF(),
8684                   MachineOperand::CreateReg(
8685 LoongArch::R1, /*isDef*/ true,
8686 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
8687 /*isUndef*/ false, /*isEarlyClobber*/ true));
8688 if (!Subtarget.is64Bit())
8689 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
8690 return emitPatchPoint(MI, BB);
8691 }
8692}
8693
8694 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
8695 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
8696 unsigned *Fast) const {
8697 if (!Subtarget.hasUAL())
8698 return false;
8699
8700 // TODO: set reasonable speed number.
8701 if (Fast)
8702 *Fast = 1;
8703 return true;
8704}
8705
8706//===----------------------------------------------------------------------===//
8707// Calling Convention Implementation
8708//===----------------------------------------------------------------------===//
8709
8710 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
8711// with a0-a1 reused to return values. Generally, the GPRs are used to pass
8712// fixed-point arguments, and floating-point arguments when no FPR is available
8713 // or with the soft float ABI.
8714const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
8715 LoongArch::R7, LoongArch::R8, LoongArch::R9,
8716 LoongArch::R10, LoongArch::R11};
8717
8718// PreserveNone calling convention:
8719// Arguments may be passed in any general-purpose registers except:
8720// - R1 : return address register
8721// - R22 : frame pointer
8722// - R31 : base pointer
8723//
8724// All general-purpose registers are treated as caller-saved,
8725// except R1 (RA) and R22 (FP).
8726//
8727// Non-volatile registers are allocated first so that a function
8728// can call normal functions without having to spill and reload
8729// argument registers.
8730 const MCPhysReg PreserveNoneArgGPRs[] = {
8731 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
8732 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
8733 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
8734 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
8735 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
8736 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
8737 LoongArch::R20};
8738
8739 // Eight floating-point registers fa0-fa7 are used for passing floating-point
8740// arguments, and fa0-fa1 are also used to return values.
8741const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
8742 LoongArch::F3, LoongArch::F4, LoongArch::F5,
8743 LoongArch::F6, LoongArch::F7};
8744// FPR32 and FPR64 alias each other.
8745 const MCPhysReg ArgFPR64s[] = {
8746 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
8747 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
8748
8749const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
8750 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
8751 LoongArch::VR6, LoongArch::VR7};
8752
8753const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
8754 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
8755 LoongArch::XR6, LoongArch::XR7};
8756
8757 static MCRegister allocateArgGPR(CCState &State) {
8758 switch (State.getCallingConv()) {
8759   case CallingConv::PreserveNone:
8760 if (!State.isVarArg())
8761 return State.AllocateReg(PreserveNoneArgGPRs);
8762 [[fallthrough]];
8763 default:
8764 return State.AllocateReg(ArgGPRs);
8765 }
8766}
8767
8768// Pass a 2*GRLen argument that has been split into two GRLen values through
8769// registers or the stack as necessary.
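// Illustrative example (editorial, not from the source): on LA32, an i64
// argument is split into two i32 halves. If only a7 remains free, the first
// half is passed in a7 and the second on the stack -- the "split between a
// register and the stack" case handled below.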
8770static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
8771 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
8772 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
8773 ISD::ArgFlagsTy ArgFlags2) {
8774 unsigned GRLenInBytes = GRLen / 8;
8775 if (Register Reg = allocateArgGPR(State)) {
8776 // At least one half can be passed via register.
8777 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
8778 VA1.getLocVT(), CCValAssign::Full));
8779 } else {
8780 // Both halves must be passed on the stack, with proper alignment.
8781 Align StackAlign =
8782 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
8783 State.addLoc(
8784         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
8785 State.AllocateStack(GRLenInBytes, StackAlign),
8786 VA1.getLocVT(), CCValAssign::Full));
8787 State.addLoc(CCValAssign::getMem(
8788 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8789 LocVT2, CCValAssign::Full));
8790 return false;
8791 }
8792 if (Register Reg = allocateArgGPR(State)) {
8793 // The second half can also be passed via register.
8794 State.addLoc(
8795 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
8796 } else {
8797 // The second half is passed via the stack, without additional alignment.
8798 State.addLoc(CCValAssign::getMem(
8799 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
8800 LocVT2, CCValAssign::Full));
8801 }
8802 return false;
8803}
8804
8805// Implements the LoongArch calling convention. Returns true upon failure.
8806 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
8807 unsigned ValNo, MVT ValVT,
8808 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
8809 CCState &State, bool IsRet, Type *OrigTy) {
8810 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
8811   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
8812 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
8813 MVT LocVT = ValVT;
8814
8815 // Any return value split into more than two values can't be returned
8816 // directly.
8817 if (IsRet && ValNo > 1)
8818 return true;
8819
8820   // Floating-point arguments go in GPRs when variadic or when no FPR is available.
8821 bool UseGPRForFloat = true;
8822
8823 switch (ABI) {
8824 default:
8825 llvm_unreachable("Unexpected ABI");
8826 break;
8827   case LoongArchABI::ABI_ILP32F:
8828   case LoongArchABI::ABI_ILP32D:
8829   case LoongArchABI::ABI_LP64F:
8830   case LoongArchABI::ABI_LP64D:
8831 UseGPRForFloat = ArgFlags.isVarArg();
8832 break;
8833   case LoongArchABI::ABI_ILP32S:
8834   case LoongArchABI::ABI_LP64S:
8835 break;
8836 }
8837
8838 // If this is a variadic argument, the LoongArch calling convention requires
8839 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
8840 // byte alignment. An aligned register should be used regardless of whether
8841 // the original argument was split during legalisation or not. The argument
8842 // will not be passed by registers if the original type is larger than
8843 // 2*GRLen, so the register alignment rule does not apply.
8844 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
8845 if (ArgFlags.isVarArg() &&
8846 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
8847 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
8848 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
8849 // Skip 'odd' register if necessary.
8850 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
8851 State.AllocateReg(ArgGPRs);
8852 }
8853
8854 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
8855 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
8856 State.getPendingArgFlags();
8857
8858 assert(PendingLocs.size() == PendingArgFlags.size() &&
8859 "PendingLocs and PendingArgFlags out of sync");
8860
8861 // FPR32 and FPR64 alias each other.
8862 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
8863 UseGPRForFloat = true;
8864
8865 if (UseGPRForFloat && ValVT == MVT::f32) {
8866 LocVT = GRLenVT;
8867 LocInfo = CCValAssign::BCvt;
8868 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
8869 LocVT = MVT::i64;
8870 LocInfo = CCValAssign::BCvt;
8871 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
8872 // Handle passing f64 on LA32D with a soft float ABI or when floating point
8873 // registers are exhausted.
8874 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
8875 // Depending on available argument GPRS, f64 may be passed in a pair of
8876 // GPRs, split between a GPR and the stack, or passed completely on the
8877 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
8878 // cases.
8879 MCRegister Reg = allocateArgGPR(State);
8880 if (!Reg) {
8881 int64_t StackOffset = State.AllocateStack(8, Align(8));
8882 State.addLoc(
8883 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8884 return false;
8885 }
8886 LocVT = MVT::i32;
8887 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8888 MCRegister HiReg = allocateArgGPR(State);
8889 if (HiReg) {
8890 State.addLoc(
8891 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
8892 } else {
8893 int64_t StackOffset = State.AllocateStack(4, Align(4));
8894 State.addLoc(
8895 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8896 }
8897 return false;
8898 }
8899
8900 // Split arguments might be passed indirectly, so keep track of the pending
8901 // values.
8902 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
8903 LocVT = GRLenVT;
8904 LocInfo = CCValAssign::Indirect;
8905 PendingLocs.push_back(
8906 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
8907 PendingArgFlags.push_back(ArgFlags);
8908 if (!ArgFlags.isSplitEnd()) {
8909 return false;
8910 }
8911 }
8912
8913 // If the split argument only had two elements, it should be passed directly
8914 // in registers or on the stack.
8915 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
8916 PendingLocs.size() <= 2) {
8917 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
8918 // Apply the normal calling convention rules to the first half of the
8919 // split argument.
8920 CCValAssign VA = PendingLocs[0];
8921 ISD::ArgFlagsTy AF = PendingArgFlags[0];
8922 PendingLocs.clear();
8923 PendingArgFlags.clear();
8924 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
8925 ArgFlags);
8926 }
8927
8928 // Allocate to a register if possible, or else a stack slot.
8929 Register Reg;
8930 unsigned StoreSizeBytes = GRLen / 8;
8931 Align StackAlign = Align(GRLen / 8);
8932
8933 if (ValVT == MVT::f32 && !UseGPRForFloat) {
8934 Reg = State.AllocateReg(ArgFPR32s);
8935 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
8936 Reg = State.AllocateReg(ArgFPR64s);
8937 } else if (ValVT.is128BitVector()) {
8938 Reg = State.AllocateReg(ArgVRs);
8939 UseGPRForFloat = false;
8940 StoreSizeBytes = 16;
8941 StackAlign = Align(16);
8942 } else if (ValVT.is256BitVector()) {
8943 Reg = State.AllocateReg(ArgXRs);
8944 UseGPRForFloat = false;
8945 StoreSizeBytes = 32;
8946 StackAlign = Align(32);
8947 } else {
8948 Reg = allocateArgGPR(State);
8949 }
8950
8951 unsigned StackOffset =
8952 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
8953
8954 // If we reach this point and PendingLocs is non-empty, we must be at the
8955 // end of a split argument that must be passed indirectly.
8956 if (!PendingLocs.empty()) {
8957 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
8958 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
8959 for (auto &It : PendingLocs) {
8960 if (Reg)
8961 It.convertToReg(Reg);
8962 else
8963 It.convertToMem(StackOffset);
8964 State.addLoc(It);
8965 }
8966 PendingLocs.clear();
8967 PendingArgFlags.clear();
8968 return false;
8969 }
8970 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
8971 "Expected an GRLenVT at this stage");
8972
8973 if (Reg) {
8974 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8975 return false;
8976 }
8977
8978 // When a floating-point value is passed on the stack, no bit-cast is needed.
8979 if (ValVT.isFloatingPoint()) {
8980 LocVT = ValVT;
8981 LocInfo = CCValAssign::Full;
8982 }
8983
8984 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8985 return false;
8986}
8987
8988void LoongArchTargetLowering::analyzeInputArgs(
8989 MachineFunction &MF, CCState &CCInfo,
8990 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8991 LoongArchCCAssignFn Fn) const {
8992 FunctionType *FType = MF.getFunction().getFunctionType();
8993 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8994 MVT ArgVT = Ins[i].VT;
8995 Type *ArgTy = nullptr;
8996 if (IsRet)
8997 ArgTy = FType->getReturnType();
8998 else if (Ins[i].isOrigArg())
8999 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
9000     LoongArchABI::ABI ABI =
9001 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9002 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
9003 CCInfo, IsRet, ArgTy)) {
9004 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
9005 << '\n');
9006 llvm_unreachable("");
9007 }
9008 }
9009}
9010
9011void LoongArchTargetLowering::analyzeOutputArgs(
9012 MachineFunction &MF, CCState &CCInfo,
9013 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
9014 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
9015 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9016 MVT ArgVT = Outs[i].VT;
9017 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
9018     LoongArchABI::ABI ABI =
9019 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9020 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
9021 CCInfo, IsRet, OrigTy)) {
9022 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
9023 << "\n");
9024 llvm_unreachable("");
9025 }
9026 }
9027}
9028
9029// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
9030// values.
9031 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
9032 const CCValAssign &VA, const SDLoc &DL) {
9033 switch (VA.getLocInfo()) {
9034 default:
9035 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9036 case CCValAssign::Full:
9037   case CCValAssign::Indirect:
9038 break;
9039 case CCValAssign::BCvt:
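    // Note (editorial): on LA64 an f32 travels in the low 32 bits of an i64
    // GPR, so it is extracted with movgr2fr.w (MOVGR2FR_W_LA64) rather than
    // a BITCAST, which would be ill-typed between i64 and f32.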
9040 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9041 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
9042 else
9043 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
9044 break;
9045 }
9046 return Val;
9047}
9048
9049 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
9050 const CCValAssign &VA, const SDLoc &DL,
9051 const ISD::InputArg &In,
9052 const LoongArchTargetLowering &TLI) {
9053   MachineFunction &MF = DAG.getMachineFunction();
9054   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9055 EVT LocVT = VA.getLocVT();
9056 SDValue Val;
9057 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
9058 Register VReg = RegInfo.createVirtualRegister(RC);
9059 RegInfo.addLiveIn(VA.getLocReg(), VReg);
9060 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
9061
9062 // If input is sign extended from 32 bits, note it for the OptW pass.
9063 if (In.isOrigArg()) {
9064 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
9065 if (OrigArg->getType()->isIntegerTy()) {
9066 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
9067 // An input zero extended from i31 can also be considered sign extended.
9068 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
9069 (BitWidth < 32 && In.Flags.isZExt())) {
9070       LoongArchMachineFunctionInfo *LAFI =
9071           MF.getInfo<LoongArchMachineFunctionInfo>();
9072 LAFI->addSExt32Register(VReg);
9073 }
9074 }
9075 }
9076
9077 return convertLocVTToValVT(DAG, Val, VA, DL);
9078}
9079
9080// The caller is responsible for loading the full value if the argument is
9081// passed with CCValAssign::Indirect.
9082 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
9083 const CCValAssign &VA, const SDLoc &DL) {
9084   MachineFunction &MF = DAG.getMachineFunction();
9085 MachineFrameInfo &MFI = MF.getFrameInfo();
9086 EVT ValVT = VA.getValVT();
9087 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
9088 /*IsImmutable=*/true);
9089 SDValue FIN = DAG.getFrameIndex(
9090       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
9091
9092 ISD::LoadExtType ExtType;
9093 switch (VA.getLocInfo()) {
9094 default:
9095 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9096 case CCValAssign::Full:
9097   case CCValAssign::Indirect:
9098 case CCValAssign::BCvt:
9099 ExtType = ISD::NON_EXTLOAD;
9100 break;
9101 }
9102 return DAG.getExtLoad(
9103 ExtType, DL, VA.getLocVT(), Chain, FIN,
9104       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
9105}
9106
9107 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
9108 const CCValAssign &VA,
9109 const CCValAssign &HiVA,
9110 const SDLoc &DL) {
9111 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
9112 "Unexpected VA");
9114 MachineFrameInfo &MFI = MF.getFrameInfo();
9115   MachineRegisterInfo &RegInfo = MF.getRegInfo();
9116
9117 assert(VA.isRegLoc() && "Expected register VA assignment");
9118
9119 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9120 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
9121 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
9122 SDValue Hi;
9123 if (HiVA.isMemLoc()) {
9124 // Second half of f64 is passed on the stack.
9125 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
9126 /*IsImmutable=*/true);
9127 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
9128 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
9129                      MachinePointerInfo::getFixedStack(MF, FI));
9130 } else {
9131 // Second half of f64 is passed in another GPR.
9132 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
9133 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
9134 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
9135 }
9136 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
9137}
9138
9139 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
9140 const CCValAssign &VA, const SDLoc &DL) {
9141 EVT LocVT = VA.getLocVT();
9142
9143 switch (VA.getLocInfo()) {
9144 default:
9145 llvm_unreachable("Unexpected CCValAssign::LocInfo");
9146 case CCValAssign::Full:
9147 break;
9148 case CCValAssign::BCvt:
9149 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
9150 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
9151 else
9152 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
9153 break;
9154 }
9155 return Val;
9156}
9157
9158static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
9159 CCValAssign::LocInfo LocInfo,
9160 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
9161 CCState &State) {
9162 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
9163 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
9164 // s0 s1 s2 s3 s4 s5 s6 s7 s8
9165 static const MCPhysReg GPRList[] = {
9166 LoongArch::R23, LoongArch::R24, LoongArch::R25,
9167 LoongArch::R26, LoongArch::R27, LoongArch::R28,
9168 LoongArch::R29, LoongArch::R30, LoongArch::R31};
9169 if (MCRegister Reg = State.AllocateReg(GPRList)) {
9170 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9171 return false;
9172 }
9173 }
9174
9175 if (LocVT == MVT::f32) {
9176 // Pass in STG registers: F1, F2, F3, F4
9177 // fs0,fs1,fs2,fs3
9178 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
9179 LoongArch::F26, LoongArch::F27};
9180 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
9181 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9182 return false;
9183 }
9184 }
9185
9186 if (LocVT == MVT::f64) {
9187 // Pass in STG registers: D1, D2, D3, D4
9188 // fs4,fs5,fs6,fs7
9189 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
9190 LoongArch::F30_64, LoongArch::F31_64};
9191 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
9192 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
9193 return false;
9194 }
9195 }
9196
9197 report_fatal_error("No registers left in GHC calling convention");
9198 return true;
9199}
9200
9201// Transform physical registers into virtual registers.
9202 SDValue LoongArchTargetLowering::LowerFormalArguments(
9203 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9204 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
9205 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
9206
9207   MachineFunction &MF = DAG.getMachineFunction();
9208
9209 switch (CallConv) {
9210 default:
9211 llvm_unreachable("Unsupported calling convention");
9212 case CallingConv::C:
9213 case CallingConv::Fast:
9214   case CallingConv::GRAAL:
9215   case CallingConv::PreserveNone:
9216 break;
9217 case CallingConv::GHC:
9218 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
9219 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
9220       report_fatal_error(
9221 "GHC calling convention requires the F and D extensions");
9222 }
9223
9224 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9225 MVT GRLenVT = Subtarget.getGRLenVT();
9226 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
9227   // Used with varargs to accumulate store chains.
9228 std::vector<SDValue> OutChains;
9229
9230 // Assign locations to all of the incoming arguments.
9231   SmallVector<CCValAssign> ArgLocs;
9232 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9233
9234 if (CallConv == CallingConv::GHC)
9235     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
9236 else
9237 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
9238
9239 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
9240 CCValAssign &VA = ArgLocs[i];
9241 SDValue ArgValue;
9242 // Passing f64 on LA32D with a soft float ABI must be handled as a special
9243 // case.
9244 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9245 assert(VA.needsCustom());
9246 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
9247 } else if (VA.isRegLoc())
9248 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
9249 else
9250 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
9251 if (VA.getLocInfo() == CCValAssign::Indirect) {
9252 // If the original argument was split and passed by reference, we need to
9253 // load all parts of it here (using the same address).
9254 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
9255                                    MachinePointerInfo()));
9256 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
9257 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
9258 assert(ArgPartOffset == 0);
9259 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
9260 CCValAssign &PartVA = ArgLocs[i + 1];
9261 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
9262 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9263 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
9264 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
9265                                      MachinePointerInfo()));
9266 ++i;
9267 ++InsIdx;
9268 }
9269 continue;
9270 }
9271 InVals.push_back(ArgValue);
9272 }
9273
9274 if (IsVarArg) {
9275     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
9276 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
9277 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
9278 MachineFrameInfo &MFI = MF.getFrameInfo();
9279 MachineRegisterInfo &RegInfo = MF.getRegInfo();
9280 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
9281
9282 // Offset of the first variable argument from stack pointer, and size of
9283 // the vararg save area. For now, the varargs save area is either zero or
9284 // large enough to hold a0-a7.
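    // Illustrative example (editorial): on LA64 with three named GPR
    // arguments, Idx == 3 and a3-a7 must be saved, so VarArgsSaveSize starts
    // at 5 * 8 = 40 bytes; because Idx is odd, an extra 8-byte slot is added
    // below, growing the save area to 48 bytes.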
9285 int VaArgOffset, VarArgsSaveSize;
9286
9287 // If all registers are allocated, then all varargs must be passed on the
9288 // stack and we don't need to save any argregs.
9289 if (ArgRegs.size() == Idx) {
9290 VaArgOffset = CCInfo.getStackSize();
9291 VarArgsSaveSize = 0;
9292 } else {
9293 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
9294 VaArgOffset = -VarArgsSaveSize;
9295 }
9296
9297 // Record the frame index of the first variable argument
9298 // which is a value necessary to VASTART.
9299 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9300 LoongArchFI->setVarArgsFrameIndex(FI);
9301
9302 // If saving an odd number of registers then create an extra stack slot to
9303 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
9304   // offsets to even-numbered registers remain 2*GRLen-aligned.
9305 if (Idx % 2) {
9306 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
9307 true);
9308 VarArgsSaveSize += GRLenInBytes;
9309 }
9310
9311 // Copy the integer registers that may have been used for passing varargs
9312 // to the vararg save area.
9313 for (unsigned I = Idx; I < ArgRegs.size();
9314 ++I, VaArgOffset += GRLenInBytes) {
9315 const Register Reg = RegInfo.createVirtualRegister(RC);
9316 RegInfo.addLiveIn(ArgRegs[I], Reg);
9317 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
9318 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
9319 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9320 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
9321                                    MachinePointerInfo::getFixedStack(MF, FI));
9322 cast<StoreSDNode>(Store.getNode())
9323 ->getMemOperand()
9324 ->setValue((Value *)nullptr);
9325 OutChains.push_back(Store);
9326 }
9327 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
9328 }
9329
9330 // All stores are grouped in one node to allow the matching between
9331 // the size of Ins and InVals. This only happens for vararg functions.
9332 if (!OutChains.empty()) {
9333 OutChains.push_back(Chain);
9334 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
9335 }
9336
9337 return Chain;
9338}
9339
9340 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
9341 return CI->isTailCall();
9342}
9343
9344 // Check that the return value is used only as a return value, as otherwise
9345// we can't perform a tail-call.
9346 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
9347 SDValue &Chain) const {
9348 if (N->getNumValues() != 1)
9349 return false;
9350 if (!N->hasNUsesOfValue(1, 0))
9351 return false;
9352
9353 SDNode *Copy = *N->user_begin();
9354 if (Copy->getOpcode() != ISD::CopyToReg)
9355 return false;
9356
9357 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
9358 // isn't safe to perform a tail call.
9359 if (Copy->getGluedNode())
9360 return false;
9361
9362 // The copy must be used by a LoongArchISD::RET, and nothing else.
9363 bool HasRet = false;
9364 for (SDNode *Node : Copy->users()) {
9365 if (Node->getOpcode() != LoongArchISD::RET)
9366 return false;
9367 HasRet = true;
9368 }
9369
9370 if (!HasRet)
9371 return false;
9372
9373 Chain = Copy->getOperand(0);
9374 return true;
9375}
9376
9377// Check whether the call is eligible for tail call optimization.
9378bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
9379 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
9380 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
9381
9382 auto CalleeCC = CLI.CallConv;
9383 auto &Outs = CLI.Outs;
9384 auto &Caller = MF.getFunction();
9385 auto CallerCC = Caller.getCallingConv();
9386
9387 // Do not tail call opt if the stack is used to pass parameters.
9388 if (CCInfo.getStackSize() != 0)
9389 return false;
9390
9391 // Do not tail call opt if any parameters need to be passed indirectly.
9392 for (auto &VA : ArgLocs)
9393 if (VA.getLocInfo() == CCValAssign::Indirect)
9394 return false;
9395
9396 // Do not tail call opt if either caller or callee uses struct return
9397 // semantics.
9398 auto IsCallerStructRet = Caller.hasStructRetAttr();
9399 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
9400 if (IsCallerStructRet || IsCalleeStructRet)
9401 return false;
9402
9403 // Do not tail call opt if either the callee or caller has a byval argument.
9404 for (auto &Arg : Outs)
9405 if (Arg.Flags.isByVal())
9406 return false;
9407
9408 // The callee has to preserve all registers the caller needs to preserve.
9409 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
9410 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
9411 if (CalleeCC != CallerCC) {
9412 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
9413 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
9414 return false;
9415 }
9416 return true;
9417}
9418
9419 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
9420 return DAG.getDataLayout().getPrefTypeAlign(
9421 VT.getTypeForEVT(*DAG.getContext()));
9422}
9423
9424// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
9425// and output parameter nodes.
9426SDValue
9427 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
9428 SmallVectorImpl<SDValue> &InVals) const {
9429 SelectionDAG &DAG = CLI.DAG;
9430 SDLoc &DL = CLI.DL;
9431   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
9432 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
9433   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
9434 SDValue Chain = CLI.Chain;
9435 SDValue Callee = CLI.Callee;
9436 CallingConv::ID CallConv = CLI.CallConv;
9437 bool IsVarArg = CLI.IsVarArg;
9438 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9439 MVT GRLenVT = Subtarget.getGRLenVT();
9440 bool &IsTailCall = CLI.IsTailCall;
9441
9442   MachineFunction &MF = DAG.getMachineFunction();
9443
9444 // Analyze the operands of the call, assigning locations to each operand.
9445   SmallVector<CCValAssign> ArgLocs;
9446 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
9447
9448 if (CallConv == CallingConv::GHC)
9449 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
9450 else
9451 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
9452
9453 // Check if it's really possible to do a tail call.
9454 if (IsTailCall)
9455 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
9456
9457 if (IsTailCall)
9458 ++NumTailCalls;
9459 else if (CLI.CB && CLI.CB->isMustTailCall())
9460 report_fatal_error("failed to perform tail call elimination on a call "
9461 "site marked musttail");
9462
9463 // Get a count of how many bytes are to be pushed on the stack.
9464 unsigned NumBytes = ArgCCInfo.getStackSize();
9465
9466 // Create local copies for byval args.
9467 SmallVector<SDValue> ByValArgs;
9468 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9469 ISD::ArgFlagsTy Flags = Outs[i].Flags;
9470 if (!Flags.isByVal())
9471 continue;
9472
9473 SDValue Arg = OutVals[i];
9474 unsigned Size = Flags.getByValSize();
9475 Align Alignment = Flags.getNonZeroByValAlign();
9476
9477 int FI =
9478 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
9479 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
9480 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
9481
9482 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
9483 /*IsVolatile=*/false,
9484 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
9486 ByValArgs.push_back(FIPtr);
9487 }
9488
9489 if (!IsTailCall)
9490 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
9491
9492 // Copy argument values to their designated locations.
9494 SmallVector<SDValue> MemOpChains;
9495 SDValue StackPtr;
9496 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
9497 ++i, ++OutIdx) {
9498 CCValAssign &VA = ArgLocs[i];
9499 SDValue ArgValue = OutVals[OutIdx];
9500 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
9501
9502 // Handle passing f64 on LA32D with a soft float ABI as a special case.
9503 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9504 assert(VA.isRegLoc() && "Expected register VA assignment");
9505 assert(VA.needsCustom());
9506 SDValue SplitF64 =
9507 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9508 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
9509 SDValue Lo = SplitF64.getValue(0);
9510 SDValue Hi = SplitF64.getValue(1);
9511
9512 Register RegLo = VA.getLocReg();
9513 RegsToPass.push_back(std::make_pair(RegLo, Lo));
9514
9515 // Get the CCValAssign for the Hi part.
9516 CCValAssign &HiVA = ArgLocs[++i];
9517
9518 if (HiVA.isMemLoc()) {
9519 // Second half of f64 is passed on the stack.
9520 if (!StackPtr.getNode())
9521 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9522         SDValue Address =
9523 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9524 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
9525 // Emit the store.
9526 MemOpChains.push_back(DAG.getStore(
9527 Chain, DL, Hi, Address,
9528             MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
9529 } else {
9530 // Second half of f64 is passed in another GPR.
9531 Register RegHigh = HiVA.getLocReg();
9532 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
9533 }
9534 continue;
9535 }
9536
9537 // Promote the value if needed.
9538 // For now, only handle fully promoted and indirect arguments.
9539 if (VA.getLocInfo() == CCValAssign::Indirect) {
9540 // Store the argument in a stack slot and pass its address.
9541 Align StackAlign =
9542 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
9543 getPrefTypeAlign(ArgValue.getValueType(), DAG));
9544 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
9545 // If the original argument was split and passed by reference, we need to
9546 // store the required parts of it here (and pass just one address).
9547 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
9548 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
9549 assert(ArgPartOffset == 0);
9550 // Calculate the total size to store. We don't have access to what we're
9551 // actually storing other than performing the loop and collecting the
9552 // info.
9553       SmallVector<std::pair<SDValue, SDValue>> Parts;
9554 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
9555 SDValue PartValue = OutVals[OutIdx + 1];
9556 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
9557 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
9558 EVT PartVT = PartValue.getValueType();
9559
9560 StoredSize += PartVT.getStoreSize();
9561 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
9562 Parts.push_back(std::make_pair(PartValue, Offset));
9563 ++i;
9564 ++OutIdx;
9565 }
9566 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
9567 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
9568 MemOpChains.push_back(
9569 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
9570                        MachinePointerInfo::getFixedStack(MF, FI)));
9571 for (const auto &Part : Parts) {
9572 SDValue PartValue = Part.first;
9573 SDValue PartOffset = Part.second;
9574         SDValue Address =
9575 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
9576 MemOpChains.push_back(
9577 DAG.getStore(Chain, DL, PartValue, Address,
9578                          MachinePointerInfo::getFixedStack(MF, FI)));
9579 }
9580 ArgValue = SpillSlot;
9581 } else {
9582 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
9583 }
9584
9585 // Use local copy if it is a byval arg.
9586 if (Flags.isByVal())
9587 ArgValue = ByValArgs[j++];
9588
9589 if (VA.isRegLoc()) {
9590 // Queue up the argument copies and emit them at the end.
9591 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
9592 } else {
9593 assert(VA.isMemLoc() && "Argument not register or memory");
9594 assert(!IsTailCall && "Tail call not allowed if stack is used "
9595 "for passing parameters");
9596
9597 // Work out the address of the stack slot.
9598 if (!StackPtr.getNode())
9599 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
9600       SDValue Address =
9601 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
9602                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
9603
9604 // Emit the store.
9605 MemOpChains.push_back(
9606 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
9607 }
9608 }
9609
9610 // Join the stores, which are independent of one another.
9611 if (!MemOpChains.empty())
9612 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
9613
9614 SDValue Glue;
9615
9616 // Build a sequence of copy-to-reg nodes, chained and glued together.
9617 for (auto &Reg : RegsToPass) {
9618 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
9619 Glue = Chain.getValue(1);
9620 }
9621
9622 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
9623 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
9624 // split it and then direct call can be matched by PseudoCALL_SMALL.
9625   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
9626 const GlobalValue *GV = S->getGlobal();
9627 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
9628                            ? LoongArchII::MO_CALL
9629                            : LoongArchII::MO_CALL_PLT;
9630 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
9631 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
9632 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
9633                            ? LoongArchII::MO_CALL
9634                            : LoongArchII::MO_CALL_PLT;
9635 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
9636 }
9637
9638 // The first call operand is the chain and the second is the target address.
9639   SmallVector<SDValue> Ops;
9640 Ops.push_back(Chain);
9641 Ops.push_back(Callee);
9642
9643 // Add argument registers to the end of the list so that they are
9644 // known live into the call.
9645 for (auto &Reg : RegsToPass)
9646 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
9647
9648 if (!IsTailCall) {
9649 // Add a register mask operand representing the call-preserved registers.
9650 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
9651 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
9652 assert(Mask && "Missing call preserved mask for calling convention");
9653 Ops.push_back(DAG.getRegisterMask(Mask));
9654 }
9655
9656 // Glue the call to the argument copies, if any.
9657 if (Glue.getNode())
9658 Ops.push_back(Glue);
9659
9660 // Emit the call.
9661 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
9662 unsigned Op;
9663 switch (DAG.getTarget().getCodeModel()) {
9664 default:
9665 report_fatal_error("Unsupported code model");
9666 case CodeModel::Small:
9667 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
9668 break;
9669 case CodeModel::Medium:
9670 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
9671 break;
9672 case CodeModel::Large:
9673 assert(Subtarget.is64Bit() && "Large code model requires LA64");
9674 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
9675 break;
9676 }
9677
9678 if (IsTailCall) {
9679     MF.getFrameInfo().setHasTailCall();
9680 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
9681 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
9682 return Ret;
9683 }
9684
9685 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
9686 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
9687 Glue = Chain.getValue(1);
9688
9689 // Mark the end of the call, which is glued to the call itself.
9690 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
9691 Glue = Chain.getValue(1);
9692
9693 // Assign locations to each value returned by this call.
9694   SmallVector<CCValAssign> RVLocs;
9695 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
9696 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
9697
9698 // Copy all of the result registers out of their specified physreg.
9699 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
9700 auto &VA = RVLocs[i];
9701 // Copy the value out.
9702 SDValue RetValue =
9703 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
9704 // Glue the RetValue to the end of the call sequence.
9705 Chain = RetValue.getValue(1);
9706 Glue = RetValue.getValue(2);
9707
9708 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9709 assert(VA.needsCustom());
9710 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
9711 MVT::i32, Glue);
9712 Chain = RetValue2.getValue(1);
9713 Glue = RetValue2.getValue(2);
9714 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
9715 RetValue, RetValue2);
9716 } else
9717 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
9718
9719 InVals.push_back(RetValue);
9720 }
9721
9722 return Chain;
9723}
9724
9725 bool LoongArchTargetLowering::CanLowerReturn(
9726 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
9727 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
9728 const Type *RetTy) const {
9729   SmallVector<CCValAssign> RVLocs;
9730 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
9731
9732 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
9733 LoongArchABI::ABI ABI =
9734 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
9735 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
9736 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
9737 return false;
9738 }
9739 return true;
9740}
9741
9742 SDValue LoongArchTargetLowering::LowerReturn(
9743 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
9744     const SmallVectorImpl<ISD::OutputArg> &Outs,
9745 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
9746 SelectionDAG &DAG) const {
9747 // Stores the assignment of the return value to a location.
9748   SmallVector<CCValAssign> RVLocs;
9749
9750 // Info about the registers and stack slot.
9751 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
9752 *DAG.getContext());
9753
9754 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
9755 nullptr, CC_LoongArch);
9756 if (CallConv == CallingConv::GHC && !RVLocs.empty())
9757 report_fatal_error("GHC functions return void only");
9758 SDValue Glue;
9759 SmallVector<SDValue, 4> RetOps(1, Chain);
9760
9761 // Copy the result values into the output registers.
9762 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
9763 SDValue Val = OutVals[OutIdx];
9764 CCValAssign &VA = RVLocs[i];
9765 assert(VA.isRegLoc() && "Can only return in registers!");
9766
9767 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
9768 // Handle returning f64 on LA32D with a soft float ABI.
9769 assert(VA.isRegLoc() && "Expected return via registers");
9770 assert(VA.needsCustom());
9771 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
9772 DAG.getVTList(MVT::i32, MVT::i32), Val);
9773 SDValue Lo = SplitF64.getValue(0);
9774 SDValue Hi = SplitF64.getValue(1);
9775 Register RegLo = VA.getLocReg();
9776 Register RegHi = RVLocs[++i].getLocReg();
9777
9778 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
9779 Glue = Chain.getValue(1);
9780 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
9781 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
9782 Glue = Chain.getValue(1);
9783 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
9784 } else {
9785 // Handle a 'normal' return.
9786 Val = convertValVTToLocVT(DAG, Val, VA, DL);
9787 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
9788
9789 // Guarantee that all emitted copies are stuck together.
9790 Glue = Chain.getValue(1);
9791 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
9792 }
9793 }
9794
9795 RetOps[0] = Chain; // Update chain.
9796
9797 // Add the glue node if we have it.
9798 if (Glue.getNode())
9799 RetOps.push_back(Glue);
9800
9801 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
9802}
9803
9804// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
9805// Note: The following prefixes are excluded:
9806// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
9807// as they can be represented using [x]vrepli.[whb]
9808 std::pair<bool, uint64_t> LoongArchTargetLowering::isImmVLDILegalForMode1(
9809 const APInt &SplatValue, const unsigned SplatBitSize) const {
9810 uint64_t RequiredImm = 0;
9811 uint64_t V = SplatValue.getZExtValue();
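  // Worked example (editorial): a v8i16 splat of 0x1200 has
  // SplatBitSize == 16 and V & 0x00FF == 0, so it is encodable and
  // RequiredImm becomes (0b10101 << 8) | 0x12, i.e. vldi mode 4'b0101 with
  // data byte 0x12.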
9812 if (SplatBitSize == 16 && !(V & 0x00FF)) {
9813 // 4'b0101
9814 RequiredImm = (0b10101 << 8) | (V >> 8);
9815 return {true, RequiredImm};
9816 } else if (SplatBitSize == 32) {
9817 // 4'b0001
9818 if (!(V & 0xFFFF00FF)) {
9819 RequiredImm = (0b10001 << 8) | (V >> 8);
9820 return {true, RequiredImm};
9821 }
9822 // 4'b0010
9823 if (!(V & 0xFF00FFFF)) {
9824 RequiredImm = (0b10010 << 8) | (V >> 16);
9825 return {true, RequiredImm};
9826 }
9827 // 4'b0011
9828 if (!(V & 0x00FFFFFF)) {
9829 RequiredImm = (0b10011 << 8) | (V >> 24);
9830 return {true, RequiredImm};
9831 }
9832 // 4'b0110
9833 if ((V & 0xFFFF00FF) == 0xFF) {
9834 RequiredImm = (0b10110 << 8) | (V >> 8);
9835 return {true, RequiredImm};
9836 }
9837 // 4'b0111
9838 if ((V & 0xFF00FFFF) == 0xFFFF) {
9839 RequiredImm = (0b10111 << 8) | (V >> 16);
9840 return {true, RequiredImm};
9841 }
9842 // 4'b1010
9843 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
9844 RequiredImm =
9845 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9846 return {true, RequiredImm};
9847 }
9848 } else if (SplatBitSize == 64) {
9849 // 4'b1011
9850 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
9851 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
9852 RequiredImm =
9853 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
9854 return {true, RequiredImm};
9855 }
9856 // 4'b1100
9857 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
9858 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
9859 RequiredImm =
9860 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
9861 return {true, RequiredImm};
9862 }
9863 // 4'b1001
9864 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
9865 uint8_t res = 0;
9866 for (int i = 0; i < 8; ++i) {
9867 uint8_t byte = x & 0xFF;
9868 if (byte == 0 || byte == 0xFF)
9869 res |= ((byte & 1) << i);
9870 else
9871 return {false, 0};
9872 x >>= 8;
9873 }
9874 return {true, res};
9875 };
9876 auto [IsSame, Suffix] = sameBitsPreByte(V);
9877 if (IsSame) {
9878 RequiredImm = (0b11001 << 8) | Suffix;
9879 return {true, RequiredImm};
9880 }
9881 }
9882 return {false, RequiredImm};
9883}
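// Worked example (illustrative, not part of the original source): a v8i16
// splat of 0x1200 has an all-zero low byte, so it matches the 4'b0101 case
// above and encodes as (0b10101 << 8) | 0x12 == 0x1512, a 13-bit vldi
// immediate with imm[12] set.
static_assert(((0b10101 << 8) | (0x1200 >> 8)) == 0x1512,
              "vldi immediate for a v8i16 splat of 0x1200");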
9884
9885bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
9886 EVT VT) const {
9887 if (!Subtarget.hasExtLSX())
9888 return false;
9889
9890 if (VT == MVT::f32) {
9891 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
9892 return (masked == 0x3e000000 || masked == 0x40000000);
9893 }
9894
9895 if (VT == MVT::f64) {
9896 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
9897 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
9898 }
9899
9900 return false;
9901}
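// For example (illustrative): 2.0f has the bit pattern 0x40000000 and -0.5f
// has 0xbf000000; both survive the 0x7e07ffff mask with one of the two
// accepted values, so both can be materialized with vldi.
static_assert((0x40000000u & 0x7e07ffffu) == 0x40000000u, "2.0f is VLDI-legal");
static_assert((0xbf000000u & 0x7e07ffffu) == 0x3e000000u, "-0.5f is VLDI-legal");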
9902
9903bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
9904 bool ForCodeSize) const {
9905 // TODO: Maybe need more checks here after vector extension is supported.
9906 if (VT == MVT::f32 && !Subtarget.hasBasicF())
9907 return false;
9908 if (VT == MVT::f64 && !Subtarget.hasBasicD())
9909 return false;
9910 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
9911}
9912
9913bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
9914 return true;
9915}
9916
9917bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
9918 return true;
9919}
9920
9921bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9922 const Instruction *I) const {
9923 if (!Subtarget.is64Bit())
9924 return isa<LoadInst>(I) || isa<StoreInst>(I);
9925
9926 if (isa<LoadInst>(I))
9927 return true;
9928
9929 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9930 // require fences because we can use amswap_db.[w/d].
9931 Type *Ty = I->getOperand(0)->getType();
9932 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9933 unsigned Size = Ty->getIntegerBitWidth();
9934 return (Size == 8 || Size == 16);
9935 }
9936
9937 return false;
9938}
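// For example (illustrative): on LA64, `store atomic i32 %v, ptr %p seq_cst`
// can be selected as amswap_db.w with no surrounding fences, while an i8 or
// i16 atomic store still gets fences inserted by AtomicExpandPass.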
9939
9940EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
9941 LLVMContext &Context,
9942 EVT VT) const {
9943 if (!VT.isVector())
9944 return getPointerTy(DL);
9945 return VT.changeVectorElementTypeToInteger();
9946}
9947
9948bool LoongArchTargetLowering::canMergeStoresTo(
9949 unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const {
9950 // Do not merge stores into float-register-sized values (128 or 256 bits)
9951 // if the function carries the NoImplicitFloat attribute.
9952 bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
9953 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
9954 if (NoFloat)
9955 return MemVT.getSizeInBits() <= MaxIntSize;
9956
9957 // Make sure we don't merge greater than our maximum supported vector width.
9958 if (Subtarget.hasExtLASX())
9959 MaxIntSize = 256;
9960 else if (Subtarget.hasExtLSX())
9961 MaxIntSize = 128;
9962
9963 return MemVT.getSizeInBits() <= MaxIntSize;
9964}
9965
9966bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
9967 EVT VT = Y.getValueType();
9968
9969 if (VT.isVector())
9970 return Subtarget.hasExtLSX() && VT.isInteger();
9971
9972 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9973}
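// Illustrative effect of this hook: returning true tells DAGCombiner to keep
// (and X, (xor Y, -1)) in that form instead of rewriting it, so instruction
// selection can match LoongArch's andn/vandn patterns directly.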
9974
9975void LoongArchTargetLowering::getTgtMemIntrinsicInfos(
9976 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
9977 MachineFunction &MF, unsigned Intrinsic) const {
9978 switch (Intrinsic) {
9979 default:
9980 return;
9981 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9982 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9983 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9984 case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
9985 IntrinsicInfo Info;
9986 Info.opc = ISD::INTRINSIC_W_CHAIN;
9987 Info.memVT = MVT::i32;
9988 Info.ptrVal = I.getArgOperand(0);
9989 Info.offset = 0;
9990 Info.align = Align(4);
9991 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
9992 MachineMemOperand::MOVolatile;
9993 Infos.push_back(Info);
9994 return;
9995 // TODO: Add more Intrinsics later.
9996 }
9997 }
9998}
9999
10000// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
10001// and/or/xor operations with operands narrower than 32 bits can no longer be
10002// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To avoid a
10003// regression, we implement that expansion manually here.
10004void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
10005 AtomicRMWInst::BinOp Op = AI->getOperation();
10006
10007 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
10008 Op == AtomicRMWInst::And) &&
10009 "Unable to expand");
10010 unsigned MinWordSize = 4;
10011
10012 IRBuilder<> Builder(AI);
10013 LLVMContext &Ctx = Builder.getContext();
10014 const DataLayout &DL = AI->getDataLayout();
10015 Type *ValueType = AI->getType();
10016 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
10017
10018 Value *Addr = AI->getPointerOperand();
10019 PointerType *PtrTy = cast<PointerType>(Addr->getType());
10020 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
10021
10022 Value *AlignedAddr = Builder.CreateIntrinsic(
10023 Intrinsic::ptrmask, {PtrTy, IntTy},
10024 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
10025 "AlignedAddr");
10026
10027 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
10028 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
10029 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
10030 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
10031 Value *Mask = Builder.CreateShl(
10032 ConstantInt::get(WordType,
10033 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
10034 ShiftAmt, "Mask");
10035 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
10036 Value *ValOperand_Shifted =
10037 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
10038 ShiftAmt, "ValOperand_Shifted");
10039 Value *NewOperand;
10040 if (Op == AtomicRMWInst::And)
10041 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
10042 else
10043 NewOperand = ValOperand_Shifted;
10044
10045 AtomicRMWInst *NewAI =
10046 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
10047 AI->getOrdering(), AI->getSyncScopeID());
10048
10049 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
10050 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
10051 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
10052 AI->replaceAllUsesWith(FinalOldResult);
10053 AI->eraseFromParent();
10054}
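// Sketch of the expansion performed above (illustrative IR, names invented):
//   atomicrmw or ptr %p, i8 1 monotonic
// becomes, roughly, on LA64:
//   %AlignedAddr = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4)
//   %ShiftAmt    = 8 * (ptrtoint(%p) & 3)
//   %Shifted     = shl i32 1, %ShiftAmt
//   %Old         = atomicrmw or ptr %AlignedAddr, i32 %Shifted monotonic
//   %Res         = trunc i32 (lshr i32 %Old, %ShiftAmt) to i8
// For 'and', the shifted operand is OR-ed with the inverted mask first so the
// bytes outside the i8 field are left unchanged by the 32-bit amand[_db].w.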
10055
10056TargetLowering::AtomicExpansionKind
10057LoongArchTargetLowering::shouldExpandAtomicRMWInIR(
10058 const AtomicRMWInst *AI) const {
10059 // TODO: Add more AtomicRMWInst cases that need to be handled.
10060
10061 // Since floating-point operations require a non-trivial set of data
10062 // operations, expand them via CmpXChg.
10063 if (AI->isFloatingPointOperation() ||
10064 AI->getOperation() == AtomicRMWInst::UIncWrap ||
10065 AI->getOperation() == AtomicRMWInst::UDecWrap ||
10066 AI->getOperation() == AtomicRMWInst::USubCond ||
10067 AI->getOperation() == AtomicRMWInst::USubSat)
10068 return AtomicExpansionKind::CmpXChg;
10069
10070 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
10071 (AI->getOperation() == AtomicRMWInst::Xchg ||
10072 AI->getOperation() == AtomicRMWInst::Add ||
10073 AI->getOperation() == AtomicRMWInst::Sub)) {
10074 return AtomicExpansionKind::None;
10075 }
10076
10077 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
10078 if (Subtarget.hasLAMCAS()) {
10079 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
10080 AI->getOperation() == AtomicRMWInst::Or ||
10081 AI->getOperation() == AtomicRMWInst::Xor))
10082 return AtomicExpansionKind::Expand;
10083 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
10084 return AtomicExpansionKind::CmpXChg;
10085 }
10086
10087 if (Size == 8 || Size == 16)
10088 return AtomicExpansionKind::MaskedIntrinsic;
10089 return AtomicExpansionKind::None;
10090}
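// Decision summary (illustrative): floating-point and the U*-family RMW ops
// always expand via cmpxchg; with LAM_BH on LA64, xchg/add/sub need no
// expansion; with LAMCAS, sub-word and/or/xor take the manual expansion above
// and nand or other sub-word ops use cmpxchg; otherwise 8/16-bit ops fall
// back to the masked LL/SC intrinsics.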
10091
10092static Intrinsic::ID
10093getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
10094 AtomicRMWInst::BinOp BinOp) {
10095 if (GRLen == 64) {
10096 switch (BinOp) {
10097 default:
10098 llvm_unreachable("Unexpected AtomicRMW BinOp");
10099 case AtomicRMWInst::Xchg:
10100 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
10101 case AtomicRMWInst::Add:
10102 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
10103 case AtomicRMWInst::Sub:
10104 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
10105 case AtomicRMWInst::Nand:
10106 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
10107 case AtomicRMWInst::UMax:
10108 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
10109 case AtomicRMWInst::UMin:
10110 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
10111 case AtomicRMWInst::Max:
10112 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
10113 case AtomicRMWInst::Min:
10114 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
10115 // TODO: support other AtomicRMWInst.
10116 }
10117 }
10118
10119 if (GRLen == 32) {
10120 switch (BinOp) {
10121 default:
10122 llvm_unreachable("Unexpected AtomicRMW BinOp");
10123 case AtomicRMWInst::Xchg:
10124 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
10125 case AtomicRMWInst::Add:
10126 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
10127 case AtomicRMWInst::Sub:
10128 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
10129 case AtomicRMWInst::Nand:
10130 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
10131 case AtomicRMWInst::UMax:
10132 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
10133 case AtomicRMWInst::UMin:
10134 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
10135 case AtomicRMWInst::Max:
10136 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
10137 case AtomicRMWInst::Min:
10138 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
10139 // TODO: support other AtomicRMWInst.
10140 }
10141 }
10142
10143 llvm_unreachable("Unexpected GRLen\n");
10144}
10145
10146TargetLowering::AtomicExpansionKind
10147LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
10148 const AtomicCmpXchgInst *CI) const {
10149
10150 if (Subtarget.hasLAMCAS())
10151 return AtomicExpansionKind::None;
10152
10153 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
10154 if (Size == 8 || Size == 16)
10155 return AtomicExpansionKind::MaskedIntrinsic;
10156 return AtomicExpansionKind::None;
10157}
10158
10159Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
10160 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
10161 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
10162 unsigned GRLen = Subtarget.getGRLen();
10163 AtomicOrdering FailOrd = CI->getFailureOrdering();
10164 Value *FailureOrdering =
10165 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
10166 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
10167 if (GRLen == 64) {
10168 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
10169 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
10170 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
10171 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10172 }
10173 Type *Tys[] = {AlignedAddr->getType()};
10174 Value *Result = Builder.CreateIntrinsic(
10175 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
10176 if (GRLen == 64)
10177 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10178 return Result;
10179}
10180
10181Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
10182 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
10183 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
10184 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
10185 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
10186 // mask, as this produces better code than the LL/SC loop emitted by
10187 // int_loongarch_masked_atomicrmw_xchg.
10188 if (AI->getOperation() == AtomicRMWInst::Xchg &&
10189 isa<ConstantInt>(AI->getValOperand())) {
10190 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
10191 if (CVal->isZero())
10192 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
10193 Builder.CreateNot(Mask, "Inv_Mask"),
10194 AI->getAlign(), Ord);
10195 if (CVal->isMinusOne())
10196 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
10197 AI->getAlign(), Ord);
10198 }
10199
10200 unsigned GRLen = Subtarget.getGRLen();
10201 Value *Ordering =
10202 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
10203 Type *Tys[] = {AlignedAddr->getType()};
10204 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
10205 AI->getModule(),
10206 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
10207
10208 if (GRLen == 64) {
10209 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
10210 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
10211 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
10212 }
10213
10214 Value *Result;
10215
10216 // Must pass the shift amount needed to sign extend the loaded value prior
10217 // to performing a signed comparison for min/max. ShiftAmt is the number of
10218 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
10219 // is the number of bits to left+right shift the value in order to
10220 // sign-extend.
10221 if (AI->getOperation() == AtomicRMWInst::Min ||
10223 const DataLayout &DL = AI->getDataLayout();
10224 unsigned ValWidth =
10225 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
10226 Value *SextShamt =
10227 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
10228 Result = Builder.CreateCall(LlwOpScwLoop,
10229 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
10230 } else {
10231 Result =
10232 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
10233 }
10234
10235 if (GRLen == 64)
10236 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
10237 return Result;
10238}
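// Worked example (illustrative): an atomicrmw min on an i8 at byte offset 1
// on LA64 has ShiftAmt == 8 and ValWidth == 8, so SextShamt == 64 - 8 - 8 ==
// 48; the LL/SC loop shifts the loaded word left by 48 and arithmetically
// right by 48, sign-extending the i8 field in place for the signed compare.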
10239
10240bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
10241 const MachineFunction &MF, EVT VT) const {
10242 VT = VT.getScalarType();
10243
10244 if (!VT.isSimple())
10245 return false;
10246
10247 switch (VT.getSimpleVT().SimpleTy) {
10248 case MVT::f32:
10249 case MVT::f64:
10250 return true;
10251 default:
10252 break;
10253 }
10254
10255 return false;
10256}
10257
10258Register LoongArchTargetLowering::getExceptionPointerRegister(
10259 const Constant *PersonalityFn) const {
10260 return LoongArch::R4;
10261}
10262
10263Register LoongArchTargetLowering::getExceptionSelectorRegister(
10264 const Constant *PersonalityFn) const {
10265 return LoongArch::R5;
10266}
10267
10268//===----------------------------------------------------------------------===//
10269// Target Optimization Hooks
10270//===----------------------------------------------------------------------===//
10271
10272static int getEstimateRefinementSteps(EVT VT,
10273 const LoongArchSubtarget &Subtarget) {
10274 // The FRECIPE family of instructions has a relative accuracy of 2^-14.
10275 // An IEEE float has 23 fraction bits and a double has 52.
10276 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
10277 return RefinementSteps;
10278}
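// Rationale (illustrative): each Newton-Raphson refinement step roughly
// squares the relative error, so starting from FRECIPE's 2^-14 one step
// reaches about 2^-28 (covering f32's 24 significand bits) and two steps
// reach about 2^-56 (covering f64's 53 significand bits).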
10279
10280static bool
10281isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget) {
10282 assert(Subtarget.hasFrecipe() &&
10283 "Reciprocal estimate queried on unsupported target");
10284
10285 if (!VT.isSimple())
10286 return false;
10287
10288 switch (VT.getSimpleVT().SimpleTy) {
10289 case MVT::f32:
10290 // f32 is the base type for reciprocal estimate instructions.
10291 return true;
10292
10293 case MVT::f64:
10294 return Subtarget.hasBasicD();
10295
10296 case MVT::v4f32:
10297 case MVT::v2f64:
10298 return Subtarget.hasExtLSX();
10299
10300 case MVT::v8f32:
10301 case MVT::v4f64:
10302 return Subtarget.hasExtLASX();
10303
10304 default:
10305 return false;
10306 }
10307}
10308
10309SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
10310 SelectionDAG &DAG, int Enabled,
10311 int &RefinementSteps,
10312 bool &UseOneConstNR,
10313 bool Reciprocal) const {
10314 assert(Enabled != ReciprocalEstimate::Disabled &&
10315 "Enabled should never be Disabled here");
10316
10317 if (!Subtarget.hasFrecipe())
10318 return SDValue();
10319
10320 SDLoc DL(Operand);
10321 EVT VT = Operand.getValueType();
10322
10323 // Check supported types.
10324 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10325 return SDValue();
10326
10327 // Handle refinement steps.
10328 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10329 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10330
10331 // LoongArch only has FRSQRTE, which computes 1.0 / sqrt(x).
10332 UseOneConstNR = false;
10333 SDValue Rsqrt = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
10334
10335 // If the caller wants 1.0 / sqrt(x), or if further refinement steps
10336 // are needed (which rely on the reciprocal form), return the raw reciprocal
10337 // estimate.
10338 if (Reciprocal || RefinementSteps > 0)
10339 return Rsqrt;
10340
10341 // Otherwise, return sqrt(x) by multiplying with the operand.
10342 return DAG.getNode(ISD::FMUL, DL, VT, Operand, Rsqrt);
10343}
10344
10345SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
10346 SelectionDAG &DAG,
10347 int Enabled,
10348 int &RefinementSteps) const {
10349 assert(Enabled != ReciprocalEstimate::Disabled &&
10350 "Enabled should never be Disabled here");
10351
10352 if (!Subtarget.hasFrecipe())
10353 return SDValue();
10354
10355 SDLoc DL(Operand);
10356 EVT VT = Operand.getValueType();
10357
10358 // Check supported types.
10359 if (!isSupportedReciprocalEstimateType(VT, Subtarget))
10360 return SDValue();
10361
10362 if (RefinementSteps == ReciprocalEstimate::Unspecified)
10363 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
10364
10365 // FRECIPE computes 1.0 / x.
10366 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
10367}
10368
10369//===----------------------------------------------------------------------===//
10370// LoongArch Inline Assembly Support
10371//===----------------------------------------------------------------------===//
10372
10373LoongArchTargetLowering::ConstraintType
10374LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
10375 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
10376 //
10377 // 'f': A floating-point register (if available).
10378 // 'k': A memory operand whose address is formed by a base register and
10379 // (optionally scaled) index register.
10380 // 'l': A signed 16-bit constant.
10381 // 'm': A memory operand whose address is formed by a base register and
10382 // offset that is suitable for use in instructions with the same
10383 // addressing mode as st.w and ld.w.
10384 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
10385 // instruction)
10386 // 'I': A signed 12-bit constant (for arithmetic instructions).
10387 // 'J': Integer zero.
10388 // 'K': An unsigned 12-bit constant (for logic instructions).
10389 // "ZB": An address that is held in a general-purpose register. The offset is
10390 // zero.
10391 // "ZC": A memory operand whose address is formed by a base register and
10392 // offset that is suitable for use in instructions with the same
10393 // addressing mode as ll.w and sc.w.
10394 if (Constraint.size() == 1) {
10395 switch (Constraint[0]) {
10396 default:
10397 break;
10398 case 'f':
10399 case 'q':
10400 return C_RegisterClass;
10401 case 'l':
10402 case 'I':
10403 case 'J':
10404 case 'K':
10405 return C_Immediate;
10406 case 'k':
10407 return C_Memory;
10408 }
10409 }
10410
10411 if (Constraint == "ZC" || Constraint == "ZB")
10412 return C_Memory;
10413
10414 // 'm' is handled here.
10415 return TargetLowering::getConstraintType(Constraint);
10416}
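// Example uses of these constraints (illustrative, not from this file):
//   asm("addi.w %0, %1, %2" : "=r"(d) : "r"(s), "I"(42));    // GPR + si12
//   asm("fadd.s %0, %1, %2" : "=f"(x) : "f"(a), "f"(b));     // FPR operands
//   asm("csrxchg %0, %1, 0" : "+r"(v) : "q"(m));             // 'q': no $r0/$r1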
10417
10418InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
10419 StringRef ConstraintCode) const {
10420 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
10421 .Case("k", InlineAsm::ConstraintCode::k)
10422 .Case("ZB", InlineAsm::ConstraintCode::ZB)
10423 .Case("ZC", InlineAsm::ConstraintCode::ZC)
10424 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
10425}
10426
10427std::pair<unsigned, const TargetRegisterClass *>
10428LoongArchTargetLowering::getRegForInlineAsmConstraint(
10429 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
10430 // First, see if this is a constraint that directly corresponds to a LoongArch
10431 // register class.
10432 if (Constraint.size() == 1) {
10433 switch (Constraint[0]) {
10434 case 'r':
10435 // TODO: Support fixed vectors up to GRLen?
10436 if (VT.isVector())
10437 break;
10438 return std::make_pair(0U, &LoongArch::GPRRegClass);
10439 case 'q':
10440 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
10441 case 'f':
10442 if (Subtarget.hasBasicF() && VT == MVT::f32)
10443 return std::make_pair(0U, &LoongArch::FPR32RegClass);
10444 if (Subtarget.hasBasicD() && VT == MVT::f64)
10445 return std::make_pair(0U, &LoongArch::FPR64RegClass);
10446 if (Subtarget.hasExtLSX() &&
10447 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
10448 return std::make_pair(0U, &LoongArch::LSX128RegClass);
10449 if (Subtarget.hasExtLASX() &&
10450 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
10451 return std::make_pair(0U, &LoongArch::LASX256RegClass);
10452 break;
10453 default:
10454 break;
10455 }
10456 }
10457
10458 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
10459 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
10460 // constraints while the official register name is prefixed with a '$'. So we
10461 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
10462 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
10463 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
10464 //
10465 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
10466 // decode the usage of register name aliases into their official names. And
10467 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
10468 // official register names.
10469 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
10470 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
10471 bool IsFP = Constraint[2] == 'f';
10472 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
10473 std::pair<unsigned, const TargetRegisterClass *> R;
10474 R = TargetLowering::getRegForInlineAsmConstraint(
10475 TRI, join_items("", Temp.first, Temp.second), VT);
10476 // Match those names to the widest floating point register type available.
10477 if (IsFP) {
10478 unsigned RegNo = R.first;
10479 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
10480 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
10481 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
10482 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
10483 }
10484 }
10485 }
10486 return R;
10487 }
10488
10489 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
10490}
10491
10492void LoongArchTargetLowering::LowerAsmOperandForConstraint(
10493 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
10494 SelectionDAG &DAG) const {
10495 // Currently only support length 1 constraints.
10496 if (Constraint.size() == 1) {
10497 switch (Constraint[0]) {
10498 case 'l':
10499 // Validate & create a 16-bit signed immediate operand.
10500 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10501 uint64_t CVal = C->getSExtValue();
10502 if (isInt<16>(CVal))
10503 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10504 Subtarget.getGRLenVT()));
10505 }
10506 return;
10507 case 'I':
10508 // Validate & create a 12-bit signed immediate operand.
10509 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10510 uint64_t CVal = C->getSExtValue();
10511 if (isInt<12>(CVal))
10512 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
10513 Subtarget.getGRLenVT()));
10514 }
10515 return;
10516 case 'J':
10517 // Validate & create an integer zero operand.
10518 if (auto *C = dyn_cast<ConstantSDNode>(Op))
10519 if (C->getZExtValue() == 0)
10520 Ops.push_back(
10521 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
10522 return;
10523 case 'K':
10524 // Validate & create a 12-bit unsigned immediate operand.
10525 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
10526 uint64_t CVal = C->getZExtValue();
10527 if (isUInt<12>(CVal))
10528 Ops.push_back(
10529 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
10530 }
10531 return;
10532 default:
10533 break;
10534 }
10535 }
10536 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
10537}
10538
10539#define GET_REGISTER_MATCHER
10540#include "LoongArchGenAsmMatcher.inc"
10541
10542Register
10543LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
10544 const MachineFunction &MF) const {
10545 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
10546 std::string NewRegName = Name.second.str();
10547 Register Reg = MatchRegisterAltName(NewRegName);
10548 if (!Reg)
10549 Reg = MatchRegisterName(NewRegName);
10550 if (!Reg)
10551 return Reg;
10552 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
10553 if (!ReservedRegs.test(Reg))
10554 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
10555 StringRef(RegName) + "\"."));
10556 return Reg;
10557}
10558
10559bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
10560 EVT VT, SDValue C) const {
10561 // TODO: Support vectors.
10562 if (!VT.isScalarInteger())
10563 return false;
10564
10565 // Omit the optimization if the data size exceeds GRLen.
10566 if (VT.getSizeInBits() > Subtarget.getGRLen())
10567 return false;
10568
10569 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
10570 const APInt &Imm = ConstNode->getAPIntValue();
10571 // Break MUL into (SLLI + ADD/SUB) or ALSL.
10572 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
10573 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
10574 return true;
10575 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
10576 if (ConstNode->hasOneUse() &&
10577 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
10578 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
10579 return true;
10580 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
10581 // in which the immediate has two set bits. Or Break (MUL x, imm)
10582 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
10583 // equals to (1 << s0) - (1 << s1).
10584 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
10585 unsigned Shifts = Imm.countr_zero();
10586 // Reject immediates which can be composed via a single LUI.
10587 if (Shifts >= 12)
10588 return false;
10589 // Reject multiplications that can be optimized to
10590 // (SLLI (ALSL x, x, 1/2/3/4), s).
10591 APInt ImmPop = Imm.ashr(Shifts);
10592 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
10593 return false;
10594 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
10595 // since it needs one more instruction than the other 3 cases.
10596 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
10597 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
10598 (ImmSmall - Imm).isPowerOf2())
10599 return true;
10600 }
10601 }
10602
10603 return false;
10604}
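// Worked example (illustrative): for x * 6, Imm - 2 == 4 is a power of two,
// so the multiply can decompose into
//   slli.d $t0, $a0, 2       # t0 = x << 2
//   alsl.d $a0, $a0, $t0, 1  # a0 = (x << 1) + t0 == 6 * x
// ($t0 is a scratch register chosen for illustration.)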
10605
10606bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
10607 const AddrMode &AM,
10608 Type *Ty, unsigned AS,
10609 Instruction *I) const {
10610 // LoongArch has four basic addressing modes:
10611 // 1. reg
10612 // 2. reg + 12-bit signed offset
10613 // 3. reg + 14-bit signed offset left-shifted by 2
10614 // 4. reg1 + reg2
10615 // TODO: Add more checks after the vector extension is supported.
10616
10617 // No global is ever allowed as a base.
10618 if (AM.BaseGV)
10619 return false;
10620
10621 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
10622 // with `UAL` feature.
10623 if (!isInt<12>(AM.BaseOffs) &&
10624 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
10625 return false;
10626
10627 switch (AM.Scale) {
10628 case 0:
10629 // "r+i" or just "i", depending on HasBaseReg.
10630 break;
10631 case 1:
10632 // "r+r+i" is not allowed.
10633 if (AM.HasBaseReg && AM.BaseOffs)
10634 return false;
10635 // Otherwise we have "r+r" or "r+i".
10636 break;
10637 case 2:
10638 // "2*r+r" or "2*r+i" is not allowed.
10639 if (AM.HasBaseReg || AM.BaseOffs)
10640 return false;
10641 // Allow "2*r" as "r+r".
10642 break;
10643 default:
10644 return false;
10645 }
10646
10647 return true;
10648}
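// For example (illustrative): `ld.w $a0, $a1, 2044` uses mode 2 (reg + si12),
// `ldptr.w $a0, $a1, 2048` uses mode 3 (reg + si14 shifted left by 2, with
// UAL), and `ldx.w $a0, $a1, $a2` uses mode 4 (reg1 + reg2).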
10649
10650bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
10651 return isInt<12>(Imm);
10652}
10653
10654bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
10655 return isInt<12>(Imm);
10656}
10657
10658bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
10659 // Zexts are free if they can be combined with a load.
10660 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
10661 // poorly with type legalization of compares preferring sext.
10662 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
10663 EVT MemVT = LD->getMemoryVT();
10664 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
10665 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
10666 LD->getExtensionType() == ISD::ZEXTLOAD))
10667 return true;
10668 }
10669
10670 return TargetLowering::isZExtFree(Val, VT2);
10671}
10672
10673bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
10674 EVT DstVT) const {
10675 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
10676}
10677
10678bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
10679 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
10680}
10681
10682bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
10683 // TODO: Support vectors.
10684 if (Y.getValueType().isVector())
10685 return false;
10686
10687 return !isa<ConstantSDNode>(Y);
10688}
10689
10690ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
10691 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
10692 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
10693}
10694
10695bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
10696 Type *Ty, bool IsSigned) const {
10697 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
10698 return true;
10699
10700 return IsSigned;
10701}
10702
10703bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
10704 // Return false to suppress the unnecessary extensions if the LibCall
10705 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
10706 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
10707 Type.getSizeInBits() < Subtarget.getGRLen()))
10708 return false;
10709 return true;
10710}
10711
10712// memcpy and other memory intrinsics typically try to use wider loads/stores
10713// when the source/dest is aligned and the copy size is large enough. We
10714// therefore want to align such objects passed to memory intrinsics.
10715bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
10716 unsigned &MinSize,
10717 Align &PrefAlign) const {
10718 if (!isa<MemIntrinsic>(CI))
10719 return false;
10720
10721 if (Subtarget.is64Bit()) {
10722 MinSize = 8;
10723 PrefAlign = Align(8);
10724 } else {
10725 MinSize = 4;
10726 PrefAlign = Align(4);
10727 }
10728
10729 return true;
10730}
10731
10732TargetLoweringBase::LegalizeTypeAction
10733LoongArchTargetLowering::getPreferredVectorAction(MVT VT) const {
10734 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
10735 VT.getVectorElementType() != MVT::i1)
10736 return TypeWidenVector;
10737
10738 return TargetLoweringBase::getPreferredVectorAction(VT);
10739}
10740
10741bool LoongArchTargetLowering::splitValueIntoRegisterParts(
10742 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
10743 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
10744 bool IsABIRegCopy = CC.has_value();
10745 EVT ValueVT = Val.getValueType();
10746
10747 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10748 PartVT == MVT::f32) {
10749 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
10750 // nan, and cast to f32.
10751 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
10752 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
10753 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
10754 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
10755 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
10756 Parts[0] = Val;
10757 return true;
10758 }
10759
10760 return false;
10761}
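// For example (illustrative): passing a half-precision 1.0 (bits 0x3c00)
// this way produces the f32 bit pattern 0xffff3c00; the all-ones high half
// forms a NaN box, so hardware reading the register as f32 sees a NaN rather
// than a garbage denormal.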
10762
10763SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
10764 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
10765 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
10766 bool IsABIRegCopy = CC.has_value();
10767
10768 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
10769 PartVT == MVT::f32) {
10770 SDValue Val = Parts[0];
10771
10772 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
10773 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
10774 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
10775 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
10776 return Val;
10777 }
10778
10779 return SDValue();
10780}
10781
10782MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
10783 CallingConv::ID CC,
10784 EVT VT) const {
10785 // Use f32 to pass f16.
10786 if (VT == MVT::f16 && Subtarget.hasBasicF())
10787 return MVT::f32;
10788
10789 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
10790}
10791
10792unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
10793 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
10794 // Use f32 to pass f16.
10795 if (VT == MVT::f16 && Subtarget.hasBasicF())
10796 return 1;
10797
10798 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
10799}
10800
10801void LoongArchTargetLowering::computeKnownBitsForTargetNode(
10802 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
10803 const SelectionDAG &DAG, unsigned Depth) const {
10804 unsigned Opc = Op.getOpcode();
10805 Known.resetAll();
10806 switch (Opc) {
10807 default:
10808 break;
10809 case LoongArchISD::VPICK_ZEXT_ELT: {
10810 assert(isa<VTSDNode>(Op->getOperand(2)) && "Unexpected operand!");
10811 EVT VT = cast<VTSDNode>(Op->getOperand(2))->getVT();
10812 unsigned VTBits = VT.getScalarSizeInBits();
10813 assert(Known.getBitWidth() >= VTBits && "Unexpected width!");
10814 Known.Zero.setBitsFrom(VTBits);
10815 break;
10816 }
10817 }
10818}
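// e.g. (illustrative): for (VPICK_ZEXT_ELT v8i16, i, i16) all result bits
// above bit 15 are reported as known zero, which lets generic combines drop a
// redundant (and X, 0xffff) applied to the extracted value.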
10819
10820bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
10821 SDValue Op, const APInt &OriginalDemandedBits,
10822 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
10823 unsigned Depth) const {
10824 EVT VT = Op.getValueType();
10825 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
10826 unsigned Opc = Op.getOpcode();
10827 switch (Opc) {
10828 default:
10829 break;
10830 case LoongArchISD::VMSKLTZ:
10831 case LoongArchISD::XVMSKLTZ: {
10832 SDValue Src = Op.getOperand(0);
10833 MVT SrcVT = Src.getSimpleValueType();
10834 unsigned SrcBits = SrcVT.getScalarSizeInBits();
10835 unsigned NumElts = SrcVT.getVectorNumElements();
10836
10837 // If we don't need the sign bits at all just return zero.
10838 if (OriginalDemandedBits.countr_zero() >= NumElts)
10839 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
10840
10841 // Only demand the vector elements of the sign bits we need.
10842 APInt KnownUndef, KnownZero;
10843 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
10844 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
10845 TLO, Depth + 1))
10846 return true;
10847
10848 Known.Zero = KnownZero.zext(BitWidth);
10849 Known.Zero.setHighBits(BitWidth - NumElts);
10850
10851 // [X]VMSKLTZ only uses the MSB from each vector element.
10852 KnownBits KnownSrc;
10853 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
10854 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
10855 Depth + 1))
10856 return true;
10857
10858 if (KnownSrc.One[SrcBits - 1])
10859 Known.One.setLowBits(NumElts);
10860 else if (KnownSrc.Zero[SrcBits - 1])
10861 Known.Zero.setLowBits(NumElts);
10862
10863 // Attempt to avoid multi-use ops if we don't need anything from it.
10864 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
10865 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
10866 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
10867 return false;
10868 }
10869 }
10870
10871 return TargetLowering::SimplifyDemandedBitsForTargetNode(
10872 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
10873}
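// e.g. (illustrative): if a user only demands bit 0 of (VMSKLTZ v16i8 X),
// the code above narrows the demand to the sign bit of element 0, allowing
// the computations feeding the other fifteen lanes to be simplified away.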
10874
10875bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
10876 unsigned Opc = VecOp.getOpcode();
10877
10878 // Assume target opcodes can't be scalarized.
10879 // TODO - do we have any exceptions?
10880 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
10881 return false;
10882
10883 // If the vector op is not supported, try to convert to scalar.
10884 EVT VecVT = VecOp.getValueType();
10885 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
10886 return true;
10887
10888 // If the vector op is supported, but the scalar op is not, the transform may
10889 // not be worthwhile.
10890 EVT ScalarVT = VecVT.getScalarType();
10891 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
10892}
10893
10894bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
10895 unsigned Index) const {
10896 if (!Subtarget.hasExtLASX())
10897 return false;
10898
10899 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
10900 return Index == 0;
10901}
10902
10903bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
10904 unsigned Index) const {
10905 EVT EltVT = VT.getScalarType();
10906
10907 // Extracting a scalar FP value from index 0 of a vector is free.
10908 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
10909}
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue PromoteMaskArithmetic(SDValue N, const SDLoc &DL, EVT VT, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned Depth)
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSupportedReciprocalEstimateType(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue performEXTENDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static bool buildVPERMIInfo(ArrayRef< int > Mask, SDValue V1, SDValue V2, SmallVectorImpl< SDValue > &SrcVec, unsigned &MaskImm)
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static bool isConstantSplatVector(SDValue N, APInt &SplatValue, unsigned MinSizeInBits)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VEXTRINS (if possible).
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VPERMI (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static SDValue combineFP_ROUND(SDValue N, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1517
bool isZero() const
Definition APFloat.h:1530
APInt bitcastToAPInt() const
Definition APFloat.h:1426
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1054
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1414
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1075
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:967
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1708
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1411
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
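As a rough illustration of how these AtomicRMWInst accessors feed an expansion decision, here is a sketch; needsMaskedExpansion is hypothetical and is not this file's shouldExpandAtomicRMWInIR logic:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical predicate: does this atomicrmw need a masked LL/SC loop?
static bool needsMaskedExpansion(const AtomicRMWInst *AI) {
  // Sub-word (i8/i16) accesses are typically widened and masked.
  unsigned Bits = AI->getType()->getPrimitiveSizeInBits().getFixedValue();
  if (Bits < 32)
    return true;
  // Nand (*p = ~(old & v)) rarely has a native encoding.
  return AI->getOperation() == AtomicRMWInst::Nand;
}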
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
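Schematically, argument lowering drives the CCState/CCValAssign helpers above as in this sketch; CC_Hypothetical stands in for a real CCAssignFn, and the parameters mirror what LowerFormalArguments receives:

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static void analyzeArgs(CallingConv::ID CallConv, bool IsVarArg,
                        MachineFunction &MF, SelectionDAG &DAG,
                        const SmallVectorImpl<ISD::InputArg> &Ins,
                        CCAssignFn CC_Hypothetical) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_Hypothetical);
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Value arrives in VA.getLocReg(); add a live-in copy here.
    } else {
      // Value sits at VA.getLocMemOffset() in the caller's frame.
    }
  }
}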
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:501
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2847
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
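For example (a sketch using only the LoadSDNode accessors above plus ISD::ZEXTLOAD), a combine might recognize a zero-extending byte load:

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

// True if V is a zero-extending load whose in-memory type is i8.
static bool isZExtByteLoad(SDValue V) {
  auto *LD = dyn_cast<LoadSDNode>(V.getNode());
  return LD && LD->getExtensionType() == ISD::ZEXTLOAD &&
         LD->getMemoryVT() == MVT::i8;
}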
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const MachineFunction &MF) const override
Returns true if it's reasonable to merge stores to MemVT size.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getDoubleNumVectorElementsVT() const
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
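A small sketch of the MVT queries above, in the spirit of choosing between 128-bit and 256-bit vector handling (the helper name is illustrative):

#include "llvm/CodeGenTypes/MachineValueType.h"
using namespace llvm;

// Halve an over-wide fixed-length vector type, e.g. v8i32 -> v4i32.
static MVT halveIfWideVector(MVT VT) {
  if (VT.isVector() && !VT.isScalableVector() && VT.is256BitVector())
    return VT.getHalfNumVectorElementsVT();
  return VT;
}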
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
bool isImplicitDef() const
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
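These SDValue/SDNode accessors are the basic pattern-matching vocabulary of DAG combines; a sketch of a typical one-use match (hypothetical helper):

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// Match (and X, C) where the AND has one use and C is a shifted mask.
static bool isOneUseAndWithShiftedMask(SDValue V) {
  if (V.getOpcode() != ISD::AND || !V.hasOneUse())
    return false;
  auto *C = dyn_cast<ConstantSDNode>(V.getOperand(1).getNode());
  return C && isShiftedMask_64(C->getZExtValue());
}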
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which must always have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
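The SelectionDAG factory methods above compose as in this sketch, a hypothetical rewrite of x into (x << 1) | 1:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue buildShlOrOne(SDValue X, SelectionDAG &DAG) {
  SDLoc DL(X);
  EVT VT = X.getValueType();
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
                            DAG.getShiftAmountConstant(1, VT, DL));
  return DAG.getNode(ISD::OR, DL, VT, Shl, DAG.getConstant(1, DL, VT));
}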
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
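Taken together, these TargetLoweringBase hooks are what a target's constructor calls to describe its ISA. A generic sketch with a hypothetical target (not this file's actual configuration):

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Hypothetical target, only to show the hooks in context.
struct MyTargetLowering : public TargetLowering {
  using TargetLowering::TargetLowering; // inherit whichever ctor applies

  void configureISA() {
    setBooleanContents(ZeroOrOneBooleanContent);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i16, Legal);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setMinCmpXchgSizeInBits(32);
  }
};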
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
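A sketch of driving SimplifyDemandedBits from within PerformDAGCombine; the 32-of-64 demanded mask is purely illustrative:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static SDValue simplifyLow32(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                             SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;
  APInt Demanded = APInt::getLowBitsSet(64, 32); // illustration only
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, Known, TLO)) {
    DCI.CommitTargetLoweringOpt(TLO);
    return SDValue(N, 0);
  }
  return SDValue();
}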
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isExtVecInRegOpcode(unsigned Opcode)
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is a bitwise logic opcode.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
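One common use of the condition-code helpers above is rebuilding a SETCC with the inverse predicate; a sketch (invertSetCC is hypothetical):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Rebuild a SETCC with the inverse condition, e.g. SETEQ -> SETNE.
static SDValue invertSetCC(SDValue Cond, SelectionDAG &DAG) {
  if (Cond.getOpcode() != ISD::SETCC)
    return SDValue();
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  ISD::CondCode InvCC =
      ISD::getSetCCInverse(CC, Cond.getOperand(0).getValueType());
  return DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0),
                      Cond.getOperand(1), InvCC);
}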
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
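The length of this sequence is the natural cost metric when deciding between inline materialization and a constant-pool load; a sketch (the include path assumes the in-tree MCTargetDesc layout):

#include "MCTargetDesc/LoongArchMatInt.h"

// Number of instructions needed to materialize Val in a GPR.
static unsigned immCost(int64_t Val) {
  return llvm::LoongArchMatInt::generateInstSeq(Val).size();
}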
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:325
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
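A small worked example, assuming an LLVM build tree (llvm/Analysis/VectorUtils.h and llvm/ADT/SmallVector.h); the widening succeeds here because adjacent lanes move in pairs:

    int Mask[] = {0, 1, 4, 5, 2, 3, 6, 7}; // v8 mask, pairs stay together
    SmallVector<int, 8> Wide;
    bool Ok = widenShuffleMaskElts(/*Scale=*/2, Mask, Wide);
    // Ok == true and Wide == {0, 2, 1, 3}: an equivalent v4 mask of
    // double-width elements.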
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
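Together with isPowerOf2_64 above, this enables the classic strength-reduction test. A sketch with an invented helper name:

    // Return the shift amount if C is a power of two, else -1, so that a
    // multiply by C can be rewritten as a left shift.
    static int mulToShiftAmount(uint64_t C) {
      return llvm::isPowerOf2_64(C) ? (int)llvm::Log2_64(C) : -1;
    }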
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64-bit version).
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64-bit version).
Definition MathExtras.h:261
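These two mask predicates are how combines recognize AND masks that map onto bitfield-extract instructions (bstrpick.w/bstrpick.d on LoongArch). A hedged sketch with an invented helper:

    // A contiguous run of ones anywhere in the word; isMask_64 is the
    // run-starts-at-bit-0 special case of isShiftedMask_64.
    static bool isContiguousOnesMask(uint64_t Mask, unsigned &Lsb) {
      if (!llvm::isShiftedMask_64(Mask))
        return false;
      Lsb = llvm::countr_zero(Mask); // low end of the run of ones
      return true;
    }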
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
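These three constant predicates usually appear together when matching selects or setccs with constant arms. A minimal sketch; the node shape is assumed and the helper name is invented:

    // True for select(cond, 1, 0), a shape that can fold to the setcc itself.
    static bool isSelectOfOneZero(SDValue Sel) {
      return isOneConstant(Sel.getOperand(1)) &&
             isNullConstant(Sel.getOperand(2));
    }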
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
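Most of the EVT queries above compose into shape checks like the following hedged sketch (the helper name is invented; 128 bits is the LSX vector width):

    // True for the integer LSX types v16i8/v8i16/v4i32/v2i64.
    static bool isLSXIntVector(EVT VT) {
      return VT.is128BitVector() && VT.getVectorElementType().isInteger();
    }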
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
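A sketch of the common use during call lowering, assuming MF, FI, DAG, Chain, DL, Arg and StackPtr are in scope as they would be inside a LowerCall implementation:

    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
    SDValue Store = DAG.getStore(Chain, DL, Arg, StackPtr, PtrInfo);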
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale. If BaseGV is null, there is no BaseGV.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to the caller.