LLVM 23.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
407
413
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
421 Legal);
423 VT, Legal);
430 Expand);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
459 VT, Expand);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
473 }
474
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
485 }
486
487 // Compute derived properties from the register classes.
488 computeRegisterProperties(Subtarget.getRegisterInfo());
489
491
494
495 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
496
498
499 // Function alignments.
501 // Set preferred alignments.
502 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
503 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
504 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
505
506 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
507 if (Subtarget.hasLAMCAS())
509
510 if (Subtarget.hasSCQ()) {
513 }
514
515 // Disable strict node mutation.
516 IsStrictFPEnabled = true;
517}
518
520 const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it in the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
525 return false;
526}
527
529 SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
532 return lowerATOMIC_FENCE(Op, DAG);
534 return lowerEH_DWARF_CFA(Op, DAG);
536 return lowerGlobalAddress(Op, DAG);
538 return lowerGlobalTLSAddress(Op, DAG);
540 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
542 return lowerINTRINSIC_W_CHAIN(Op, DAG);
544 return lowerINTRINSIC_VOID(Op, DAG);
546 return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, false);
556 return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
572 return lowerWRITE_REGISTER(Op, DAG);
574 return lowerINSERT_VECTOR_ELT(Op, DAG);
576 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
578 return lowerBUILD_VECTOR(Op, DAG);
580 return lowerCONCAT_VECTORS(Op, DAG);
582 return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
586 return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::ROTL:
604 case ISD::ROTR:
605 return lowerRotate(Op, DAG);
613 return lowerVECREDUCE(Op, DAG);
614 case ISD::ConstantFP:
615 return lowerConstantFP(Op, DAG);
616 case ISD::SETCC:
617 return lowerSETCC(Op, DAG);
618 }
619 return SDValue();
620}
621
622// Helper to attempt to return a cheaper, bit-inverted version of \p V.
624 // TODO: don't always ignore oneuse constraints.
625 V = peekThroughBitcasts(V);
626 EVT VT = V.getValueType();
627
628 // Match not(xor X, -1) -> X.
629 if (V.getOpcode() == ISD::XOR &&
630 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
631 isAllOnesConstant(V.getOperand(1))))
632 return V.getOperand(0);
633
634 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
635 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
636 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
637 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
638 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
639 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
640 V.getOperand(1));
641 }
642 }
643
644 // Match not(SplatVector(not(X)) -> SplatVector(X).
645 if (V.getOpcode() == ISD::BUILD_VECTOR) {
646 if (SDValue SplatValue =
647 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
648 if (!V->isOnlyUserOf(SplatValue.getNode()))
649 return SDValue();
650
651 if (SDValue Not = isNOT(SplatValue, DAG)) {
652 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
653 return DAG.getSplat(VT, SDLoc(Not), Not);
654 }
655 }
656 }
657
658 // Match not(or(not(X),not(Y))) -> and(X, Y).
659 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
660 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
661 // TODO: Handle cases with single NOT operand -> VANDN
662 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
663 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
664 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
665 DAG.getBitcast(VT, Op1));
666 }
667
668 // TODO: Add more matching patterns. Such as,
669 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
670 // not(slt(C, X)) -> slt(X - 1, C)
671
672 return SDValue();
673}
674
// Lower a scalar FP constant: either materialize the bit pattern through the
// integer pipeline and move it into an FPR, or return SDValue() so generic
// lowering falls back to a constant-pool load (or VLDI, when cheaper).
SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
  const APFloat &FPVal = CFP->getValueAPF();
  SDLoc DL(CFP);

  // Only reached for FP types the subtarget actually supports in registers.
  assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
         (VT == MVT::f64 && Subtarget.hasBasicD()));

  // If value is 0.0 or -0.0, just ignore it.
  if (FPVal.isZero())
    return SDValue();

  // If lsx enabled, use cheaper 'vldi' instruction if possible.
  if (isFPImmVLDILegal(FPVal, VT))
    return SDValue();

  // Construct as integer, and move to float register.
  APInt INTVal = FPVal.bitcastToAPInt();

  // If more than MaterializeFPImmInsNum instructions will be used to
  // generate the INTVal and move it to float register, fallback to
  // use floating point load from the constant pool.
  // NOTE(review): the statement defining `Seq` (the integer materialization
  // sequence, presumably LoongArchMatInt::generateInstSeq on INTVal) appears
  // to have been lost in extraction just above this line — verify against
  // upstream before relying on this block.
  int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
  if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
    return SDValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected floating point type!");
    break;
  case MVT::f32: {
    // Build the 32-bit pattern in a GPR, then move GPR -> FPR32.
    SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
    if (Subtarget.is64Bit())
      NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
    return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
                                           : LoongArchISD::MOVGR2FR_W,
                       DL, VT, NewVal);
  }
  case MVT::f64: {
    if (Subtarget.is64Bit()) {
      // One 64-bit GPR move covers the whole pattern on LA64.
      SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
      return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
    }
    // LA32: move the low and high 32-bit halves into the FPR64 separately.
    SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
    SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
    return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
  }
  }

  return SDValue();
}
729
730// Ensure SETCC result and operand have the same bit width; isel does not
731// support mismatched widths.
732SDValue LoongArchTargetLowering::lowerSETCC(SDValue Op,
733 SelectionDAG &DAG) const {
734 SDLoc DL(Op);
735 EVT ResultVT = Op.getValueType();
736 EVT OperandVT = Op.getOperand(0).getValueType();
737
738 EVT SetCCResultVT =
739 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), OperandVT);
740
741 if (ResultVT == SetCCResultVT)
742 return Op;
743
744 assert(Op.getOperand(0).getValueType() == Op.getOperand(1).getValueType() &&
745 "SETCC operands must have the same type!");
746
747 SDValue SetCCNode =
748 DAG.getNode(ISD::SETCC, DL, SetCCResultVT, Op.getOperand(0),
749 Op.getOperand(1), Op.getOperand(2));
750
751 if (ResultVT.bitsGT(SetCCResultVT))
752 SetCCNode = DAG.getNode(ISD::SIGN_EXTEND, DL, ResultVT, SetCCNode);
753 else if (ResultVT.bitsLT(SetCCResultVT))
754 SetCCNode = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, SetCCNode);
755
756 return SetCCNode;
757}
758
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
// can be lowered to:
// VHADDW_D_W vr0, vr0, vr0
// VHADDW_Q_D vr0, vr0, vr0
// VPICKVE2GR_D a0, vr0, 0
// ADDI_W a0, a0, 0
SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  // Source vector geometry and the scalar width of the reduction result.
  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
  unsigned ResBits = OpVT.getScalarSizeInBits();

  unsigned LegalVecSize = 128;
  bool isLASX256Vector =
      Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;

  // Ensure operand type legal or enable it legal.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  // NumEles is designed for iterations count, v4i32 for LSX
  // and v8i32 for LASX should have the same count.
  if (isLASX256Vector) {
    NumEles /= 2;
    LegalVecSize = 256;
  }

  // Each VHADDW step combines adjacent lanes into wider partial sums (per
  // the VHADDW_D_W / VHADDW_Q_D example above), halving the live lane count.
  // NOTE(review): the result type here is built from EleBits *before* the
  // loop-header doubling is applied for that step; confirm the width math
  // against upstream, as this page may have lost a statement in extraction.
  for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
    MVT IntTy = MVT::getIntegerVT(EleBits);
    MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
    Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
  }

  // A 256-bit vector still holds one partial sum per 128-bit half; permute
  // the high half down (XVPERMI imm=2) and add the halves together.
  if (isLASX256Vector) {
    SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
                              DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
    Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
  }

  // Reinterpret the accumulator as a vector of the result type and extract
  // lane 0 as the final scalar sum.
  Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
810
811// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
812// For Example:
813// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
814// can be lowered to:
815// VBSRL_V vr1, vr0, 8
816// VMAX_W vr0, vr1, vr0
817// VBSRL_V vr1, vr0, 4
818// VMAX_W vr0, vr1, vr0
819// VPICKVE2GR_W a0, vr0, 0
820// For 256 bit vector, it is illegal and will be spilt into
821// two 128 bit vector by default then processed by this.
822SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
823 SelectionDAG &DAG) const {
824 SDLoc DL(Op);
825
826 MVT OpVT = Op.getSimpleValueType();
827 SDValue Val = Op.getOperand(0);
828
829 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
830 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
831
832 // Ensure operand type legal or enable it legal.
833 while (!isTypeLegal(Val.getSimpleValueType())) {
834 Val = DAG.WidenVector(Val, DL);
835 }
836
837 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
838 MVT VecTy = Val.getSimpleValueType();
839 MVT GRLenVT = Subtarget.getGRLenVT();
840
841 for (int i = NumEles; i > 1; i /= 2) {
842 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
843 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
844 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
845 }
846
847 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
848 DAG.getConstant(0, DL, GRLenVT));
849}
850
851SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
852 SelectionDAG &DAG) const {
853 unsigned IsData = Op.getConstantOperandVal(4);
854
855 // We don't support non-data prefetch.
856 // Just preserve the chain.
857 if (!IsData)
858 return Op.getOperand(0);
859
860 return Op;
861}
862
863SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
864 SelectionDAG &DAG) const {
865 MVT VT = Op.getSimpleValueType();
866 assert(VT.isVector() && "Unexpected type");
867
868 SDLoc DL(Op);
869 SDValue R = Op.getOperand(0);
870 SDValue Amt = Op.getOperand(1);
871 unsigned Opcode = Op.getOpcode();
872 unsigned EltSizeInBits = VT.getScalarSizeInBits();
873
874 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
875 if (V.getOpcode() != ISD::BUILD_VECTOR)
876 return false;
877 if (SDValue SplatValue =
878 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
879 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
880 CstSplatValue = C->getAPIntValue();
881 return true;
882 }
883 }
884 return false;
885 };
886
887 // Check for constant splat rotation amount.
888 APInt CstSplatValue;
889 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
890 bool isROTL = Opcode == ISD::ROTL;
891
892 // Check for splat rotate by zero.
893 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
894 return R;
895
896 // LoongArch targets always prefer ISD::ROTR.
897 if (isROTL) {
898 SDValue Zero = DAG.getConstant(0, DL, VT);
899 return DAG.getNode(ISD::ROTR, DL, VT, R,
900 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
901 }
902
903 // Rotate by a immediate.
904 if (IsCstSplat) {
905 // ISD::ROTR: Attemp to rotate by a positive immediate.
906 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
907 if (SDValue Urem =
908 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
909 return DAG.getNode(Opcode, DL, VT, R, Urem);
910 }
911
912 return Op;
913}
914
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
                                      ISD::CondCode CC, SDValue Val) {
  assert(Val->getOpcode() == ISD::SETCC);
  SDValue LHS2 = Val.getOperand(0);
  SDValue RHS2 = Val.getOperand(1);
  ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();

  if (LHS == LHS2 && RHS == RHS2) {
    // Operands match in the same order: compare condition codes directly,
    // then against the inverse.
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  } else if (LHS == RHS2 && RHS == LHS2) {
    // Operands match in swapped order.
    // NOTE(review): upstream normally swaps CC2 here first (via
    // ISD::getSetCCSwappedOperands) before comparing; that statement looks
    // elided by the page extraction — confirm against the real source.
    if (CC == CC2)
      return true;
    if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
      return false;
  }

  // Neither a match nor a provable inverse.
  return std::nullopt;
}
940
942 const LoongArchSubtarget &Subtarget) {
943 SDValue CondV = N->getOperand(0);
944 SDValue TrueV = N->getOperand(1);
945 SDValue FalseV = N->getOperand(2);
946 MVT VT = N->getSimpleValueType(0);
947 SDLoc DL(N);
948
949 // (select c, -1, y) -> -c | y
950 if (isAllOnesConstant(TrueV)) {
951 SDValue Neg = DAG.getNegative(CondV, DL, VT);
952 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
953 }
954 // (select c, y, -1) -> (c-1) | y
955 if (isAllOnesConstant(FalseV)) {
956 SDValue Neg =
957 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
958 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
959 }
960
961 // (select c, 0, y) -> (c-1) & y
962 if (isNullConstant(TrueV)) {
963 SDValue Neg =
964 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
965 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
966 }
967 // (select c, y, 0) -> -c & y
968 if (isNullConstant(FalseV)) {
969 SDValue Neg = DAG.getNegative(CondV, DL, VT);
970 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
971 }
972
973 // select c, ~x, x --> xor -c, x
974 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
975 const APInt &TrueVal = TrueV->getAsAPIntVal();
976 const APInt &FalseVal = FalseV->getAsAPIntVal();
977 if (~TrueVal == FalseVal) {
978 SDValue Neg = DAG.getNegative(CondV, DL, VT);
979 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
980 }
981 }
982
983 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
984 // when both truev and falsev are also setcc.
985 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
986 FalseV.getOpcode() == ISD::SETCC) {
987 SDValue LHS = CondV.getOperand(0);
988 SDValue RHS = CondV.getOperand(1);
989 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
990
991 // (select x, x, y) -> x | y
992 // (select !x, x, y) -> x & y
993 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
994 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
995 DAG.getFreeze(FalseV));
996 }
997 // (select x, y, x) -> x & y
998 // (select !x, y, x) -> x | y
999 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
1000 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
1001 DAG.getFreeze(TrueV), FalseV);
1002 }
1003 }
1004
1005 return SDValue();
1006}
1007
1008// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
1009// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
1010// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
1011// being `0` or `-1`. In such cases we can replace `select` with `and`.
1012// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
1013// than `c0`?
1014static SDValue
1016 const LoongArchSubtarget &Subtarget) {
1017 unsigned SelOpNo = 0;
1018 SDValue Sel = BO->getOperand(0);
1019 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1020 SelOpNo = 1;
1021 Sel = BO->getOperand(1);
1022 }
1023
1024 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1025 return SDValue();
1026
1027 unsigned ConstSelOpNo = 1;
1028 unsigned OtherSelOpNo = 2;
1029 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1030 ConstSelOpNo = 2;
1031 OtherSelOpNo = 1;
1032 }
1033 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1034 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1035 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1036 return SDValue();
1037
1038 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1039 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1040 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1041 return SDValue();
1042
1043 SDLoc DL(Sel);
1044 EVT VT = BO->getValueType(0);
1045
1046 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1047 if (SelOpNo == 1)
1048 std::swap(NewConstOps[0], NewConstOps[1]);
1049
1050 SDValue NewConstOp =
1051 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1052 if (!NewConstOp)
1053 return SDValue();
1054
1055 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1056 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1057 return SDValue();
1058
1059 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1060 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1061 if (SelOpNo == 1)
1062 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1063 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1064
1065 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1066 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1067 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1068}
1069
1070// Changes the condition code and swaps operands if necessary, so the SetCC
1071// operation matches one of the comparisons supported directly by branches
1072// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1073// compare with 1/-1.
1075 ISD::CondCode &CC, SelectionDAG &DAG) {
1076 // If this is a single bit test that can't be handled by ANDI, shift the
1077 // bit to be tested to the MSB and perform a signed compare with 0.
1078 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1079 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1080 isa<ConstantSDNode>(LHS.getOperand(1))) {
1081 uint64_t Mask = LHS.getConstantOperandVal(1);
1082 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1083 unsigned ShAmt = 0;
1084 if (isPowerOf2_64(Mask)) {
1085 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1086 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1087 } else {
1088 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1089 }
1090
1091 LHS = LHS.getOperand(0);
1092 if (ShAmt != 0)
1093 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1094 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1095 return;
1096 }
1097 }
1098
1099 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1100 int64_t C = RHSC->getSExtValue();
1101 switch (CC) {
1102 default:
1103 break;
1104 case ISD::SETGT:
1105 // Convert X > -1 to X >= 0.
1106 if (C == -1) {
1107 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1108 CC = ISD::SETGE;
1109 return;
1110 }
1111 break;
1112 case ISD::SETLT:
1113 // Convert X < 1 to 0 >= X.
1114 if (C == 1) {
1115 RHS = LHS;
1116 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1117 CC = ISD::SETGE;
1118 return;
1119 }
1120 break;
1121 }
1122 }
1123
1124 switch (CC) {
1125 default:
1126 break;
1127 case ISD::SETGT:
1128 case ISD::SETLE:
1129 case ISD::SETUGT:
1130 case ISD::SETULE:
1132 std::swap(LHS, RHS);
1133 break;
1134 }
1135}
1136
// Lower ISD::SELECT. Tries branchless bitwise forms first, then folds a
// profitable user binop through the select, and finally emits
// LoongArchISD::SELECT_CC (merging an integer SETCC condition when possible).
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // First try to replace the select entirely with and/or/xor arithmetic on
  // the condition (e.g. (select c, -1, y) -> -c | y).
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If our only user is a speculatable binop, try folding that binop through
  // the select so the whole expression collapses.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                          DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // Since CondV is 0/1, (cond + false) or (false - cond) selects exactly.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  // Canonicalize the compare into a branch-friendly form (may rewrite
  // LHS/RHS/CCVal in place).
  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1235
1236SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1237 SelectionDAG &DAG) const {
1238 SDValue CondV = Op.getOperand(1);
1239 SDLoc DL(Op);
1240 MVT GRLenVT = Subtarget.getGRLenVT();
1241
1242 if (CondV.getOpcode() == ISD::SETCC) {
1243 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1244 SDValue LHS = CondV.getOperand(0);
1245 SDValue RHS = CondV.getOperand(1);
1246 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1247
1248 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1249
1250 SDValue TargetCC = DAG.getCondCode(CCVal);
1251 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1252 Op.getOperand(0), LHS, RHS, TargetCC,
1253 Op.getOperand(2));
1254 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1255 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1256 Op.getOperand(0), CondV, Op.getOperand(2));
1257 }
1258 }
1259
1260 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1261 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1262 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1263}
1264
1265SDValue
1266LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1267 SelectionDAG &DAG) const {
1268 SDLoc DL(Op);
1269 MVT OpVT = Op.getSimpleValueType();
1270
1271 SDValue Vector = DAG.getUNDEF(OpVT);
1272 SDValue Val = Op.getOperand(0);
1273 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1274
1275 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1276}
1277
1278SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1279 SelectionDAG &DAG) const {
1280 EVT ResTy = Op->getValueType(0);
1281 SDValue Src = Op->getOperand(0);
1282 SDLoc DL(Op);
1283
1284 // LoongArchISD::BITREV_8B is not supported on LA32.
1285 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1286 return SDValue();
1287
1288 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1289 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1290 unsigned int NewEltNum = NewVT.getVectorNumElements();
1291
1292 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1293
1295 for (unsigned int i = 0; i < NewEltNum; i++) {
1296 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1297 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1298 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1299 ? (unsigned)LoongArchISD::BITREV_8B
1300 : (unsigned)ISD::BITREVERSE;
1301 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1302 }
1303 SDValue Res =
1304 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1305
1306 switch (ResTy.getSimpleVT().SimpleTy) {
1307 default:
1308 return SDValue();
1309 case MVT::v16i8:
1310 case MVT::v32i8:
1311 return Res;
1312 case MVT::v8i16:
1313 case MVT::v16i16:
1314 case MVT::v4i32:
1315 case MVT::v8i32: {
1317 for (unsigned int i = 0; i < NewEltNum; i++)
1318 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1319 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1320 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1321 }
1322 }
1323}
1324
1325// Widen element type to get a new mask value (if possible).
1326// For example:
1327// shufflevector <4 x i32> %a, <4 x i32> %b,
1328// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1329// is equivalent to:
1330// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1331// can be lowered to:
1332// VPACKOD_D vr0, vr0, vr1
1334 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1335 unsigned EltBits = VT.getScalarSizeInBits();
1336
1337 if (EltBits > 32 || EltBits == 1)
1338 return SDValue();
1339
1340 SmallVector<int, 8> NewMask;
1341 if (widenShuffleMaskElts(Mask, NewMask)) {
1342 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1343 : MVT::getIntegerVT(EltBits * 2);
1344 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1345 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1346 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1347 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1348 return DAG.getBitcast(
1349 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1350 }
1351 }
1352
1353 return SDValue();
1354}
1355
1356/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1357/// instruction.
1358// The funciton matches elements from one of the input vector shuffled to the
1359// left or right with zeroable elements 'shifted in'. It handles both the
1360// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1361// lane.
1362// Mostly copied from X86.
1363static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1364 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1365 int MaskOffset, const APInt &Zeroable) {
1366 int Size = Mask.size();
1367 unsigned SizeInBits = Size * ScalarSizeInBits;
1368
1369 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1370 for (int i = 0; i < Size; i += Scale)
1371 for (int j = 0; j < Shift; ++j)
1372 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1373 return false;
1374
1375 return true;
1376 };
1377
1378 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1379 int Step = 1) {
1380 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1381 if (!(Mask[i] == -1 || Mask[i] == Low))
1382 return false;
1383 return true;
1384 };
1385
1386 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1387 for (int i = 0; i != Size; i += Scale) {
1388 unsigned Pos = Left ? i + Shift : i;
1389 unsigned Low = Left ? i : i + Shift;
1390 unsigned Len = Scale - Shift;
1391 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1392 return -1;
1393 }
1394
1395 int ShiftEltBits = ScalarSizeInBits * Scale;
1396 bool ByteShift = ShiftEltBits > 64;
1397 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1398 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1399 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1400
1401 // Normalize the scale for byte shifts to still produce an i64 element
1402 // type.
1403 Scale = ByteShift ? Scale / 2 : Scale;
1404
1405 // We need to round trip through the appropriate type for the shift.
1406 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1407 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1408 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1409 return (int)ShiftAmt;
1410 };
1411
1412 unsigned MaxWidth = 128;
1413 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1414 for (int Shift = 1; Shift != Scale; ++Shift)
1415 for (bool Left : {true, false})
1416 if (CheckZeros(Shift, Scale, Left)) {
1417 int ShiftAmt = MatchShift(Shift, Scale, Left);
1418 if (0 < ShiftAmt)
1419 return ShiftAmt;
1420 }
1421
1422 // no match
1423 return -1;
1424}
1425
1426/// Lower VECTOR_SHUFFLE as shift (if possible).
1427///
1428/// For example:
1429/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1430/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1431/// is lowered to:
1432/// (VBSLL_V $v0, $v0, 4)
1433///
1434/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1435/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1436/// is lowered to:
1437/// (VSLLI_D $v0, $v0, 32)
1439 MVT VT, SDValue V1, SDValue V2,
1440 SelectionDAG &DAG,
1441 const LoongArchSubtarget &Subtarget,
1442 const APInt &Zeroable) {
1443 int Size = Mask.size();
1444 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1445
1446 MVT ShiftVT;
1447 SDValue V = V1;
1448 unsigned Opcode;
1449
1450 // Try to match shuffle against V1 shift.
1451 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1452 Mask, 0, Zeroable);
1453
1454 // If V1 failed, try to match shuffle against V2 shift.
1455 if (ShiftAmt < 0) {
1456 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1457 Mask, Size, Zeroable);
1458 V = V2;
1459 }
1460
1461 if (ShiftAmt < 0)
1462 return SDValue();
1463
1464 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1465 "Illegal integer vector type");
1466 V = DAG.getBitcast(ShiftVT, V);
1467 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1468 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1469 return DAG.getBitcast(VT, V);
1470}
1471
1472/// Determine whether a range fits a regular pattern of values.
1473/// This function accounts for the possibility of jumping over the End iterator.
1474template <typename ValType>
1475static bool
1477 unsigned CheckStride,
1479 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1480 auto &I = Begin;
1481
1482 while (I != End) {
1483 if (*I != -1 && *I != ExpectedIndex)
1484 return false;
1485 ExpectedIndex += ExpectedIndexStride;
1486
1487 // Incrementing past End is undefined behaviour so we must increment one
1488 // step at a time and check for End at each step.
1489 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1490 ; // Empty loop body.
1491 }
1492 return true;
1493}
1494
1495/// Compute whether each element of a shuffle is zeroable.
1496///
1497/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1499 SDValue V2, APInt &KnownUndef,
1500 APInt &KnownZero) {
1501 int Size = Mask.size();
1502 KnownUndef = KnownZero = APInt::getZero(Size);
1503
1504 V1 = peekThroughBitcasts(V1);
1505 V2 = peekThroughBitcasts(V2);
1506
1507 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1508 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1509
1510 int VectorSizeInBits = V1.getValueSizeInBits();
1511 int ScalarSizeInBits = VectorSizeInBits / Size;
1512 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1513 (void)ScalarSizeInBits;
1514
1515 for (int i = 0; i < Size; ++i) {
1516 int M = Mask[i];
1517 if (M < 0) {
1518 KnownUndef.setBit(i);
1519 continue;
1520 }
1521 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1522 KnownZero.setBit(i);
1523 continue;
1524 }
1525 }
1526}
1527
1528/// Test whether a shuffle mask is equivalent within each sub-lane.
1529///
1530/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1531/// non-trivial to compute in the face of undef lanes. The representation is
1532/// suitable for use with existing 128-bit shuffles as entries from the second
1533/// vector have been remapped to [LaneSize, 2*LaneSize).
1534static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1535 ArrayRef<int> Mask,
1536 SmallVectorImpl<int> &RepeatedMask) {
1537 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1538 RepeatedMask.assign(LaneSize, -1);
1539 int Size = Mask.size();
1540 for (int i = 0; i < Size; ++i) {
1541 assert(Mask[i] == -1 || Mask[i] >= 0);
1542 if (Mask[i] < 0)
1543 continue;
1544 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1545 // This entry crosses lanes, so there is no way to model this shuffle.
1546 return false;
1547
1548 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1549 // Adjust second vector indices to start at LaneSize instead of Size.
1550 int LocalM =
1551 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1552 if (RepeatedMask[i % LaneSize] < 0)
1553 // This is the first non-undef entry in this slot of a 128-bit lane.
1554 RepeatedMask[i % LaneSize] = LocalM;
1555 else if (RepeatedMask[i % LaneSize] != LocalM)
1556 // Found a mismatch with the repeated mask.
1557 return false;
1558 }
1559 return true;
1560}
1561
1562/// Attempts to match vector shuffle as byte rotation.
1564 ArrayRef<int> Mask) {
1565
1566 SDValue Lo, Hi;
1567 SmallVector<int, 16> RepeatedMask;
1568
1569 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1570 return -1;
1571
1572 int NumElts = RepeatedMask.size();
1573 int Rotation = 0;
1574 int Scale = 16 / NumElts;
1575
1576 for (int i = 0; i < NumElts; ++i) {
1577 int M = RepeatedMask[i];
1578 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1579 "Unexpected mask index.");
1580 if (M < 0)
1581 continue;
1582
1583 // Determine where a rotated vector would have started.
1584 int StartIdx = i - (M % NumElts);
1585 if (StartIdx == 0)
1586 return -1;
1587
1588 // If we found the tail of a vector the rotation must be the missing
1589 // front. If we found the head of a vector, it must be how much of the
1590 // head.
1591 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1592
1593 if (Rotation == 0)
1594 Rotation = CandidateRotation;
1595 else if (Rotation != CandidateRotation)
1596 return -1;
1597
1598 // Compute which value this mask is pointing at.
1599 SDValue MaskV = M < NumElts ? V1 : V2;
1600
1601 // Compute which of the two target values this index should be assigned
1602 // to. This reflects whether the high elements are remaining or the low
1603 // elements are remaining.
1604 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1605
1606 // Either set up this value if we've not encountered it before, or check
1607 // that it remains consistent.
1608 if (!TargetV)
1609 TargetV = MaskV;
1610 else if (TargetV != MaskV)
1611 return -1;
1612 }
1613
1614 // Check that we successfully analyzed the mask, and normalize the results.
1615 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1616 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1617 if (!Lo)
1618 Lo = Hi;
1619 else if (!Hi)
1620 Hi = Lo;
1621
1622 V1 = Lo;
1623 V2 = Hi;
1624
1625 return Rotation * Scale;
1626}
1627
1628/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1629///
1630/// For example:
1631/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1632/// <2 x i32> <i32 3, i32 0>
1633/// is lowered to:
1634/// (VBSRL_V $v1, $v1, 8)
1635/// (VBSLL_V $v0, $v0, 8)
1636/// (VOR_V $v0, $V0, $v1)
1637static SDValue
1639 SDValue V1, SDValue V2, SelectionDAG &DAG,
1640 const LoongArchSubtarget &Subtarget) {
1641
1642 SDValue Lo = V1, Hi = V2;
1643 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1644 if (ByteRotation <= 0)
1645 return SDValue();
1646
1647 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1648 Lo = DAG.getBitcast(ByteVT, Lo);
1649 Hi = DAG.getBitcast(ByteVT, Hi);
1650
1651 int LoByteShift = 16 - ByteRotation;
1652 int HiByteShift = ByteRotation;
1653 MVT GRLenVT = Subtarget.getGRLenVT();
1654
1655 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1656 DAG.getConstant(LoByteShift, DL, GRLenVT));
1657 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1658 DAG.getConstant(HiByteShift, DL, GRLenVT));
1659 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1660}
1661
1662/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1663///
1664/// For example:
1665/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1666/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1667/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1668/// is lowered to:
1669/// (VREPLI $v1, 0)
1670/// (VILVL $v0, $v1, $v0)
1672 ArrayRef<int> Mask, MVT VT,
1673 SDValue V1, SDValue V2,
1674 SelectionDAG &DAG,
1675 const APInt &Zeroable) {
1676 int Bits = VT.getSizeInBits();
1677 int EltBits = VT.getScalarSizeInBits();
1678 int NumElements = VT.getVectorNumElements();
1679
1680 if (Zeroable.isAllOnes())
1681 return DAG.getConstant(0, DL, VT);
1682
1683 // Define a helper function to check a particular ext-scale and lower to it if
1684 // valid.
1685 auto Lower = [&](int Scale) -> SDValue {
1686 SDValue InputV;
1687 bool AnyExt = true;
1688 int Offset = 0;
1689 for (int i = 0; i < NumElements; i++) {
1690 int M = Mask[i];
1691 if (M < 0)
1692 continue;
1693 if (i % Scale != 0) {
1694 // Each of the extended elements need to be zeroable.
1695 if (!Zeroable[i])
1696 return SDValue();
1697
1698 AnyExt = false;
1699 continue;
1700 }
1701
1702 // Each of the base elements needs to be consecutive indices into the
1703 // same input vector.
1704 SDValue V = M < NumElements ? V1 : V2;
1705 M = M % NumElements;
1706 if (!InputV) {
1707 InputV = V;
1708 Offset = M - (i / Scale);
1709
1710 // These offset can't be handled
1711 if (Offset % (NumElements / Scale))
1712 return SDValue();
1713 } else if (InputV != V)
1714 return SDValue();
1715
1716 if (M != (Offset + (i / Scale)))
1717 return SDValue(); // Non-consecutive strided elements.
1718 }
1719
1720 // If we fail to find an input, we have a zero-shuffle which should always
1721 // have already been handled.
1722 if (!InputV)
1723 return SDValue();
1724
1725 do {
1726 unsigned VilVLoHi = LoongArchISD::VILVL;
1727 if (Offset >= (NumElements / 2)) {
1728 VilVLoHi = LoongArchISD::VILVH;
1729 Offset -= (NumElements / 2);
1730 }
1731
1732 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1733 SDValue Ext =
1734 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1735 InputV = DAG.getBitcast(InputVT, InputV);
1736 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1737 Scale /= 2;
1738 EltBits *= 2;
1739 NumElements /= 2;
1740 } while (Scale > 1);
1741 return DAG.getBitcast(VT, InputV);
1742 };
1743
1744 // Each iteration, try extending the elements half as much, but into twice as
1745 // many elements.
1746 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1747 NumExtElements *= 2) {
1748 if (SDValue V = Lower(NumElements / NumExtElements))
1749 return V;
1750 }
1751 return SDValue();
1752}
1753
1754/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1755///
1756/// VREPLVEI performs vector broadcast based on an element specified by an
1757/// integer immediate, with its mask being similar to:
1758/// <x, x, x, ...>
1759/// where x is any valid index.
1760///
1761/// When undef's appear in the mask they are treated as if they were whatever
1762/// value is necessary in order to fit the above form.
1763static SDValue
1765 SDValue V1, SelectionDAG &DAG,
1766 const LoongArchSubtarget &Subtarget) {
1767 int SplatIndex = -1;
1768 for (const auto &M : Mask) {
1769 if (M != -1) {
1770 SplatIndex = M;
1771 break;
1772 }
1773 }
1774
1775 if (SplatIndex == -1)
1776 return DAG.getUNDEF(VT);
1777
1778 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1779 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1780 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1781 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1782 }
1783
1784 return SDValue();
1785}
1786
1787/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1788///
1789/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1790/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1791///
1792/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1793/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1794/// When undef's appear they are treated as if they were whatever value is
1795/// necessary in order to fit the above forms.
1796///
1797/// For example:
1798/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1799/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1800/// i32 7, i32 6, i32 5, i32 4>
1801/// is lowered to:
1802/// (VSHUF4I_H $v0, $v1, 27)
1803/// where the 27 comes from:
1804/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1805static SDValue
1807 SDValue V1, SDValue V2, SelectionDAG &DAG,
1808 const LoongArchSubtarget &Subtarget) {
1809
1810 unsigned SubVecSize = 4;
1811 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1812 SubVecSize = 2;
1813
1814 int SubMask[4] = {-1, -1, -1, -1};
1815 for (unsigned i = 0; i < SubVecSize; ++i) {
1816 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1817 int M = Mask[j];
1818
1819 // Convert from vector index to 4-element subvector index
1820 // If an index refers to an element outside of the subvector then give up
1821 if (M != -1) {
1822 M -= 4 * (j / SubVecSize);
1823 if (M < 0 || M >= 4)
1824 return SDValue();
1825 }
1826
1827 // If the mask has an undef, replace it with the current index.
1828 // Note that it might still be undef if the current index is also undef
1829 if (SubMask[i] == -1)
1830 SubMask[i] = M;
1831 // Check that non-undef values are the same as in the mask. If they
1832 // aren't then give up
1833 else if (M != -1 && M != SubMask[i])
1834 return SDValue();
1835 }
1836 }
1837
1838 // Calculate the immediate. Replace any remaining undefs with zero
1839 int Imm = 0;
1840 for (int i = SubVecSize - 1; i >= 0; --i) {
1841 int M = SubMask[i];
1842
1843 if (M == -1)
1844 M = 0;
1845
1846 Imm <<= 2;
1847 Imm |= M & 0x3;
1848 }
1849
1850 MVT GRLenVT = Subtarget.getGRLenVT();
1851
1852 // Return vshuf4i.d
1853 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1854 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1855 DAG.getConstant(Imm, DL, GRLenVT));
1856
1857 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1858 DAG.getConstant(Imm, DL, GRLenVT));
1859}
1860
1861/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1862///
1863/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1864/// reverse whose mask likes:
1865/// <7, 6, 5, 4, 3, 2, 1, 0>
1866///
1867/// When undef's appear in the mask they are treated as if they were whatever
1868/// value is necessary in order to fit the above forms.
1869static SDValue
1871 SDValue V1, SelectionDAG &DAG,
1872 const LoongArchSubtarget &Subtarget) {
1873 // Only vectors with i8/i16 elements which cannot match other patterns
1874 // directly needs to do this.
1875 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1876 VT != MVT::v16i16)
1877 return SDValue();
1878
1879 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1880 return SDValue();
1881
1882 int WidenNumElts = VT.getVectorNumElements() / 4;
1883 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1884 for (int i = 0; i < WidenNumElts; ++i)
1885 WidenMask[i] = WidenNumElts - 1 - i;
1886
1887 MVT WidenVT = MVT::getVectorVT(
1888 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1889 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1890 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1891 DAG.getUNDEF(WidenVT), WidenMask);
1892
1893 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1894 DAG.getBitcast(VT, WidenRev),
1895 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1896}
1897
1898/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1899///
1900/// VPACKEV interleaves the even elements from each vector.
1901///
1902/// It is possible to lower into VPACKEV when the mask consists of two of the
1903/// following forms interleaved:
1904/// <0, 2, 4, ...>
1905/// <n, n+2, n+4, ...>
1906/// where n is the number of elements in the vector.
1907/// For example:
1908/// <0, 0, 2, 2, 4, 4, ...>
1909/// <0, n, 2, n+2, 4, n+4, ...>
1910///
1911/// When undef's appear in the mask they are treated as if they were whatever
1912/// value is necessary in order to fit the above forms.
1914 MVT VT, SDValue V1, SDValue V2,
1915 SelectionDAG &DAG) {
1916
1917 const auto &Begin = Mask.begin();
1918 const auto &End = Mask.end();
1919 SDValue OriV1 = V1, OriV2 = V2;
1920
1921 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1922 V1 = OriV1;
1923 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1924 V1 = OriV2;
1925 else
1926 return SDValue();
1927
1928 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1929 V2 = OriV1;
1930 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1931 V2 = OriV2;
1932 else
1933 return SDValue();
1934
1935 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1936}
1937
1938/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1939///
1940/// VPACKOD interleaves the odd elements from each vector.
1941///
1942/// It is possible to lower into VPACKOD when the mask consists of two of the
1943/// following forms interleaved:
1944/// <1, 3, 5, ...>
1945/// <n+1, n+3, n+5, ...>
1946/// where n is the number of elements in the vector.
1947/// For example:
1948/// <1, 1, 3, 3, 5, 5, ...>
1949/// <1, n+1, 3, n+3, 5, n+5, ...>
1950///
1951/// When undef's appear in the mask they are treated as if they were whatever
1952/// value is necessary in order to fit the above forms.
1954 MVT VT, SDValue V1, SDValue V2,
1955 SelectionDAG &DAG) {
1956
1957 const auto &Begin = Mask.begin();
1958 const auto &End = Mask.end();
1959 SDValue OriV1 = V1, OriV2 = V2;
1960
1961 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1962 V1 = OriV1;
1963 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1964 V1 = OriV2;
1965 else
1966 return SDValue();
1967
1968 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1969 V2 = OriV1;
1970 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1971 V2 = OriV2;
1972 else
1973 return SDValue();
1974
1975 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1976}
1977
1978/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1979///
1980/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1981/// of each vector.
1982///
1983/// It is possible to lower into VILVH when the mask consists of two of the
1984/// following forms interleaved:
1985/// <x, x+1, x+2, ...>
1986/// <n+x, n+x+1, n+x+2, ...>
1987/// where n is the number of elements in the vector and x is half n.
1988/// For example:
1989/// <x, x, x+1, x+1, x+2, x+2, ...>
1990/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1991///
1992/// When undef's appear in the mask they are treated as if they were whatever
1993/// value is necessary in order to fit the above forms.
1995 MVT VT, SDValue V1, SDValue V2,
1996 SelectionDAG &DAG) {
1997
1998 const auto &Begin = Mask.begin();
1999 const auto &End = Mask.end();
2000 unsigned HalfSize = Mask.size() / 2;
2001 SDValue OriV1 = V1, OriV2 = V2;
2002
2003 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2004 V1 = OriV1;
2005 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
2006 V1 = OriV2;
2007 else
2008 return SDValue();
2009
2010 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2011 V2 = OriV1;
2012 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
2013 1))
2014 V2 = OriV2;
2015 else
2016 return SDValue();
2017
2018 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2019}
2020
2021/// Lower VECTOR_SHUFFLE into VILVL (if possible).
2022///
2023/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
2024/// of each vector.
2025///
2026/// It is possible to lower into VILVL when the mask consists of two of the
2027/// following forms interleaved:
2028/// <0, 1, 2, ...>
2029/// <n, n+1, n+2, ...>
2030/// where n is the number of elements in the vector.
2031/// For example:
2032/// <0, 0, 1, 1, 2, 2, ...>
2033/// <0, n, 1, n+1, 2, n+2, ...>
2034///
2035/// When undef's appear in the mask they are treated as if they were whatever
2036/// value is necessary in order to fit the above forms.
2038 MVT VT, SDValue V1, SDValue V2,
2039 SelectionDAG &DAG) {
2040
2041 const auto &Begin = Mask.begin();
2042 const auto &End = Mask.end();
2043 SDValue OriV1 = V1, OriV2 = V2;
2044
2045 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2046 V1 = OriV1;
2047 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2048 V1 = OriV2;
2049 else
2050 return SDValue();
2051
2052 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2053 V2 = OriV1;
2054 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2055 V2 = OriV2;
2056 else
2057 return SDValue();
2058
2059 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2060}
2061
2062/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2063///
2064/// VPICKEV copies the even elements of each vector into the result vector.
2065///
2066/// It is possible to lower into VPICKEV when the mask consists of two of the
2067/// following forms concatenated:
2068/// <0, 2, 4, ...>
2069/// <n, n+2, n+4, ...>
2070/// where n is the number of elements in the vector.
2071/// For example:
2072/// <0, 2, 4, ..., 0, 2, 4, ...>
2073/// <0, 2, 4, ..., n, n+2, n+4, ...>
2074///
2075/// When undef's appear in the mask they are treated as if they were whatever
2076/// value is necessary in order to fit the above forms.
2078 MVT VT, SDValue V1, SDValue V2,
2079 SelectionDAG &DAG) {
2080
2081 const auto &Begin = Mask.begin();
2082 const auto &Mid = Mask.begin() + Mask.size() / 2;
2083 const auto &End = Mask.end();
2084 SDValue OriV1 = V1, OriV2 = V2;
2085
2086 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2087 V1 = OriV1;
2088 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2089 V1 = OriV2;
2090 else
2091 return SDValue();
2092
2093 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2094 V2 = OriV1;
2095 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2096 V2 = OriV2;
2097
2098 else
2099 return SDValue();
2100
2101 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2102}
2103
2104/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2105///
2106/// VPICKOD copies the odd elements of each vector into the result vector.
2107///
2108/// It is possible to lower into VPICKOD when the mask consists of two of the
2109/// following forms concatenated:
2110/// <1, 3, 5, ...>
2111/// <n+1, n+3, n+5, ...>
2112/// where n is the number of elements in the vector.
2113/// For example:
2114/// <1, 3, 5, ..., 1, 3, 5, ...>
2115/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2116///
2117/// When undef's appear in the mask they are treated as if they were whatever
2118/// value is necessary in order to fit the above forms.
2120 MVT VT, SDValue V1, SDValue V2,
2121 SelectionDAG &DAG) {
2122
2123 const auto &Begin = Mask.begin();
2124 const auto &Mid = Mask.begin() + Mask.size() / 2;
2125 const auto &End = Mask.end();
2126 SDValue OriV1 = V1, OriV2 = V2;
2127
2128 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2129 V1 = OriV1;
2130 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2131 V1 = OriV2;
2132 else
2133 return SDValue();
2134
2135 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2136 V2 = OriV1;
2137 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2138 V2 = OriV2;
2139 else
2140 return SDValue();
2141
2142 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2143}
2144
2145/// Lower VECTOR_SHUFFLE into VSHUF.
2146///
2147/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2148/// adding it as an operand to the resulting VSHUF.
2150 MVT VT, SDValue V1, SDValue V2,
2151 SelectionDAG &DAG,
2152 const LoongArchSubtarget &Subtarget) {
2153
2155 for (auto M : Mask)
2156 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2157
2158 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2159 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2160
2161 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2162 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2163 // VSHF concatenates the vectors in a bitwise fashion:
2164 // <0b00, 0b01> + <0b10, 0b11> ->
2165 // 0b0100 + 0b1110 -> 0b01001110
2166 // <0b10, 0b11, 0b00, 0b01>
2167 // We must therefore swap the operands to get the correct result.
2168 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2169}
2170
2171 /// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2172 ///
2173 /// This routine breaks down the specific type of 128-bit shuffle and
2174 /// dispatches to the lowering routines accordingly.
/// Returns the lowered node, or an empty SDValue if no LSX pattern matched.
2176                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
2177                                  const LoongArchSubtarget &Subtarget) {
2178   assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2179           VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2180           VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2181          "Vector type is unsupported for lsx!");
2183          "Two operands have different types!");
2184   assert(VT.getVectorNumElements() == Mask.size() &&
2185          "Unexpected mask size for shuffle!");
2186   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2187 
  // Record which mask elements are known undef or known zero; the shift and
  // zero/any-extend matchers below can exploit these lanes.
2188   APInt KnownUndef, KnownZero;
2189   computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2190   APInt Zeroable = KnownUndef | KnownZero;
2191 
2192   SDValue Result;
  // Single-input patterns are tried first when V2 is undef.
2193   // TODO: Add more comparison patterns.
2194   if (V2.isUndef()) {
2195     if ((Result =
2196              lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2197       return Result;
2198     if ((Result =
2199              lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2200       return Result;
2201     if ((Result =
2202              lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2203       return Result;
2204 
2205     // TODO: This comment may be enabled in the future to better match the
2206     // pattern for instruction selection.
2207     /* V2 = V1; */
2208   }
2209 
2210   // It is recommended not to change the pattern comparison order for better
2211   // performance.
2212   if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2213     return Result;
2214   if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2215     return Result;
2216   if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2217     return Result;
2218   if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2219     return Result;
2220   if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2221     return Result;
2222   if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2223     return Result;
  // VSHUF4I on 64-bit elements can also select from two inputs.
2224   if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2225       (Result =
2226            lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2227     return Result;
2228   if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2229                                                      Zeroable)))
2230     return Result;
2231   if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2232                                            Zeroable)))
2233     return Result;
2234   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2235                                                 Subtarget)))
2236     return Result;
  // Widening the element type may expose one of the patterns above.
2237   if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2238     return NewShuffle;
  // Generic VSHUF as the last resort.
2239   if ((Result =
2240            lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2241     return Result;
2242   return SDValue();
2243 }
2244
2245 /// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2246 ///
2247 /// It is a XVREPLVEI when the mask is:
2248 ///   <x, x, x, ..., x+n, x+n, x+n, ...>
2249 /// where the number of x is equal to n and n is half the length of vector.
2250 ///
2251 /// When undef's appear in the mask they are treated as if they were whatever
2252 /// value is necessary in order to fit the above form.
/// Returns an empty SDValue when the mask does not match this shape.
2253 static SDValue
2255                               SDValue V1, SelectionDAG &DAG,
2256                               const LoongArchSubtarget &Subtarget) {
  // The first non-undef element fixes the candidate splat lane.
2257   int SplatIndex = -1;
2258   for (const auto &M : Mask) {
2259     if (M != -1) {
2260       SplatIndex = M;
2261       break;
2262     }
2263   }
2264 
  // All-undef mask: the whole shuffle is undef.
2265   if (SplatIndex == -1)
2266     return DAG.getUNDEF(VT);
2267 
2268   const auto &Begin = Mask.begin();
2269   const auto &End = Mask.end();
2270   int HalfSize = Mask.size() / 2;
2271 
  // The splat lane must lie in the low half so both 128-bit halves can
  // replicate the same in-lane index.
2272   if (SplatIndex >= HalfSize)
2273     return SDValue();
2274 
2275   assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  // Low half must be <x, x, ...> and high half <x+n, x+n, ...> (stride 0).
2276   if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2277       fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2278                               0)) {
2279     return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2280                        DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2281   }
2282 
2283   return SDValue();
2284 }
2285
2286 /// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
/// Reuses the 128-bit VSHUF4I matcher for masks with more than 4 elements.
2287 static SDValue
2289                              SDValue V1, SDValue V2, SelectionDAG &DAG,
2290                              const LoongArchSubtarget &Subtarget) {
2291   // When the size is less than or equal to 4, lower cost instructions may be
2292   // used.
2293   if (Mask.size() <= 4)
2294     return SDValue();
2295   return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2296 }
2297
2298 /// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
/// Only the 4x64-bit form (xvpermi.d) is handled; the whole mask is encoded
/// into an 8-bit immediate, two bits per destination element.
2299 static SDValue
2301                              SDValue V1, SelectionDAG &DAG,
2302                              const LoongArchSubtarget &Subtarget) {
2303   // Only consider XVPERMI_D.
2304   if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2305     return SDValue();
2306 
  // Pack each mask element into its 2-bit slot; undef lanes stay 0.
2307   unsigned MaskImm = 0;
2308   for (unsigned i = 0; i < Mask.size(); ++i) {
2309     if (Mask[i] == -1)
2310       continue;
2311     MaskImm |= Mask[i] << (i * 2);
2312   }
2313 
2314   return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2315                      DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2316 }
2317
2318 /// Lower VECTOR_SHUFFLE into XVPERM (if possible).
/// Only v8i32/v8f32 are handled (the hardware only provides xvperm.w), and
/// only when the shuffle actually crosses 128-bit lanes; otherwise cheaper
/// in-lane instructions are preferred.
2320                                         MVT VT, SDValue V1, SelectionDAG &DAG,
2321                                         const LoongArchSubtarget &Subtarget) {
2322   // LoongArch LASX only have XVPERM_W.
2323   if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2324     return SDValue();
2325 
2326   unsigned NumElts = VT.getVectorNumElements();
2327   unsigned HalfSize = NumElts / 2;
  // Track whether the front/back half of the mask reads only the low or only
  // the high half of the source vector.
2328   bool FrontLo = true, FrontHi = true;
2329   bool BackLo = true, BackHi = true;
2330 
  // Undef (-1) elements are compatible with any range.
2331   auto inRange = [](int val, int low, int high) {
2332     return (val == -1) || (val >= low && val < high);
2333   };
2334 
2335   for (unsigned i = 0; i < HalfSize; ++i) {
2336     int Fronti = Mask[i];
2337     int Backi = Mask[i + HalfSize];
2338 
2339     FrontLo &= inRange(Fronti, 0, HalfSize);
2340     FrontHi &= inRange(Fronti, HalfSize, NumElts);
2341     BackLo &= inRange(Backi, 0, HalfSize);
2342     BackHi &= inRange(Backi, HalfSize, NumElts);
2343   }
2344 
2345   // If both the lower and upper 128-bit parts access only one half of the
2346   // vector (either lower or upper), avoid using xvperm.w. The latency of
2347   // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2348   if ((FrontLo || FrontHi) && (BackLo || BackHi))
2349     return SDValue();
2350 
  // Build the index vector operand; undef lanes become undef constants.
2352   MVT GRLenVT = Subtarget.getGRLenVT();
2353   for (unsigned i = 0; i < NumElts; ++i)
2354     Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2355                                   : DAG.getConstant(Mask[i], DL, GRLenVT));
2356   SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2357 
2358   return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2359 }
2360
2361 /// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
/// The even-interleave pattern check is per pair of elements, so the 128-bit
/// matcher applies unchanged to 256-bit vectors.
2363                                           MVT VT, SDValue V1, SDValue V2,
2364                                           SelectionDAG &DAG) {
2365   return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2366 }
2367
2368 /// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
/// The odd-interleave pattern check is per pair of elements, so the 128-bit
/// matcher applies unchanged to 256-bit vectors.
2370                                           MVT VT, SDValue V1, SDValue V2,
2371                                           SelectionDAG &DAG) {
2372   return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2373 }
2374
2375 /// Lower VECTOR_SHUFFLE into XVILVH (if possible).
/// Matches the 256-bit high-interleave: each 128-bit lane interleaves the
/// upper quarter of its two sources. Even mask positions pick the first
/// source, odd positions the second; either may be V1 or V2.
2377                                         MVT VT, SDValue V1, SDValue V2,
2378                                         SelectionDAG &DAG) {
2379 
2380   const auto &Begin = Mask.begin();
2381   const auto &End = Mask.end();
2382   unsigned HalfSize = Mask.size() / 2;
2383   unsigned LeftSize = HalfSize / 2;
2384   SDValue OriV1 = V1, OriV2 = V2;
2385 
  // Decide which input feeds the even destination positions.
2386   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2387                               1) &&
2388       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2389     V1 = OriV1;
2390   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2391                                    Mask.size() + HalfSize - LeftSize, 1) &&
2392            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2393                                    Mask.size() + HalfSize + LeftSize, 1))
2394     V1 = OriV2;
2395   else
2396     return SDValue();
2397 
  // Decide which input feeds the odd destination positions.
2398   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2399                               1) &&
2400       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2401                               1))
2402     V2 = OriV1;
2403   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2404                                    Mask.size() + HalfSize - LeftSize, 1) &&
2405            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2406                                    Mask.size() + HalfSize + LeftSize, 1))
2407     V2 = OriV2;
2408   else
2409     return SDValue();
2410 
2411   return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2412 }
2413
2414 /// Lower VECTOR_SHUFFLE into XVILVL (if possible).
/// Matches the 256-bit low-interleave: each 128-bit lane interleaves the
/// lower quarter of its two sources. Even mask positions pick the first
/// source, odd positions the second; either may be V1 or V2.
2416                                         MVT VT, SDValue V1, SDValue V2,
2417                                         SelectionDAG &DAG) {
2418 
2419   const auto &Begin = Mask.begin();
2420   const auto &End = Mask.end();
2421   unsigned HalfSize = Mask.size() / 2;
2422   SDValue OriV1 = V1, OriV2 = V2;
2423 
  // Decide which input feeds the even destination positions.
2424   if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2425       fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2426     V1 = OriV1;
2427   else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2428            fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2429                                    Mask.size() + HalfSize, 1))
2430     V1 = OriV2;
2431   else
2432     return SDValue();
2433 
  // Decide which input feeds the odd destination positions.
2434   if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2435       fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2436     V2 = OriV1;
2437   else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2438                                    1) &&
2439            fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2440                                    Mask.size() + HalfSize, 1))
2441     V2 = OriV2;
2442   else
2443     return SDValue();
2444 
2445   return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2446 }
2447
2448 /// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
/// Matches the 256-bit even-pick: each 128-bit lane takes the even elements
/// of one source into its low quarter-ranges and of the other into the high
/// quarter-ranges.
2450                                           MVT VT, SDValue V1, SDValue V2,
2451                                           SelectionDAG &DAG) {
2452 
  // Quarter boundaries of the mask; each lane contributes two quarters.
2453   const auto &Begin = Mask.begin();
2454   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2455   const auto &Mid = Mask.begin() + Mask.size() / 2;
2456   const auto &RightMid = Mask.end() - Mask.size() / 4;
2457   const auto &End = Mask.end();
2458   unsigned HalfSize = Mask.size() / 2;
2459   SDValue OriV1 = V1, OriV2 = V2;
2460 
  // First and third quarters: even elements (stride 2) of one input.
2461   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2462       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2463     V1 = OriV1;
2464   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2465            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2466     V1 = OriV2;
2467   else
2468     return SDValue();
2469 
  // Second and fourth quarters: even elements of the other input.
2470   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2471       fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2472     V2 = OriV1;
2473   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2474            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2475     V2 = OriV2;
2476 
2477   else
2478     return SDValue();
2479 
2480   return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2481 }
2482
2483 /// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
/// Same structure as the XVPICKEV matcher, but selecting odd elements
/// (stride 2, starting at index 1).
2485                                           MVT VT, SDValue V1, SDValue V2,
2486                                           SelectionDAG &DAG) {
2487 
  // Quarter boundaries of the mask; each lane contributes two quarters.
2488   const auto &Begin = Mask.begin();
2489   const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2490   const auto &Mid = Mask.begin() + Mask.size() / 2;
2491   const auto &RightMid = Mask.end() - Mask.size() / 4;
2492   const auto &End = Mask.end();
2493   unsigned HalfSize = Mask.size() / 2;
2494   SDValue OriV1 = V1, OriV2 = V2;
2495 
  // First and third quarters: odd elements of one input.
2496   if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2497       fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2498     V1 = OriV1;
2499   else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2500            fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2501                                    2))
2502     V1 = OriV2;
2503   else
2504     return SDValue();
2505 
  // Second and fourth quarters: odd elements of the other input.
2506   if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2507       fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2508     V2 = OriV1;
2509   else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2510            fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2511                                    2))
2512     V2 = OriV2;
2513   else
2514     return SDValue();
2515 
2516   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2517 }
2518
2519 /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
/// Matches an identity shuffle of one input with exactly one element replaced
/// by element 0 of the other input; that maps directly onto xvinsve0.
2521                              SDValue V1, SDValue V2, SelectionDAG &DAG,
2522                              const LoongArchSubtarget &Subtarget) {
2523   // LoongArch LASX only supports xvinsve0.{w/d}.
2524   if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2525       VT != MVT::v4f64)
2526     return SDValue();
2527 
2528   MVT GRLenVT = Subtarget.getGRLenVT();
2529   int MaskSize = Mask.size();
2530   assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2531 
2532   // Check if exactly one element of the Mask is replaced by 'Replaced', while
2533   // all other elements are either 'Base + i' or undef (-1). On success, return
2534   // the index of the replaced element. Otherwise, just return -1.
2535   auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2536     int Idx = -1;
2537     for (int i = 0; i < MaskSize; ++i) {
2538       if (Mask[i] == Base + i || Mask[i] == -1)
2539         continue;
2540       if (Mask[i] != Replaced)
2541         return -1;
2542       if (Idx == -1)
2543         Idx = i;
2544       else
          // More than one replaced element: no match.
2545         return -1;
2546     }
2547     return Idx;
2548   };
2549 
2550   // Case 1: the lowest element of V2 replaces one element in V1.
2551   int Idx = checkReplaceOne(0, MaskSize);
2552   if (Idx != -1)
2553     return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2554                        DAG.getConstant(Idx, DL, GRLenVT));
2555 
2556   // Case 2: the lowest element of V1 replaces one element in V2.
2557   Idx = checkReplaceOne(MaskSize, 0);
2558   if (Idx != -1)
2559     return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2560                        DAG.getConstant(Idx, DL, GRLenVT));
2561 
2562   return SDValue();
2563 }
2565
2566 /// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
/// Succeeds only when the front half of the mask draws from the low halves of
/// both inputs and the back half draws from the high halves, matching XVSHUF's
/// per-lane operand layout. Indices are rebased into XVSHUF's encoding.
2568                                         MVT VT, SDValue V1, SDValue V2,
2569                                         SelectionDAG &DAG) {
2570 
2571   int MaskSize = Mask.size();
2572   int HalfSize = Mask.size() / 2;
2573   const auto &Begin = Mask.begin();
2574   const auto &Mid = Mask.begin() + HalfSize;
2575   const auto &End = Mask.end();
2576 
2577   // VECTOR_SHUFFLE concatenates the vectors:
2578   //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2579   //  shuffling ->
2580   //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2581   //
2582   // XVSHUF concatenates the vectors:
2583   //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2584   //  shuffling ->
2585   //  <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2586   SmallVector<SDValue, 8> MaskAlloc;
  // Front half: only indices into the low half of either input are legal.
2587   for (auto it = Begin; it < Mid; it++) {
2588     if (*it < 0) // UNDEF
2589       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2590     else if ((*it >= 0 && *it < HalfSize) ||
2591              (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2592       int M = *it < HalfSize ? *it : *it - HalfSize;
2593       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2594     } else
2595       return SDValue();
2596   }
2597   assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2598 
  // Back half: only indices into the high half of either input are legal.
2599   for (auto it = Mid; it < End; it++) {
2600     if (*it < 0) // UNDEF
2601       MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2602     else if ((*it >= HalfSize && *it < MaskSize) ||
2603              (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2604       int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2605       MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2606     } else
2607       return SDValue();
2608   }
2609   assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2610 
  // Operands are swapped for the same reason as in the 128-bit VSHUF lowering.
2611   EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2612   SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2613   return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2614 }
2615
2616 /// Shuffle vectors by lane to generate more optimized instructions.
2617 /// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2618 ///
2619 /// Therefore, except for the following four cases, other cases are regarded
2620 /// as cross-lane shuffles, where optimization is relatively limited.
2621 ///
2622 /// - Shuffle high, low lanes of two inputs vector
2623 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2624 /// - Shuffle low, high lanes of two inputs vector
2625 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2626 /// - Shuffle low, low lanes of two inputs vector
2627 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2628 /// - Shuffle high, high lanes of two inputs vector
2629 ///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2630 ///
2631 /// The first case is the closest to LoongArch instructions and the other
2632 /// cases need to be converted to it for processing.
2633 ///
2634 /// This function will return true for the last three cases above and will
2635 /// modify V1, V2 and Mask. Otherwise, return false for the first case and
2636 /// cross-lane shuffle cases.
2638     const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2639     SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2640 
2641   enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2642 
2643   int MaskSize = Mask.size();
2644   int HalfSize = Mask.size() / 2;
2645   MVT GRLenVT = Subtarget.getGRLenVT();
2646 
  // Classify each half of the mask: does it read only the high (low-index)
  // lane or only the low (high-index) lane of its sources?
2647   HalfMaskType preMask = None, postMask = None;
2648 
2649   if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2650         return M < 0 || (M >= 0 && M < HalfSize) ||
2651                (M >= MaskSize && M < MaskSize + HalfSize);
2652       }))
2653     preMask = HighLaneTy;
2654   else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2655              return M < 0 || (M >= HalfSize && M < MaskSize) ||
2656                     (M >= MaskSize + HalfSize && M < MaskSize * 2);
2657            }))
2658     preMask = LowLaneTy;
2659 
2660   if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2661         return M < 0 || (M >= HalfSize && M < MaskSize) ||
2662                (M >= MaskSize + HalfSize && M < MaskSize * 2);
2663       }))
2664     postMask = LowLaneTy;
2665   else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2666              return M < 0 || (M >= 0 && M < HalfSize) ||
2667                     (M >= MaskSize && M < MaskSize + HalfSize);
2668            }))
2669     postMask = HighLaneTy;
2670 
2671   // The pre-half of mask is high lane type, and the post-half of mask
2672   // is low lane type, which is closest to the LoongArch instructions.
2673   //
2674   // Note: In the LoongArch architecture, the high lane of mask corresponds
2675   // to the lower 128-bit of vector register, and the low lane of mask
2676   // corresponds the higher 128-bit of vector register.
2677   if (preMask == HighLaneTy && postMask == LowLaneTy) {
2678     return false;
2679   }
  // low/high: swap the two 128-bit lanes of both inputs (xvpermi.d 0b01001110)
  // and rebase the mask accordingly.
2680   if (preMask == LowLaneTy && postMask == HighLaneTy) {
2681     V1 = DAG.getBitcast(MVT::v4i64, V1);
2682     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2683                      DAG.getConstant(0b01001110, DL, GRLenVT));
2684     V1 = DAG.getBitcast(VT, V1);
2685 
2686     if (!V2.isUndef()) {
2687       V2 = DAG.getBitcast(MVT::v4i64, V2);
2688       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2689                        DAG.getConstant(0b01001110, DL, GRLenVT));
2690       V2 = DAG.getBitcast(VT, V2);
2691     }
2692 
2693     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2694       *it = *it < 0 ? *it : *it - HalfSize;
2695     }
2696     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2697       *it = *it < 0 ? *it : *it + HalfSize;
2698     }
  // low/low: broadcast the high 128-bit lane of both inputs (0b11101110).
2699   } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2700     V1 = DAG.getBitcast(MVT::v4i64, V1);
2701     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2702                      DAG.getConstant(0b11101110, DL, GRLenVT));
2703     V1 = DAG.getBitcast(VT, V1);
2704 
2705     if (!V2.isUndef()) {
2706       V2 = DAG.getBitcast(MVT::v4i64, V2);
2707       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2708                        DAG.getConstant(0b11101110, DL, GRLenVT));
2709       V2 = DAG.getBitcast(VT, V2);
2710     }
2711 
2712     for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2713       *it = *it < 0 ? *it : *it - HalfSize;
2714     }
  // high/high: broadcast the low 128-bit lane of both inputs (0b01000100).
2715   } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2716     V1 = DAG.getBitcast(MVT::v4i64, V1);
2717     V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2718                      DAG.getConstant(0b01000100, DL, GRLenVT));
2719     V1 = DAG.getBitcast(VT, V1);
2720 
2721     if (!V2.isUndef()) {
2722       V2 = DAG.getBitcast(MVT::v4i64, V2);
2723       V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2724                        DAG.getConstant(0b01000100, DL, GRLenVT));
2725       V2 = DAG.getBitcast(VT, V2);
2726     }
2727 
2728     for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2729       *it = *it < 0 ? *it : *it + HalfSize;
2730     }
2731   } else { // cross-lane
2732     return false;
2733   }
2734 
2735   return true;
2736 }
2737
2738 /// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2739 /// Only for 256-bit vector.
2740 ///
2741 /// For example:
2742 ///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2743 ///                      <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2744 /// is lowered to:
2745 ///   (XVPERMI $xr2, $xr0, 78)
2746 ///   (XVSHUF $xr1, $xr2, $xr0)
2747 ///   (XVORI $xr0, $xr1, 0)
2749                                                   ArrayRef<int> Mask,
2750                                                   MVT VT, SDValue V1,
2751                                                   SDValue V2,
2752                                                   SelectionDAG &DAG) {
2753   assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2754   int Size = Mask.size();
2755   int LaneSize = Size / 2;
2756 
  // Determine which destination lanes read from the other source lane.
2757   bool LaneCrossing[2] = {false, false};
2758   for (int i = 0; i < Size; ++i)
2759     if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2760       LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2761 
2762   // Ensure that all lanes are involved.
2763   if (!LaneCrossing[0] && !LaneCrossing[1])
2764     return SDValue();
2765 
  // Rewrite cross-lane references to point at the lane-swapped copy (offset
  // by Size, i.e. the second shuffle operand).
2766   SmallVector<int> InLaneMask;
2767   InLaneMask.assign(Mask.begin(), Mask.end());
2768   for (int i = 0; i < Size; ++i) {
2769     int &M = InLaneMask[i];
2770     if (M < 0)
2771       continue;
2772     if (((M % Size) / LaneSize) != (i / LaneSize))
2773       M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2774   }
2775 
  // Build the lane-swapped copy of V1, then shuffle V1 against it in-lane.
2776   SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2777   Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2778                                  DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2779   Flipped = DAG.getBitcast(VT, Flipped);
2780   return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2781 }
2782
2783 /// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2784 ///
2785 /// This routine breaks down the specific type of 256-bit shuffle and
2786 /// dispatches to the lowering routines accordingly.
/// Returns the lowered node, or an empty SDValue if no LASX pattern matched.
2788                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
2789                                  const LoongArchSubtarget &Subtarget) {
2790   assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2791           VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2792           VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2793          "Vector type is unsupported for lasx!");
2795          "Two operands have different types!");
2796   assert(VT.getVectorNumElements() == Mask.size() &&
2797          "Unexpected mask size for shuffle!");
2798   assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2799   assert(Mask.size() >= 4 && "Mask size is less than 4.");
2800 
  // Record which mask elements are known undef or known zero; the shift
  // matcher below can exploit these lanes.
2801   APInt KnownUndef, KnownZero;
2802   computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2803   APInt Zeroable = KnownUndef | KnownZero;
2804 
2805   SDValue Result;
  // Single-input patterns are tried first when V2 is undef.
2806   // TODO: Add more comparison patterns.
2807   if (V2.isUndef()) {
2808     if ((Result =
2809              lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2810       return Result;
2811     if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2812                                                Subtarget)))
2813       return Result;
2814     // Try to widen vectors to gain more optimization opportunities.
2815     if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2816       return NewShuffle;
2817     if ((Result =
2818              lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2819       return Result;
2820     if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2821       return Result;
2822     if ((Result =
2823              lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2824       return Result;
2825 
2826     // TODO: This comment may be enabled in the future to better match the
2827     // pattern for instruction selection.
2828     /* V2 = V1; */
2829   }
2830 
2831   // It is recommended not to change the pattern comparison order for better
2832   // performance.
2833   if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2834     return Result;
2835   if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2836     return Result;
2837   if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2838     return Result;
2839   if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2840     return Result;
2841   if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2842     return Result;
2843   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2844     return Result;
2845   if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2846                                            Zeroable)))
2847     return Result;
2848   if ((Result =
2849            lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2850     return Result;
2851   if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2852                                                 Subtarget)))
2853     return Result;
2854 
2855   // canonicalize non cross-lane shuffle vector
  // May rewrite V1/V2/NewMask into the "high lane then low lane" form and
  // retry the whole dispatch on the canonicalized shuffle.
2856   SmallVector<int> NewMask(Mask);
2857   if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2858     return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2859 
2860   // FIXME: Handling the remaining cases earlier can degrade performance
2861   // in some situations. Further analysis is required to enable more
2862   // effective optimizations.
2863   if (V2.isUndef()) {
2864     if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2865                                                              V1, V2, DAG)))
2866       return Result;
2867   }
2868 
2869   if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2870     return NewShuffle;
  // Generic XVSHUF as the last resort.
2871   if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2872     return Result;
2873 
2874   return SDValue();
2875 }
2876
2877SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2878 SelectionDAG &DAG) const {
2879 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2880 ArrayRef<int> OrigMask = SVOp->getMask();
2881 SDValue V1 = Op.getOperand(0);
2882 SDValue V2 = Op.getOperand(1);
2883 MVT VT = Op.getSimpleValueType();
2884 int NumElements = VT.getVectorNumElements();
2885 SDLoc DL(Op);
2886
2887 bool V1IsUndef = V1.isUndef();
2888 bool V2IsUndef = V2.isUndef();
2889 if (V1IsUndef && V2IsUndef)
2890 return DAG.getUNDEF(VT);
2891
2892 // When we create a shuffle node we put the UNDEF node to second operand,
2893 // but in some cases the first operand may be transformed to UNDEF.
2894 // In this case we should just commute the node.
2895 if (V1IsUndef)
2896 return DAG.getCommutedVectorShuffle(*SVOp);
2897
2898 // Check for non-undef masks pointing at an undef vector and make the masks
2899 // undef as well. This makes it easier to match the shuffle based solely on
2900 // the mask.
2901 if (V2IsUndef &&
2902 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2903 SmallVector<int, 8> NewMask(OrigMask);
2904 for (int &M : NewMask)
2905 if (M >= NumElements)
2906 M = -1;
2907 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2908 }
2909
2910 // Check for illegal shuffle mask element index values.
2911 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2912 (void)MaskUpperLimit;
2913 assert(llvm::all_of(OrigMask,
2914 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2915 "Out of bounds shuffle index");
2916
2917 // For each vector width, delegate to a specialized lowering routine.
2918 if (VT.is128BitVector())
2919 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2920
2921 if (VT.is256BitVector())
2922 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2923
2924 return SDValue();
2925}
2926
2927SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2928 SelectionDAG &DAG) const {
2929 // Custom lower to ensure the libcall return is passed in an FPR on hard
2930 // float ABIs.
2931 SDLoc DL(Op);
2932 MakeLibCallOptions CallOptions;
2933 SDValue Op0 = Op.getOperand(0);
2934 SDValue Chain = SDValue();
2935 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2936 SDValue Res;
2937 std::tie(Res, Chain) =
2938 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2939 if (Subtarget.is64Bit())
2940 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2941 return DAG.getBitcast(MVT::i32, Res);
2942}
2943
2944SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2947 // float ABIs.
2948 SDLoc DL(Op);
2949 MakeLibCallOptions CallOptions;
2950 SDValue Op0 = Op.getOperand(0);
2951 SDValue Chain = SDValue();
2952 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2953 DL, MVT::f32, Op0)
2954 : DAG.getBitcast(MVT::f32, Op0);
2955 SDValue Res;
2956 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2957 CallOptions, DL, Chain);
2958 return Res;
2959}
2960
2961SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2962 SelectionDAG &DAG) const {
2963 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2964 SDLoc DL(Op);
2965 MakeLibCallOptions CallOptions;
2966 RTLIB::Libcall LC =
2967 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2968 SDValue Res =
2969 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2970 if (Subtarget.is64Bit())
2971 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2972 return DAG.getBitcast(MVT::i32, Res);
2973}
2974
2975SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2976 SelectionDAG &DAG) const {
2977 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2978 MVT VT = Op.getSimpleValueType();
2979 SDLoc DL(Op);
2980 Op = DAG.getNode(
2981 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2982 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2983 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2984 DL, MVT::f32, Op)
2985 : DAG.getBitcast(MVT::f32, Op);
2986 if (VT != MVT::f32)
2987 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2988 return Res;
2989}
2990
2991 // Lower BUILD_VECTOR as broadcast load (if possible).
2992 // For example:
2993 //   %a = load i8, ptr %ptr
2994 //   %b = build_vector %a, %a, %a, %a
2995 //   is lowered to :
2996 //   (VLDREPL_B $a0, 0)
// Returns an empty SDValue when the operands are not all the same load.
2998                                                  const SDLoc &DL,
2999                                                  SelectionDAG &DAG) {
3000   MVT VT = BVOp->getSimpleValueType(0);
3001   int NumOps = BVOp->getNumOperands();
3002 
3003   assert((VT.is128BitVector() || VT.is256BitVector()) &&
3004          "Unsupported vector type for broadcast.");
3005 
  // All operands must be the very same load node ("identity" source).
  // Note: "IsIdeneity" is a pre-existing misspelling of "IsIdentity".
3006   SDValue IdentitySrc;
3007   bool IsIdeneity = true;
3008 
3009   for (int i = 0; i != NumOps; i++) {
3010     SDValue Op = BVOp->getOperand(i);
3011     if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
3012       IsIdeneity = false;
3013       break;
3014     }
3015     IdentitySrc = BVOp->getOperand(0);
3016   }
3017 
3018   // Make sure that this load is valid and only has one user.
3019   if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
3020     return SDValue();
3021 
3022   auto *LN = cast<LoadSDNode>(IdentitySrc);
3023   auto ExtType = LN->getExtensionType();
3024 
  // Only plain or any-extending loads whose memory element width matches the
  // vector element width can be turned into VLDREPL.
3025   if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3026       VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3027     // Indexed loads and stores are not supported on LoongArch.
3028     assert(LN->isUnindexed() && "Unexpected indexed load.");
3029 
3030     SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3031     // The offset operand of unindexed load is always undefined, so there is
3032     // no need to pass it to VLDREPL.
3033     SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3034     SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
    // Re-route users of the original load's chain to the new node's chain.
3035     DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3036     return BCast;
3037   }
3038   return SDValue();
3039 }
3040
3041 // Sequentially insert elements from Ops into Vector, from low to high indices.
3042 // Note: Ops can have fewer elements than Vector.
// Undef operands are skipped, leaving the corresponding lanes of Vector
// untouched. Vector is updated in place.
3044                        const LoongArchSubtarget &Subtarget, SDValue &Vector,
3045                        EVT ResTy) {
3046   assert(Ops.size() <= ResTy.getVectorNumElements());
3047 
  // The first element can use the cheaper SCALAR_TO_VECTOR form.
3048   SDValue Op0 = Ops[0];
3049   if (!Op0.isUndef())
3050     Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3051   for (unsigned i = 1; i < Ops.size(); ++i) {
3052     SDValue Opi = Ops[i];
3053     if (Opi.isUndef())
3054       continue;
3055     Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3056                          DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3057   }
3058 }
3059
3060 // Build a ResTy subvector from Node, taking NumElts elements starting at index
3061 // 'first'.
// Returns a new vector whose lanes 0..NumElts-1 are the operands
// Node[first..first+NumElts-1]; undef operands leave undef lanes.
3063                                            SelectionDAG &DAG, SDLoc DL,
3064                                            const LoongArchSubtarget &Subtarget,
3065                                            EVT ResTy, unsigned first) {
3066   unsigned NumElts = ResTy.getVectorNumElements();
3067 
3068   assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3069 
3070   SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3071                                Node->op_begin() + first + NumElts);
3072   SDValue Vector = DAG.getUNDEF(ResTy);
3073   fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3074   return Vector;
3075 }
3076
// Custom lowering for BUILD_VECTOR on LSX (128-bit) / LASX (256-bit)
// vectors. Strategies are tried in order:
//  1) broadcast-of-load -> VLDREPL,
//  2) constant splat materialized directly (bitcast back if needed),
//  3) splat a shared constant then patch the non-constant lanes,
//  4) broadcast a repeated sub-sequence (VREPLVEI / XVREPLVE0{,Q}),
//  5) fall back to SCALAR_TO_VECTOR + INSERT_VECTOR_ELT chains.
3077SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3078                                                   SelectionDAG &DAG) const {
3079  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3080  MVT VT = Node->getSimpleValueType(0);
3081  EVT ResTy = Op->getValueType(0);
3082  unsigned NumElts = ResTy.getVectorNumElements();
3083  SDLoc DL(Op);
3084  APInt SplatValue, SplatUndef;
3085  unsigned SplatBitSize;
3086  bool HasAnyUndefs;
3087  bool IsConstant = false;
3088  bool UseSameConstant = true;
3089  SDValue ConstantValue;
3090  bool Is128Vec = ResTy.is128BitVector();
3091  bool Is256Vec = ResTy.is256BitVector();
3092
  // Only handle 128-bit vectors with LSX and 256-bit vectors with LASX.
3093  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3094      (!Subtarget.hasExtLASX() || !Is256Vec))
3095    return SDValue();
3096
  // A splat of a loaded scalar can become a single VLDREPL.
3097  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3098    return Result;
3099
3100  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3101                            /*MinSplatBits=*/8) &&
3102      SplatBitSize <= 64) {
3103    // We can only cope with 8, 16, 32, or 64-bit elements.
3104    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3105        SplatBitSize != 64)
3106      return SDValue();
3107
3108    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3109      // We can only handle 64-bit elements that are within
3110      // the signed 10-bit range or match vldi patterns on 32-bit targets.
3111      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3112      if (!SplatValue.isSignedIntN(10) &&
3113          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3114        return SDValue();
3115      if ((Is128Vec && ResTy == MVT::v4i32) ||
3116          (Is256Vec && ResTy == MVT::v8i32))
3117        return Op;
3118    }
3119
3120    EVT ViaVecTy;
3121
    // Pick the integer vector type matching the splat element width; the
    // constant is built there and bitcast back to ResTy below if needed.
3122    switch (SplatBitSize) {
3123    default:
3124      return SDValue();
3125    case 8:
3126      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3127      break;
3128    case 16:
3129      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3130      break;
3131    case 32:
3132      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3133      break;
3134    case 64:
3135      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3136      break;
3137    }
3138
3139    // SelectionDAG::getConstant will promote SplatValue appropriately.
3140    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3141
3142    // Bitcast to the type we originally wanted.
3143    if (ViaVecTy != ResTy)
3144      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3145
3146    return Result;
3147  }
3148
  // Non-constant full splats are left intact for later pattern matching.
3149  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3150    return Op;
3151
  // Scan the operands: record whether any constant lane exists and whether
  // every constant lane holds the same value.
3152  for (unsigned i = 0; i < NumElts; ++i) {
3153    SDValue Opi = Node->getOperand(i);
3154    if (isIntOrFPConstant(Opi)) {
3155      IsConstant = true;
3156      if (!ConstantValue.getNode())
3157        ConstantValue = Opi;
3158      else if (ConstantValue != Opi)
3159        UseSameConstant = false;
3160    }
3161  }
3162
3163  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3164  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the shared constant, then overwrite only the non-constant lanes.
3165    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3166    for (unsigned i = 0; i < NumElts; ++i) {
3167      SDValue Opi = Node->getOperand(i);
3168      if (!isIntOrFPConstant(Opi))
3169        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3170                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3171    }
3172    return Result;
3173  }
3174
3175  if (!IsConstant) {
3176    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3177    // the sub-sequence of the vector and then broadcast the sub-sequence.
3178    //
3179    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3180    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
3181    // generates worse code in some cases. This could be further optimized
3182    // with more consideration.
    // NOTE(review): line 3183 (the declaration of 'Sequence', presumably
    // "SmallVector<SDValue, 8> Sequence;") was lost in extraction -- confirm
    // against the upstream file.
3184    BitVector UndefElements;
3185    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3186        UndefElements.count() == 0) {
3187      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
3188      // because the high part can be simply treated as undef.
3189      SDValue Vector = DAG.getUNDEF(ResTy);
      // NOTE(review): line 3191 (presumably
      // "? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())") was lost in
      // extraction -- confirm against the upstream file.
3190      EVT FillTy = Is256Vec
3192                       : ResTy;
3193      SDValue FillVec =
3194          Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3195
3196      fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3197
      // View the filled vector as a vector of SeqLen-lane-wide elements so a
      // single element-0 broadcast replicates the whole sub-sequence.
3198      unsigned SeqLen = Sequence.size();
3199      unsigned SplatLen = NumElts / SeqLen;
3200      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3201      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3202
3203      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
3204      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
3205      if (SplatEltTy == MVT::i128)
3206        SplatTy = MVT::v4i64;
3207
3208      SDValue SplatVec;
3209      SDValue SrcVec = DAG.getBitcast(
3210          SplatTy,
3211          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3212      if (Is256Vec) {
3213        SplatVec =
3214            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3215                                                  : LoongArchISD::XVREPLVE0,
3216                        DL, SplatTy, SrcVec);
3217      } else {
3218        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3219                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3220      }
3221
3222      return DAG.getBitcast(ResTy, SplatVec);
3223    }
3224
3225    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
3226    // using memory operations is much lower.
3227    //
3228    // For 256-bit vectors, normally split into two halves and concatenate.
3229    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3230    // one non-undef element, skip spliting to avoid a worse result.
3231    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3232        ResTy == MVT::v4f64) {
3233      unsigned NonUndefCount = 0;
3234      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3235        if (!Node->getOperand(i).isUndef()) {
3236          ++NonUndefCount;
3237          if (NonUndefCount > 1)
3238            break;
3239        }
3240      }
3241      if (NonUndefCount == 1)
3242        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3243    }
3244
    // General case: build the low half (or the whole 128-bit vector) with
    // inserts; for 256-bit vectors also build the high half and concatenate.
3245    EVT VecTy =
3246        Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3247    SDValue Vector =
3248        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3249
3250    if (Is128Vec)
3251      return Vector;
3252
3253    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3254                                                    VecTy, NumElts / 2);
3255
3256    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3257  }
3258
3259  return SDValue();
3260}
3261
// Custom lowering for 256-bit CONCAT_VECTORS of two subvectors. Classifies
// each operand (undef / freeze(undef) / all-zeros / other), then either
// recurses on halves or assembles the result with INSERT_SUBVECTOR over a
// zero, frozen-undef, or undef base vector.
3262SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3263                                                     SelectionDAG &DAG) const {
3264  SDLoc DL(Op);
3265  MVT ResVT = Op.getSimpleValueType();
3266  assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3267
3268  unsigned NumOperands = Op.getNumOperands();
3269  unsigned NumFreezeUndef = 0;
3270  unsigned NumZero = 0;
3271  unsigned NumNonZero = 0;
  // Bitmask over operand indices: bit i set => operand i is "non-zero".
3272  unsigned NonZeros = 0;
3273  SmallSet<SDValue, 4> Undefs;
3274  for (unsigned i = 0; i != NumOperands; ++i) {
3275    SDValue SubVec = Op.getOperand(i);
3276    if (SubVec.isUndef())
3277      continue;
3278    if (ISD::isFreezeUndef(SubVec.getNode())) {
3279      // If the freeze(undef) has multiple uses then we must fold to zero.
3280      if (SubVec.hasOneUse()) {
3281        ++NumFreezeUndef;
3282      } else {
3283        ++NumZero;
3284        Undefs.insert(SubVec);
3285      }
3286    } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3287      ++NumZero;
3288    else {
3289      assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3290      NonZeros |= 1 << i;
3291      ++NumNonZero;
3292    }
3293  }
3294
3295  // If we have more than 2 non-zeros, build each half separately.
3296  if (NumNonZero > 2) {
3297    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3298    ArrayRef<SDUse> Ops = Op->ops();
3299    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3300                             Ops.slice(0, NumOperands / 2));
3301    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3302                             Ops.slice(NumOperands / 2));
3303    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3304  }
3305
3306  // Otherwise, build it up through insert_subvectors.
  // Base: zero if any zero-like operand exists, else frozen undef if any
  // single-use freeze(undef) exists, else plain undef.
3307  SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3308                        : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3309                                          : DAG.getUNDEF(ResVT));
3310
3311  // Replace Undef operands with ZeroVector.
  // Note this mutates the DAG globally (RAUW), so all other users of these
  // multi-use freeze(undef) nodes now see zero as well.
3312  for (SDValue U : Undefs)
3313    DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3314
3315  MVT SubVT = Op.getOperand(0).getSimpleValueType();
3316  unsigned NumSubElems = SubVT.getVectorNumElements();
3317  for (unsigned i = 0; i != NumOperands; ++i) {
3318    if ((NonZeros & (1 << i)) == 0)
3319      continue;
3320
3321    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3322                      DAG.getVectorIdxConstant(i * NumSubElems, DL));
3323  }
3324
3325  return Vec;
3326}
3327
// Custom lowering for EXTRACT_VECTOR_ELT from a 256-bit (LASX) vector with a
// non-constant index: either a VSHUF with the index as shuffle mask, or an
// XVPERM splat for 32-bit-lane vectors; the result is then read from lane 0.
3328SDValue
3329LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3330                                                 SelectionDAG &DAG) const {
3331  MVT EltVT = Op.getSimpleValueType();
3332  SDValue Vec = Op->getOperand(0);
3333  EVT VecTy = Vec->getValueType(0);
3334  SDValue Idx = Op->getOperand(1);
3335  SDLoc DL(Op);
3336  MVT GRLenVT = Subtarget.getGRLenVT();
3337
3338  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3339
  // Constant indices are matched directly by the instruction patterns.
3340  if (isa<ConstantSDNode>(Idx))
3341    return Op;
3342
3343  switch (VecTy.getSimpleVT().SimpleTy) {
3344  default:
3345    llvm_unreachable("Unexpected type");
3346  case MVT::v32i8:
3347  case MVT::v16i16:
3348  case MVT::v4i64:
3349  case MVT::v4f64: {
3350    // Extract the high half subvector and place it to the low half of a new
3351    // vector. It doesn't matter what the high half of the new vector is.
3352    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3353    SDValue VecHi =
3354        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3355    SDValue TmpVec =
3356        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3357                    VecHi, DAG.getConstant(0, DL, GRLenVT));
3358
3359    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
3360    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
3361    // desired element.
    // Move the GPR index into lane 0 of an FP vector to serve as the mask.
3362    SDValue IdxCp =
3363        Subtarget.is64Bit()
3364            ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3365            : DAG.getBitcast(MVT::f32, Idx);
3366    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3367    SDValue MaskVec =
3368        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3369    SDValue ResVec =
3370        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3371
3372    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3373                       DAG.getConstant(0, DL, GRLenVT));
3374  }
3375  case MVT::v8i32:
3376  case MVT::v8f32: {
    // XVPERM permutes arbitrary 32-bit lanes across the whole 256-bit
    // register; splat the index and read lane 0 of the permuted result.
3377    SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3378    SDValue SplatValue =
3379        DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3380
3381    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3382                       DAG.getConstant(0, DL, GRLenVT));
3383  }
3384  }
3385}
3386
// Custom lowering for INSERT_VECTOR_ELT with a non-constant index: build a
// vector of lane numbers, compare it against a splat of the insert index,
// and VSELECT between a splat of the new element and the original vector.
3387SDValue
3388LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3389                                                SelectionDAG &DAG) const {
3390  MVT VT = Op.getSimpleValueType();
3391  MVT EltVT = VT.getVectorElementType();
3392  unsigned NumElts = VT.getVectorNumElements();
3393  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3394  SDLoc DL(Op);
3395  SDValue Op0 = Op.getOperand(0);
3396  SDValue Op1 = Op.getOperand(1);
3397  SDValue Op2 = Op.getOperand(2);
3398
  // Constant indices are matched directly by the instruction patterns.
3399  if (isa<ConstantSDNode>(Op2))
3400    return Op;
3401
  // Index vector uses integer lanes of the same width as the element type.
3402  MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3403  MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3404
3405  if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3406    return SDValue();
3407
3408  SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3409  SmallVector<SDValue, 32> RawIndices;
3410  SDValue SplatIdx;
3411  SDValue Indices;
3412
  // On LA32 there is no legal i64 scalar, so i64 index vectors are built as
  // i32 pairs (low word = value, high word = 0) and bitcast to IdxVTy.
3413  if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3414    MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3415    for (unsigned i = 0; i < NumElts; ++i) {
3416      RawIndices.push_back(Op2);
3417      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3418    }
3419    SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3420    SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3421
3422    RawIndices.clear();
3423    for (unsigned i = 0; i < NumElts; ++i) {
3424      RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3425      RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3426    }
3427    Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3428    Indices = DAG.getBitcast(IdxVTy, Indices);
3429  } else {
3430    SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3431
3432    for (unsigned i = 0; i < NumElts; ++i)
3433      RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3434    Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3435  }
3436
3437  // insert vec, elt, idx
3438  // =>
3439  // select (splatidx == {0,1,2...}) ? splatelt : vec
3440  SDValue SelectCC =
3441      DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3442  return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3443}
3444
3445SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3446 SelectionDAG &DAG) const {
3447 SDLoc DL(Op);
3448 SyncScope::ID FenceSSID =
3449 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3450
3451 // singlethread fences only synchronize with signal handlers on the same
3452 // thread and thus only need to preserve instruction order, not actually
3453 // enforce memory ordering.
3454 if (FenceSSID == SyncScope::SingleThread)
3455 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3456 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3457
3458 return Op;
3459}
3460
3461SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3462 SelectionDAG &DAG) const {
3463
3464 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3465 DAG.getContext()->emitError(
3466 "On LA64, only 64-bit registers can be written.");
3467 return Op.getOperand(0);
3468 }
3469
3470 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3471 DAG.getContext()->emitError(
3472 "On LA32, only 32-bit registers can be written.");
3473 return Op.getOperand(0);
3474 }
3475
3476 return Op;
3477}
3478
// Lower FRAMEADDR: read the frame register, then for each requested depth
// level load the caller's saved frame pointer from the current frame.
3479SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3480                                                SelectionDAG &DAG) const {
3481  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3482    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3483                                "be a constant integer");
3484    return SDValue();
3485  }
3486
3487  MachineFunction &MF = DAG.getMachineFunction();
  // NOTE(review): line 3488 (presumably
  // "MF.getFrameInfo().setFrameAddressIsTaken(true);") was lost in
  // extraction -- confirm against the upstream file.
3489  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3490  EVT VT = Op.getValueType();
3491  SDLoc DL(Op);
3492  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3493  unsigned Depth = Op.getConstantOperandVal(0);
3494  int GRLenInBytes = Subtarget.getGRLen() / 8;
3495
  // Each enclosing frame's saved FP lives at FP - 2*GRLenInBytes.
3496  while (Depth--) {
3497    int Offset = -(GRLenInBytes * 2);
3498    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3499                              DAG.getSignedConstant(Offset, DL, VT));
3500    FrameAddr =
3501        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3502  }
3503  return FrameAddr;
3504}
3505
// Lower RETURNADDR: only depth 0 is supported; returns the value of the
// return-address register ($ra), added as an implicit live-in.
3506SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3507                                                 SelectionDAG &DAG) const {
3508  // Currently only support lowering return address for current frame.
3509  if (Op.getConstantOperandVal(0) != 0) {
3510    DAG.getContext()->emitError(
3511        "return address can only be determined for the current frame");
3512    return SDValue();
3513  }
3514
3515  MachineFunction &MF = DAG.getMachineFunction();
  // NOTE(review): line 3516 (presumably
  // "MF.getFrameInfo().setReturnAddressIsTaken(true);") was lost in
  // extraction -- confirm against the upstream file.
3517  MVT GRLenVT = Subtarget.getGRLenVT();
3518
3519  // Return the value of the return address register, marking it an implicit
3520  // live-in.
3521  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3522                              getRegClassFor(GRLenVT));
3523  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3524}
3525
3526SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3527 SelectionDAG &DAG) const {
3528 MachineFunction &MF = DAG.getMachineFunction();
3529 auto Size = Subtarget.getGRLen() / 8;
3530 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3531 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3532}
3533
// Lower VASTART: store the address of the vararg save area (its frame index
// is recorded on LoongArchMachineFunctionInfo during argument lowering) to
// the va_list pointer given as operand 1.
3534SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3535                                              SelectionDAG &DAG) const {
3536  MachineFunction &MF = DAG.getMachineFunction();
3537  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3538
3539  SDLoc DL(Op);
3540  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
  // NOTE(review): line 3541 (the continuation, presumably
  // "getPointerTy(DAG.getDataLayout()));") was lost in extraction --
  // confirm against the upstream file.
3542
3543  // vastart just stores the address of the VarArgsFrameIndex slot into the
3544  // memory location argument.
3545  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3546  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3547                      MachinePointerInfo(SV));
3548}
3549
// Custom lowering of UINT_TO_FP on LA64 with F but without D: keep the node
// when the source is provably a 32-bit-or-smaller unsigned value, otherwise
// fall back to the soft-float libcall.
3550SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3551                                                 SelectionDAG &DAG) const {
3552  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3553         !Subtarget.hasBasicD() && "unexpected target features");
3554
3555  SDLoc DL(Op);
3556  SDValue Op0 = Op.getOperand(0);
  // Masked by an AND with a sub-32-bit constant: high bits are known zero.
3557  if (Op0->getOpcode() == ISD::AND) {
3558    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3559    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3560      return Op;
3561  }
3562
  // Produced by a BSTRPICK that extracts fewer than 32 low bits.
3563  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3564      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3565      Op0.getConstantOperandVal(2) == UINT64_C(0))
3566    return Op;
3567
  // Asserted zero-extended from a type narrower than i32.
3568  if (Op0.getOpcode() == ISD::AssertZext &&
3569      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3570    return Op;
3571
  // Otherwise emit a call to the unsigned int-to-FP conversion routine.
3572  EVT OpVT = Op0.getValueType();
3573  EVT RetVT = Op.getValueType();
3574  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3575  MakeLibCallOptions CallOptions;
3576  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3577  SDValue Chain = SDValue();
  // NOTE(review): line 3578 (presumably "SDValue Result;") was lost in
  // extraction -- confirm against the upstream file.
3579  std::tie(Result, Chain) =
3580      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3581  return Result;
3582}
3583
// Custom lowering of SINT_TO_FP on LA64 with F but without D: keep the node
// when the source is asserted sign/zero-extended from <= 32 bits, otherwise
// fall back to the soft-float libcall.
3584SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3585                                                 SelectionDAG &DAG) const {
3586  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3587         !Subtarget.hasBasicD() && "unexpected target features");
3588
3589  SDLoc DL(Op);
3590  SDValue Op0 = Op.getOperand(0);
3591
3592  if ((Op0.getOpcode() == ISD::AssertSext ||
  // NOTE(review): line 3593 (presumably
  // "Op0.getOpcode() == ISD::AssertZext) &&") was lost in extraction --
  // confirm against the upstream file.
3594      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3595    return Op;
3596
  // Otherwise emit a call to the signed int-to-FP conversion routine.
3597  EVT OpVT = Op0.getValueType();
3598  EVT RetVT = Op.getValueType();
3599  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3600  MakeLibCallOptions CallOptions;
3601  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3602  SDValue Chain = SDValue();
  // NOTE(review): line 3603 (presumably "SDValue Result;") was lost in
  // extraction -- confirm against the upstream file.
3604  std::tie(Result, Chain) =
3605      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3606  return Result;
3607}
3608
3609SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3610 SelectionDAG &DAG) const {
3611
3612 SDLoc DL(Op);
3613 EVT VT = Op.getValueType();
3614 SDValue Op0 = Op.getOperand(0);
3615 EVT Op0VT = Op0.getValueType();
3616
3617 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3618 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3619 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3620 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3621 }
3622 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3623 SDValue Lo, Hi;
3624 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3625 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3626 }
3627 return Op;
3628}
3629
3630SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3631 SelectionDAG &DAG) const {
3632
3633 SDLoc DL(Op);
3634 SDValue Op0 = Op.getOperand(0);
3635
3636 if (Op0.getValueType() == MVT::f16)
3637 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3638
3639 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3640 !Subtarget.hasBasicD()) {
3641 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3642 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3643 }
3644
3645 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3646 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3647 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3648}
3649
// getTargetNode overload: wrap a global address as a TargetGlobalAddress.
// NOTE(review): line 3650 (this overload's signature, presumably
// "static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,")
// was lost in extraction -- confirm against the upstream file.
3651                             SelectionDAG &DAG, unsigned Flags) {
3652  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3653}
3654
// getTargetNode overload: wrap a block address as a TargetBlockAddress.
// NOTE(review): line 3655 (this overload's signature, presumably
// "static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,")
// was lost in extraction -- confirm against the upstream file.
3656                             SelectionDAG &DAG, unsigned Flags) {
3657  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3658                                   Flags);
3659}
3660
// getTargetNode overload: wrap a constant-pool entry as a TargetConstantPool.
// NOTE(review): line 3661 (this overload's signature, presumably
// "static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,")
// was lost in extraction -- confirm against the upstream file.
3662                             SelectionDAG &DAG, unsigned Flags) {
3663  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3664                                   N->getOffset(), Flags);
3665}
3666
// getTargetNode overload: wrap a jump-table index as a TargetJumpTable.
// NOTE(review): line 3667 (this overload's signature, presumably
// "static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,")
// was lost in extraction -- confirm against the upstream file.
3668                             SelectionDAG &DAG, unsigned Flags) {
3669  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3670}
3671
// Shared address-materialization helper for global/block/constant-pool/
// jump-table nodes, parameterized by node type. Selects a PseudoLA_* pseudo
// according to the code model and whether the symbol is dso_local; GOT loads
// get an invariant memory operand so MachineLICM can hoist them.
3672template <class NodeTy>
3673SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// NOTE(review): line 3674 (presumably "CodeModel::Model M,") was lost in
// extraction -- confirm against the upstream file.
3675                                         bool IsLocal) const {
3676  SDLoc DL(N);
3677  EVT Ty = getPointerTy(DAG.getDataLayout());
3678  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3679  SDValue Load;
3680
3681  switch (M) {
3682  default:
3683    report_fatal_error("Unsupported code model");
3684
3685  case CodeModel::Large: {
3686    assert(Subtarget.is64Bit() && "Large code model requires LA64");
3687
3688    // This is not actually used, but is necessary for successfully matching
3689    // the PseudoLA_*_LARGE nodes.
3690    SDValue Tmp = DAG.getConstant(0, DL, Ty);
3691    if (IsLocal) {
3692      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3693      // eventually becomes the desired 5-insn code sequence.
3694      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3695                                        Tmp, Addr),
3696                     0);
3697    } else {
3698      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3699      // eventually becomes the desired 5-insn code sequence.
3700      Load = SDValue(
3701          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3702          0);
3703    }
3704    break;
3705  }
3706
3707  case CodeModel::Small:
3708  case CodeModel::Medium:
3709    if (IsLocal) {
3710      // This generates the pattern (PseudoLA_PCREL sym), which
3711      //
3712      // for la32r expands to:
3713      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3714      //
3715      // for la32s and la64 expands to:
3716      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3717      Load = SDValue(
3718          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3719    } else {
3720      // This generates the pattern (PseudoLA_GOT sym), which
3721      //
3722      // for la32r expands to:
3723      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3724      //
3725      // for la32s and la64 expands to:
3726      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3727      Load =
3728          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3729    }
3730  }
3731
3732  if (!IsLocal) {
3733    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3734    MachineFunction &MF = DAG.getMachineFunction();
3735    MachineMemOperand *MemOp = MF.getMachineMemOperand(
    // NOTE(review): lines 3736-3738 (the MachinePointerInfo and memory-flag
    // arguments of getMachineMemOperand, likely
    // "MachinePointerInfo::getGOT(MF)" plus MOLoad|MODereferenceable|
    // MOInvariant flags) were lost in extraction -- confirm against the
    // upstream file.
3739        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3740    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3741  }
3742
3743  return Load;
3744}
3745
3746SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3747 SelectionDAG &DAG) const {
3748 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3749 DAG.getTarget().getCodeModel());
3750}
3751
3752SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3753 SelectionDAG &DAG) const {
3754 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3755 DAG.getTarget().getCodeModel());
3756}
3757
3758SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3759 SelectionDAG &DAG) const {
3760 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3761 DAG.getTarget().getCodeModel());
3762}
3763
3764SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3765 SelectionDAG &DAG) const {
3766 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3767 assert(N->getOffset() == 0 && "unexpected offset in global node");
3768 auto CM = DAG.getTarget().getCodeModel();
3769 const GlobalValue *GV = N->getGlobal();
3770
3771 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3772 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3773 CM = *GCM;
3774 }
3775
3776 return getAddr(N, DAG, CM, GV->isDSOLocal());
3777}
3778
// Lower static TLS accesses. With UseGOT set this is the initial-exec form
// (PseudoLA_TLS_IE[_LARGE] loads the TP offset from the GOT); otherwise it
// is local-exec (PseudoLA_TLS_LE computes the offset directly). The thread
// pointer ($tp / R2) is added here unless the pseudo expansion does it.
3779SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3780                                                  SelectionDAG &DAG,
3781                                                  unsigned Opc, bool UseGOT,
3782                                                  bool Large) const {
3783  SDLoc DL(N);
3784  EVT Ty = getPointerTy(DAG.getDataLayout());
3785  MVT GRLenVT = Subtarget.getGRLenVT();
3786
3787  // This is not actually used, but is necessary for successfully matching the
3788  // PseudoLA_*_LARGE nodes.
3789  SDValue Tmp = DAG.getConstant(0, DL, Ty);
3790  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3791
3792  // Only IE needs an extra argument for large code model.
3793  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3794                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3795                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3796
3797  // If it is LE for normal/medium code model, the add tp operation will occur
3798  // during the pseudo-instruction expansion.
3799  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3800    return Offset;
3801
3802  if (UseGOT) {
3803    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3804    MachineFunction &MF = DAG.getMachineFunction();
3805    MachineMemOperand *MemOp = MF.getMachineMemOperand(
    // NOTE(review): lines 3806-3808 (the MachinePointerInfo and memory-flag
    // arguments of getMachineMemOperand) were lost in extraction -- confirm
    // against the upstream file.
3809        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3810    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3811  }
3812
3813  // Add the thread pointer.
3814  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3815                     DAG.getRegister(LoongArch::R2, GRLenVT));
3816}
3817
// Lower general-/local-dynamic TLS accesses: materialize the GOT slot
// address with the given PseudoLA_TLS_{GD,LD}[_LARGE] pseudo, then call
// __tls_get_addr on it and return the call's result.
3818SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3819                                                   SelectionDAG &DAG,
3820                                                   unsigned Opc,
3821                                                   bool Large) const {
3822  SDLoc DL(N);
3823  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
3824  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3825
3826  // This is not actually used, but is necessary for successfully matching the
3827  // PseudoLA_*_LARGE nodes.
3828  SDValue Tmp = DAG.getConstant(0, DL, Ty);
3829
3830  // Use a PC-relative addressing mode to access the dynamic GOT address.
3831  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3832  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3833                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3834
3835  // Prepare argument list to generate call.
  // NOTE(review): line 3836 (presumably "TargetLowering::ArgListTy Args;")
  // was lost in extraction -- confirm against the upstream file.
3837  Args.emplace_back(Load, CallTy);
3838
3839  // Setup call to __tls_get_addr.
3840  TargetLowering::CallLoweringInfo CLI(DAG);
3841  CLI.setDebugLoc(DL)
3842      .setChain(DAG.getEntryNode())
3843      .setLibCallee(CallingConv::C, CallTy,
3844                    DAG.getExternalSymbol("__tls_get_addr", Ty),
3845                    std::move(Args));
3846
3847  return LowerCallTo(CLI).first;
3848}
3849
3850SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3851 SelectionDAG &DAG, unsigned Opc,
3852 bool Large) const {
3853 SDLoc DL(N);
3854 EVT Ty = getPointerTy(DAG.getDataLayout());
3855 const GlobalValue *GV = N->getGlobal();
3856
3857 // This is not actually used, but is necessary for successfully matching the
3858 // PseudoLA_*_LARGE nodes.
3859 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3860
3861 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3862 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3863 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3864 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3865 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3866}
3867
// Dispatch TLS accesses by model: GD/LD call __tls_get_addr (or use TLSDESC
// when enabled), IE loads the TP offset from the GOT, LE computes it
// directly at link time.
3868SDValue
3869LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3870                                               SelectionDAG &DAG) const {
// NOTE(review): lines 3871-3872 (the guard condition, presumably checking
// "CallingConv::GHC" on the current function) were lost in extraction --
// confirm against the upstream file.
3873    report_fatal_error("In GHC calling convention TLS is not supported");
3874
3875  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3876  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3877
3878  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3879  assert(N->getOffset() == 0 && "unexpected offset in global node");
3880
3881  if (DAG.getTarget().useEmulatedTLS())
3882    reportFatalUsageError("the emulated TLS is prohibited");
3883
3884  bool IsDesc = DAG.getTarget().useTLSDESC();
3885
3886  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
// NOTE(review): line 3887 (presumably "case TLSModel::GeneralDynamic:") was
// lost in extraction -- confirm against the upstream file.
3888    // In this model, application code calls the dynamic linker function
3889    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3890    // runtime.
3891    if (!IsDesc)
3892      return getDynamicTLSAddr(N, DAG,
3893                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3894                                     : LoongArch::PseudoLA_TLS_GD,
3895                               Large);
3896    break;
// NOTE(review): line 3897 (presumably "case TLSModel::LocalDynamic:") was
// lost in extraction -- confirm against the upstream file.
3898    // Same as GeneralDynamic, except for assembly modifiers and relocation
3899    // records.
3900    if (!IsDesc)
3901      return getDynamicTLSAddr(N, DAG,
3902                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3903                                     : LoongArch::PseudoLA_TLS_LD,
3904                               Large);
3905    break;
// NOTE(review): line 3906 (presumably "case TLSModel::InitialExec:") was
// lost in extraction -- confirm against the upstream file.
3907    // This model uses the GOT to resolve TLS offsets.
3908    return getStaticTLSAddr(N, DAG,
3909                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3910                                  : LoongArch::PseudoLA_TLS_IE,
3911                            /*UseGOT=*/true, Large);
// NOTE(review): line 3912 (presumably "case TLSModel::LocalExec:") was lost
// in extraction -- confirm against the upstream file.
3913    // This model is used when static linking as the TLS offsets are resolved
3914    // during program linking.
3915    //
3916    // This node doesn't need an extra argument for the large code model.
3917    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3918                            /*UseGOT=*/false, Large);
3919  }
3920
  // Reached via the breaks above, i.e. GD/LD models with TLSDESC enabled.
3921  return getTLSDescAddr(N, DAG,
3922                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3923                              : LoongArch::PseudoLA_TLS_DESC,
3924                        Large);
3925}
3926
// Verify that an intrinsic's immediate operand (operand index ImmOp) fits in
// N bits (signed when IsSigned). On failure, emit a diagnostic and return an
// UNDEF of the intrinsic's type; on success, return a null SDValue so the
// caller continues with default handling.
3927template <unsigned N>
// NOTE(review): line 3928 (this helper's signature, presumably
// "static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,") was
// lost in extraction -- confirm against the upstream file.
3929                                 SelectionDAG &DAG, bool IsSigned = false) {
3930  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3931  // Check the ImmArg.
3932  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3933      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3934    DAG.getContext()->emitError(Op->getOperationName(0) +
3935                                ": argument out of range.");
3936    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3937  }
3938  return SDValue();
3939}
3940
// Lower ISD::INTRINSIC_WO_CHAIN. Apart from the thread pointer, every custom
// case only validates that the intrinsic's immediate operand fits its
// encodable field (via checkIntrinsicImmArg); pattern selection does the rest.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The TLS thread pointer lives in $tp (R2).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    // uimm1 element index at operand 2.
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    // uimm2 element index at operand 2.
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    // uimm2 element index at operand 3.
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    // uimm3 (byte-element shift/saturate amount or index) at operand 2.
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    // uimm3 element index at operand 3.
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    // uimm4 at operand 2.
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    // Narrowing shifts: uimm4 at operand 3 (two vector source operands).
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    // uimm5 at operand 2 (unsigned compares use unsigned immediates).
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    // Signed compares take a simm5 at operand 2.
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    // uimm5 at operand 3.
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    // uimm6 at operand 2 (64-bit element shifts).
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    // uimm6 at operand 3.
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    // uimm7 at operand 3 (128-bit source elements).
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    // uimm8 at operand 2.
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    // uimm8 at operand 3.
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    // Replicated immediate: simm10 at operand 1.
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    // Load-immediate pattern: simm13 at operand 1.
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}
4218
4219// Helper function that emits error message for intrinsics with chain and return
4220// merge values of a UNDEF and the chain.
4222 StringRef ErrorMsg,
4223 SelectionDAG &DAG) {
4224 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4225 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4226 SDLoc(Op));
4227}
4228
4229SDValue
4230LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4231 SelectionDAG &DAG) const {
4232 SDLoc DL(Op);
4233 MVT GRLenVT = Subtarget.getGRLenVT();
4234 EVT VT = Op.getValueType();
4235 SDValue Chain = Op.getOperand(0);
4236 const StringRef ErrorMsgOOR = "argument out of range";
4237 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4238 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4239
4240 switch (Op.getConstantOperandVal(1)) {
4241 default:
4242 return Op;
4243 case Intrinsic::loongarch_crc_w_b_w:
4244 case Intrinsic::loongarch_crc_w_h_w:
4245 case Intrinsic::loongarch_crc_w_w_w:
4246 case Intrinsic::loongarch_crc_w_d_w:
4247 case Intrinsic::loongarch_crcc_w_b_w:
4248 case Intrinsic::loongarch_crcc_w_h_w:
4249 case Intrinsic::loongarch_crcc_w_w_w:
4250 case Intrinsic::loongarch_crcc_w_d_w:
4251 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4252 case Intrinsic::loongarch_csrrd_w:
4253 case Intrinsic::loongarch_csrrd_d: {
4254 unsigned Imm = Op.getConstantOperandVal(2);
4255 return !isUInt<14>(Imm)
4256 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4257 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4258 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4259 }
4260 case Intrinsic::loongarch_csrwr_w:
4261 case Intrinsic::loongarch_csrwr_d: {
4262 unsigned Imm = Op.getConstantOperandVal(3);
4263 return !isUInt<14>(Imm)
4264 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4265 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4266 {Chain, Op.getOperand(2),
4267 DAG.getConstant(Imm, DL, GRLenVT)});
4268 }
4269 case Intrinsic::loongarch_csrxchg_w:
4270 case Intrinsic::loongarch_csrxchg_d: {
4271 unsigned Imm = Op.getConstantOperandVal(4);
4272 return !isUInt<14>(Imm)
4273 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4274 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4275 {Chain, Op.getOperand(2), Op.getOperand(3),
4276 DAG.getConstant(Imm, DL, GRLenVT)});
4277 }
4278 case Intrinsic::loongarch_iocsrrd_d: {
4279 return DAG.getNode(
4280 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4281 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4282 }
4283#define IOCSRRD_CASE(NAME, NODE) \
4284 case Intrinsic::loongarch_##NAME: { \
4285 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4286 {Chain, Op.getOperand(2)}); \
4287 }
4288 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4289 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4290 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4291#undef IOCSRRD_CASE
4292 case Intrinsic::loongarch_cpucfg: {
4293 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4294 {Chain, Op.getOperand(2)});
4295 }
4296 case Intrinsic::loongarch_lddir_d: {
4297 unsigned Imm = Op.getConstantOperandVal(3);
4298 return !isUInt<8>(Imm)
4299 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4300 : Op;
4301 }
4302 case Intrinsic::loongarch_movfcsr2gr: {
4303 if (!Subtarget.hasBasicF())
4304 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4305 unsigned Imm = Op.getConstantOperandVal(2);
4306 return !isUInt<2>(Imm)
4307 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4308 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4309 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4310 }
4311 case Intrinsic::loongarch_lsx_vld:
4312 case Intrinsic::loongarch_lsx_vldrepl_b:
4313 case Intrinsic::loongarch_lasx_xvld:
4314 case Intrinsic::loongarch_lasx_xvldrepl_b:
4315 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4316 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4317 : SDValue();
4318 case Intrinsic::loongarch_lsx_vldrepl_h:
4319 case Intrinsic::loongarch_lasx_xvldrepl_h:
4320 return !isShiftedInt<11, 1>(
4321 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4323 Op, "argument out of range or not a multiple of 2", DAG)
4324 : SDValue();
4325 case Intrinsic::loongarch_lsx_vldrepl_w:
4326 case Intrinsic::loongarch_lasx_xvldrepl_w:
4327 return !isShiftedInt<10, 2>(
4328 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4330 Op, "argument out of range or not a multiple of 4", DAG)
4331 : SDValue();
4332 case Intrinsic::loongarch_lsx_vldrepl_d:
4333 case Intrinsic::loongarch_lasx_xvldrepl_d:
4334 return !isShiftedInt<9, 3>(
4335 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4337 Op, "argument out of range or not a multiple of 8", DAG)
4338 : SDValue();
4339 }
4340}
4341
4342// Helper function that emits error message for intrinsics with void return
4343// value and return the chain.
4345 SelectionDAG &DAG) {
4346
4347 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4348 return Op.getOperand(0);
4349}
4350
4351SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4352 SelectionDAG &DAG) const {
4353 SDLoc DL(Op);
4354 MVT GRLenVT = Subtarget.getGRLenVT();
4355 SDValue Chain = Op.getOperand(0);
4356 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4357 SDValue Op2 = Op.getOperand(2);
4358 const StringRef ErrorMsgOOR = "argument out of range";
4359 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4360 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4361 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4362
4363 switch (IntrinsicEnum) {
4364 default:
4365 // TODO: Add more Intrinsics.
4366 return SDValue();
4367 case Intrinsic::loongarch_cacop_d:
4368 case Intrinsic::loongarch_cacop_w: {
4369 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4370 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4371 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4372 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4373 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4374 unsigned Imm1 = Op2->getAsZExtVal();
4375 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4376 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4377 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4378 return Op;
4379 }
4380 case Intrinsic::loongarch_dbar: {
4381 unsigned Imm = Op2->getAsZExtVal();
4382 return !isUInt<15>(Imm)
4383 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4384 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4385 DAG.getConstant(Imm, DL, GRLenVT));
4386 }
4387 case Intrinsic::loongarch_ibar: {
4388 unsigned Imm = Op2->getAsZExtVal();
4389 return !isUInt<15>(Imm)
4390 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4391 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4392 DAG.getConstant(Imm, DL, GRLenVT));
4393 }
4394 case Intrinsic::loongarch_break: {
4395 unsigned Imm = Op2->getAsZExtVal();
4396 return !isUInt<15>(Imm)
4397 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4398 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4399 DAG.getConstant(Imm, DL, GRLenVT));
4400 }
4401 case Intrinsic::loongarch_movgr2fcsr: {
4402 if (!Subtarget.hasBasicF())
4403 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4404 unsigned Imm = Op2->getAsZExtVal();
4405 return !isUInt<2>(Imm)
4406 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4407 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4408 DAG.getConstant(Imm, DL, GRLenVT),
4409 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4410 Op.getOperand(3)));
4411 }
4412 case Intrinsic::loongarch_syscall: {
4413 unsigned Imm = Op2->getAsZExtVal();
4414 return !isUInt<15>(Imm)
4415 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4416 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4417 DAG.getConstant(Imm, DL, GRLenVT));
4418 }
4419#define IOCSRWR_CASE(NAME, NODE) \
4420 case Intrinsic::loongarch_##NAME: { \
4421 SDValue Op3 = Op.getOperand(3); \
4422 return Subtarget.is64Bit() \
4423 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4424 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4425 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4426 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4427 Op3); \
4428 }
4429 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4430 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4431 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4432#undef IOCSRWR_CASE
4433 case Intrinsic::loongarch_iocsrwr_d: {
4434 return !Subtarget.is64Bit()
4435 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4436 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4437 Op2,
4438 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4439 Op.getOperand(3)));
4440 }
4441#define ASRT_LE_GT_CASE(NAME) \
4442 case Intrinsic::loongarch_##NAME: { \
4443 return !Subtarget.is64Bit() \
4444 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4445 : Op; \
4446 }
4447 ASRT_LE_GT_CASE(asrtle_d)
4448 ASRT_LE_GT_CASE(asrtgt_d)
4449#undef ASRT_LE_GT_CASE
4450 case Intrinsic::loongarch_ldpte_d: {
4451 unsigned Imm = Op.getConstantOperandVal(3);
4452 return !Subtarget.is64Bit()
4453 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4454 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4455 : Op;
4456 }
4457 case Intrinsic::loongarch_lsx_vst:
4458 case Intrinsic::loongarch_lasx_xvst:
4459 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4460 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4461 : SDValue();
4462 case Intrinsic::loongarch_lasx_xvstelm_b:
4463 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4464 !isUInt<5>(Op.getConstantOperandVal(5)))
4465 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4466 : SDValue();
4467 case Intrinsic::loongarch_lsx_vstelm_b:
4468 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4469 !isUInt<4>(Op.getConstantOperandVal(5)))
4470 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4471 : SDValue();
4472 case Intrinsic::loongarch_lasx_xvstelm_h:
4473 return (!isShiftedInt<8, 1>(
4474 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4475 !isUInt<4>(Op.getConstantOperandVal(5)))
4477 Op, "argument out of range or not a multiple of 2", DAG)
4478 : SDValue();
4479 case Intrinsic::loongarch_lsx_vstelm_h:
4480 return (!isShiftedInt<8, 1>(
4481 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4482 !isUInt<3>(Op.getConstantOperandVal(5)))
4484 Op, "argument out of range or not a multiple of 2", DAG)
4485 : SDValue();
4486 case Intrinsic::loongarch_lasx_xvstelm_w:
4487 return (!isShiftedInt<8, 2>(
4488 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4489 !isUInt<3>(Op.getConstantOperandVal(5)))
4491 Op, "argument out of range or not a multiple of 4", DAG)
4492 : SDValue();
4493 case Intrinsic::loongarch_lsx_vstelm_w:
4494 return (!isShiftedInt<8, 2>(
4495 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4496 !isUInt<2>(Op.getConstantOperandVal(5)))
4498 Op, "argument out of range or not a multiple of 4", DAG)
4499 : SDValue();
4500 case Intrinsic::loongarch_lasx_xvstelm_d:
4501 return (!isShiftedInt<8, 3>(
4502 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4503 !isUInt<2>(Op.getConstantOperandVal(5)))
4505 Op, "argument out of range or not a multiple of 8", DAG)
4506 : SDValue();
4507 case Intrinsic::loongarch_lsx_vstelm_d:
4508 return (!isShiftedInt<8, 3>(
4509 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4510 !isUInt<1>(Op.getConstantOperandVal(5)))
4512 Op, "argument out of range or not a multiple of 8", DAG)
4513 : SDValue();
4514 }
4515}
4516
4517SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4518 SelectionDAG &DAG) const {
4519 SDLoc DL(Op);
4520 SDValue Lo = Op.getOperand(0);
4521 SDValue Hi = Op.getOperand(1);
4522 SDValue Shamt = Op.getOperand(2);
4523 EVT VT = Lo.getValueType();
4524
4525 // if Shamt-GRLen < 0: // Shamt < GRLen
4526 // Lo = Lo << Shamt
4527 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4528 // else:
4529 // Lo = 0
4530 // Hi = Lo << (Shamt-GRLen)
4531
4532 SDValue Zero = DAG.getConstant(0, DL, VT);
4533 SDValue One = DAG.getConstant(1, DL, VT);
4534 SDValue MinusGRLen =
4535 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4536 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4537 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4538 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4539
4540 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4541 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4542 SDValue ShiftRightLo =
4543 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4544 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4545 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4546 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4547
4548 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4549
4550 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4551 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4552
4553 SDValue Parts[2] = {Lo, Hi};
4554 return DAG.getMergeValues(Parts, DL);
4555}
4556
4557SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4558 SelectionDAG &DAG,
4559 bool IsSRA) const {
4560 SDLoc DL(Op);
4561 SDValue Lo = Op.getOperand(0);
4562 SDValue Hi = Op.getOperand(1);
4563 SDValue Shamt = Op.getOperand(2);
4564 EVT VT = Lo.getValueType();
4565
4566 // SRA expansion:
4567 // if Shamt-GRLen < 0: // Shamt < GRLen
4568 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4569 // Hi = Hi >>s Shamt
4570 // else:
4571 // Lo = Hi >>s (Shamt-GRLen);
4572 // Hi = Hi >>s (GRLen-1)
4573 //
4574 // SRL expansion:
4575 // if Shamt-GRLen < 0: // Shamt < GRLen
4576 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4577 // Hi = Hi >>u Shamt
4578 // else:
4579 // Lo = Hi >>u (Shamt-GRLen);
4580 // Hi = 0;
4581
4582 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4583
4584 SDValue Zero = DAG.getConstant(0, DL, VT);
4585 SDValue One = DAG.getConstant(1, DL, VT);
4586 SDValue MinusGRLen =
4587 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4588 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4589 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4590 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4591
4592 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4593 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4594 SDValue ShiftLeftHi =
4595 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4596 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4597 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4598 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4599 SDValue HiFalse =
4600 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4601
4602 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4603
4604 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4605 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4606
4607 SDValue Parts[2] = {Lo, Hi};
4608 return DAG.getMergeValues(Parts, DL);
4609}
4610
4611// Returns the opcode of the target-specific SDNode that implements the 32-bit
4612// form of the given Opcode.
4613static unsigned getLoongArchWOpcode(unsigned Opcode) {
4614 switch (Opcode) {
4615 default:
4616 llvm_unreachable("Unexpected opcode");
4617 case ISD::SDIV:
4618 return LoongArchISD::DIV_W;
4619 case ISD::UDIV:
4620 return LoongArchISD::DIV_WU;
4621 case ISD::SREM:
4622 return LoongArchISD::MOD_W;
4623 case ISD::UREM:
4624 return LoongArchISD::MOD_WU;
4625 case ISD::SHL:
4626 return LoongArchISD::SLL_W;
4627 case ISD::SRA:
4628 return LoongArchISD::SRA_W;
4629 case ISD::SRL:
4630 return LoongArchISD::SRL_W;
4631 case ISD::ROTL:
4632 case ISD::ROTR:
4633 return LoongArchISD::ROTR_W;
4634 case ISD::CTTZ:
4635 return LoongArchISD::CTZ_W;
4636 case ISD::CTLZ:
4637 return LoongArchISD::CLZ_W;
4638 }
4639}
4640
4641// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4642// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4643// otherwise be promoted to i64, making it difficult to select the
4644// SLL_W/.../*W later one because the fact the operation was originally of
4645// type i8/i16/i32 is lost.
4647 unsigned ExtOpc = ISD::ANY_EXTEND) {
4648 SDLoc DL(N);
4649 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4650 SDValue NewOp0, NewRes;
4651
4652 switch (NumOp) {
4653 default:
4654 llvm_unreachable("Unexpected NumOp");
4655 case 1: {
4656 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4657 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4658 break;
4659 }
4660 case 2: {
4661 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4662 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4663 if (N->getOpcode() == ISD::ROTL) {
4664 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4665 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4666 }
4667 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4668 break;
4669 }
4670 // TODO:Handle more NumOp.
4671 }
4672
4673 // ReplaceNodeResults requires we maintain the same type for the return
4674 // value.
4675 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4676}
4677
4678// Converts the given 32-bit operation to a i64 operation with signed extension
4679// semantic to reduce the signed extension instructions.
4681 SDLoc DL(N);
4682 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4683 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4684 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4685 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4686 DAG.getValueType(MVT::i32));
4687 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4688}
4689
4690// Helper function that emits error message for intrinsics with/without chain
4691// and return a UNDEF or and the chain as the results.
4694 StringRef ErrorMsg, bool WithChain = true) {
4695 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4696 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4697 if (!WithChain)
4698 return;
4699 Results.push_back(N->getOperand(0));
4700}
4701
4702template <unsigned N>
4703static void
4705 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4706 unsigned ResOp) {
4707 const StringRef ErrorMsgOOR = "argument out of range";
4708 unsigned Imm = Node->getConstantOperandVal(2);
4709 if (!isUInt<N>(Imm)) {
4711 /*WithChain=*/false);
4712 return;
4713 }
4714 SDLoc DL(Node);
4715 SDValue Vec = Node->getOperand(1);
4716
4717 SDValue PickElt =
4718 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4719 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4721 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4722 PickElt.getValue(0)));
4723}
4724
4727 SelectionDAG &DAG,
4728 const LoongArchSubtarget &Subtarget,
4729 unsigned ResOp) {
4730 SDLoc DL(N);
4731 SDValue Vec = N->getOperand(1);
4732
4733 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4734 Results.push_back(
4735 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4736}
4737
// Dispatches result legalization for chain-less LSX/LASX intrinsics by
// intrinsic ID (operand 0): lane extracts go through
// replaceVPICKVE2GRResults<BitWidth>, and the whole-vector (non)zero tests go
// through replaceVecCondBranchResults with the matching VALL_*/VANY_* node.
4738static void
4740 SelectionDAG &DAG,
4741 const LoongArchSubtarget &Subtarget) {
4742 switch (N->getConstantOperandVal(0)) {
4743 default:
4744 llvm_unreachable("Unexpected Intrinsic.");
// Sign-extending lane extracts. The template argument is the number of bits
// valid for the lane-index immediate (more lanes => wider index).
4745 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4746 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4747 LoongArchISD::VPICK_SEXT_ELT);
4748 break;
4749 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4750 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4751 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4752 LoongArchISD::VPICK_SEXT_ELT);
4753 break;
4754 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4755 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4756 LoongArchISD::VPICK_SEXT_ELT);
4757 break;
// Zero-extending ("_u"-suffixed) lane extracts.
4758 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4759 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4760 LoongArchISD::VPICK_ZEXT_ELT);
4761 break;
4762 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4763 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4764 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4765 LoongArchISD::VPICK_ZEXT_ELT);
4766 break;
4767 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4768 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4769 LoongArchISD::VPICK_ZEXT_ELT);
4770 break;
// Per-element "all zero" tests.
4771 case Intrinsic::loongarch_lsx_bz_b:
4772 case Intrinsic::loongarch_lsx_bz_h:
4773 case Intrinsic::loongarch_lsx_bz_w:
4774 case Intrinsic::loongarch_lsx_bz_d:
4775 case Intrinsic::loongarch_lasx_xbz_b:
4776 case Intrinsic::loongarch_lasx_xbz_h:
4777 case Intrinsic::loongarch_lasx_xbz_w:
4778 case Intrinsic::loongarch_lasx_xbz_d:
4779 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4780 LoongArchISD::VALL_ZERO);
4781 break;
// Whole-vector ("_v") zero test.
4782 case Intrinsic::loongarch_lsx_bz_v:
4783 case Intrinsic::loongarch_lasx_xbz_v:
4784 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4785 LoongArchISD::VANY_ZERO);
4786 break;
// Per-element "all nonzero" tests.
4787 case Intrinsic::loongarch_lsx_bnz_b:
4788 case Intrinsic::loongarch_lsx_bnz_h:
4789 case Intrinsic::loongarch_lsx_bnz_w:
4790 case Intrinsic::loongarch_lsx_bnz_d:
4791 case Intrinsic::loongarch_lasx_xbnz_b:
4792 case Intrinsic::loongarch_lasx_xbnz_h:
4793 case Intrinsic::loongarch_lasx_xbnz_w:
4794 case Intrinsic::loongarch_lasx_xbnz_d:
4795 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4796 LoongArchISD::VALL_NONZERO);
4797 break;
// Whole-vector ("_v") nonzero test.
4798 case Intrinsic::loongarch_lsx_bnz_v:
4799 case Intrinsic::loongarch_lasx_xbnz_v:
4800 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4801 LoongArchISD::VANY_NONZERO);
4802 break;
4803 }
4804}
4805
// Legalizes a 128-bit ATOMIC_CMP_SWAP by selecting a PseudoCmpXchg128* pseudo
// directly: the i128 compare and new values are split into i64 lo/hi halves,
// and the i128 result is rebuilt with BUILD_PAIR from the pseudo's outputs.
4808 SelectionDAG &DAG) {
4809 assert(N->getValueType(0) == MVT::i128 &&
4810 "AtomicCmpSwap on types less than 128 should be legal");
// Keep the original memory operand so it can be re-attached to the machine
// node below (preserves aliasing/volatility info for later passes).
4811 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4812
// Choose the pseudo variant from the merged atomic ordering.
// NOTE(review): the case labels select between the Acquire variant and the
// plain (stronger, default-fenced) variant — confirm which AtomicOrdering
// values map to each against the enum definition.
4813 unsigned Opcode;
4814 switch (MemOp->getMergedOrdering()) {
4818 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4819 break;
4822 Opcode = LoongArch::PseudoCmpXchg128;
4823 break;
4824 default:
4825 llvm_unreachable("Unexpected ordering!");
4826 }
4827
4828 SDLoc DL(N);
// Split the 128-bit compare and new values into (lo, hi) i64 pairs.
4829 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4830 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
// Operands: pointer, cmp.lo, cmp.hi, new.lo, new.hi, chain.
4831 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4832 NewVal.first, NewVal.second, N->getOperand(0)};
4833
// Results: two i64 value halves, a third i64 (unused here — presumably a
// scratch/status output; verify against the pseudo's definition), and the
// chain at result index 3.
4834 SDNode *CmpSwap = DAG.getMachineNode(
4835 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4836 Ops);
4837 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4838 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4839 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4840 Results.push_back(SDValue(CmpSwap, 3));
4841}
4842
// Custom result legalization: replaces nodes whose result type is illegal for
// the target with equivalent sequences, pushing the replacement value(s) —
// and, for chained nodes, the chain — onto Results. Most scalar cases widen an
// i32 operation to the GRLen-wide "W" form and truncate back.
4845 SDLoc DL(N);
4846 EVT VT = N->getValueType(0);
4847 switch (N->getOpcode()) {
4848 default:
4849 llvm_unreachable("Don't know how to legalize this operation");
4850 case ISD::ADD:
4851 case ISD::SUB:
4852 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4853 "Unexpected custom legalisation");
4854 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4855 break;
4856 case ISD::SDIV:
4857 case ISD::UDIV:
4858 case ISD::SREM:
4859 case ISD::UREM:
4860 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4861 "Unexpected custom legalisation");
// With the Div32 feature the 32-bit divide ignores the upper input bits, so a
// cheaper extension kind can be used; otherwise operands are sign-extended.
4862 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4863 Subtarget.hasDiv32() && VT == MVT::i32
4865 : ISD::SIGN_EXTEND));
4866 break;
4867 case ISD::SHL:
4868 case ISD::SRA:
4869 case ISD::SRL:
4870 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4871 "Unexpected custom legalisation");
// Only variable-amount shifts are widened here; constant-amount shifts fall
// through and are handled by the default expansion/patterns.
4872 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4873 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4874 break;
4875 }
4876 break;
4877 case ISD::ROTL:
4878 case ISD::ROTR:
4879 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4880 "Unexpected custom legalisation");
4881 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4882 break;
4883 case ISD::FP_TO_SINT: {
4884 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4885 "Unexpected custom legalisation");
4886 SDValue Src = N->getOperand(0);
4887 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
// If the source FP type is legal (not softened), convert via the FTINT node
// (f16 is first extended to f32) and bitcast the FP result to the integer VT.
4888 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4890 if (!isTypeLegal(Src.getValueType()))
4891 return;
4892 if (Src.getValueType() == MVT::f16)
4893 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4894 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4895 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4896 return;
4897 }
4898 // If the FP type needs to be softened, emit a library call using the 'si'
4899 // version. If we left it to default legalization we'd end up with 'di'.
4900 RTLIB::Libcall LC;
4901 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4902 MakeLibCallOptions CallOptions;
4903 EVT OpVT = Src.getValueType();
4904 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4905 SDValue Chain = SDValue();
4906 SDValue Result;
4907 std::tie(Result, Chain) =
4908 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4909 Results.push_back(Result);
4910 break;
4911 }
4912 case ISD::BITCAST: {
4913 SDValue Src = N->getOperand(0);
4914 EVT SrcVT = Src.getValueType();
// f32 -> i32 on LA64: move FPR to GPR (result is i64, truncate to i32).
4915 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4916 Subtarget.hasBasicF()) {
4917 SDValue Dst =
4918 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4919 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
// f64 -> i64 on LA32: split the f64 into two i32 halves and pair them.
4920 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4921 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4922 DAG.getVTList(MVT::i32, MVT::i32), Src);
4923 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4924 NewReg.getValue(0), NewReg.getValue(1));
4925 Results.push_back(RetReg);
4926 }
4927 break;
4928 }
4929 case ISD::FP_TO_UINT: {
4930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4931 "Unexpected custom legalisation");
// Use the generic FP_TO_UINT expansion, then truncate its wide result.
4932 auto &TLI = DAG.getTargetLoweringInfo();
4933 SDValue Tmp1, Tmp2;
4934 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4935 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4936 break;
4937 }
4938 case ISD::BSWAP: {
4939 SDValue Src = N->getOperand(0);
4940 assert((VT == MVT::i16 || VT == MVT::i32) &&
4941 "Unexpected custom legalization");
// Widen to GRLen, byte-swap with the width-appropriate REVB node, truncate.
4942 MVT GRLenVT = Subtarget.getGRLenVT();
4943 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4944 SDValue Tmp;
4945 switch (VT.getSizeInBits()) {
4946 default:
4947 llvm_unreachable("Unexpected operand width");
4948 case 16:
4949 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4950 break;
4951 case 32:
4952 // Only LA64 will get to here due to the size mismatch between VT and
4953 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4954 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4955 break;
4956 }
4957 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4958 break;
4959 }
4960 case ISD::BITREVERSE: {
4961 SDValue Src = N->getOperand(0);
4962 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4963 "Unexpected custom legalization");
// Same widen/operate/truncate scheme as BSWAP, using the BITREV nodes.
4964 MVT GRLenVT = Subtarget.getGRLenVT();
4965 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4966 SDValue Tmp;
4967 switch (VT.getSizeInBits()) {
4968 default:
4969 llvm_unreachable("Unexpected operand width");
4970 case 8:
4971 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4972 break;
4973 case 32:
4974 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4975 break;
4976 }
4977 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4978 break;
4979 }
4980 case ISD::CTLZ:
4981 case ISD::CTTZ: {
4982 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4983 "Unexpected custom legalisation");
4984 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4985 break;
4986 }
// Chained intrinsics: operand 0 is the chain, operand 1 the intrinsic ID,
// operand 2 (and up) the intrinsic arguments. Error paths emit a diagnostic
// and replace results with UNDEF + chain via
// emitErrorAndReplaceIntrinsicResults.
4988 SDValue Chain = N->getOperand(0);
4989 SDValue Op2 = N->getOperand(2);
4990 MVT GRLenVT = Subtarget.getGRLenVT();
4991 const StringRef ErrorMsgOOR = "argument out of range";
4992 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4993 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4994
4995 switch (N->getConstantOperandVal(1)) {
4996 default:
4997 llvm_unreachable("Unexpected Intrinsic.");
4998 case Intrinsic::loongarch_movfcsr2gr: {
4999 if (!Subtarget.hasBasicF()) {
5000 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
5001 return;
5002 }
// FCSR index must fit in 2 bits.
5003 unsigned Imm = Op2->getAsZExtVal();
5004 if (!isUInt<2>(Imm)) {
5005 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5006 return;
5007 }
5008 SDValue MOVFCSR2GRResults = DAG.getNode(
5009 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
5010 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5011 Results.push_back(
5012 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
5013 Results.push_back(MOVFCSR2GRResults.getValue(1));
5014 break;
5015 }
// CRC intrinsics with two narrow operands: any-extend both to i64, emit the
// node, truncate the value result, forward the chain.
5016#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
5017 case Intrinsic::loongarch_##NAME: { \
5018 SDValue NODE = DAG.getNode( \
5019 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5020 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
5021 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5022 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5023 Results.push_back(NODE.getValue(1)); \
5024 break; \
5025 }
5026 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5027 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5028 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5029 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5030 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5031 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5032#undef CRC_CASE_EXT_BINARYOP
5033
// CRC intrinsics whose first data operand is already i64: only the second
// operand needs extension.
5034#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5035 case Intrinsic::loongarch_##NAME: { \
5036 SDValue NODE = DAG.getNode( \
5037 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5038 {Chain, Op2, \
5039 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5040 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5041 Results.push_back(NODE.getValue(1)); \
5042 break; \
5043 }
5044 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5045 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5046#undef CRC_CASE_EXT_UNARYOP
// 64-bit-only CSR/IOCSR intrinsics: reject on LA32 with a diagnostic.
5047#define CSR_CASE(ID) \
5048 case Intrinsic::loongarch_##ID: { \
5049 if (!Subtarget.is64Bit()) \
5050 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5051 break; \
5052 }
5053 CSR_CASE(csrrd_d);
5054 CSR_CASE(csrwr_d);
5055 CSR_CASE(csrxchg_d);
5056 CSR_CASE(iocsrrd_d);
5057#undef CSR_CASE
// CSR accesses: the CSR number immediate must fit in 14 bits.
5058 case Intrinsic::loongarch_csrrd_w: {
5059 unsigned Imm = Op2->getAsZExtVal();
5060 if (!isUInt<14>(Imm)) {
5061 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5062 return;
5063 }
5064 SDValue CSRRDResults =
5065 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5066 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5067 Results.push_back(
5068 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5069 Results.push_back(CSRRDResults.getValue(1));
5070 break;
5071 }
5072 case Intrinsic::loongarch_csrwr_w: {
5073 unsigned Imm = N->getConstantOperandVal(3);
5074 if (!isUInt<14>(Imm)) {
5075 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5076 return;
5077 }
5078 SDValue CSRWRResults =
5079 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5080 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5081 DAG.getConstant(Imm, DL, GRLenVT)});
5082 Results.push_back(
5083 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5084 Results.push_back(CSRWRResults.getValue(1));
5085 break;
5086 }
5087 case Intrinsic::loongarch_csrxchg_w: {
5088 unsigned Imm = N->getConstantOperandVal(4);
5089 if (!isUInt<14>(Imm)) {
5090 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5091 return;
5092 }
5093 SDValue CSRXCHGResults = DAG.getNode(
5094 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5095 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5096 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5097 DAG.getConstant(Imm, DL, GRLenVT)});
5098 Results.push_back(
5099 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5100 Results.push_back(CSRXCHGResults.getValue(1));
5101 break;
5102 }
// IOCSR reads: extend the address operand, emit the node, truncate the value.
5103#define IOCSRRD_CASE(NAME, NODE) \
5104 case Intrinsic::loongarch_##NAME: { \
5105 SDValue IOCSRRDResults = \
5106 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5107 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5108 Results.push_back( \
5109 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5110 Results.push_back(IOCSRRDResults.getValue(1)); \
5111 break; \
5112 }
5113 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5114 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5115 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5116#undef IOCSRRD_CASE
5117 case Intrinsic::loongarch_cpucfg: {
5118 SDValue CPUCFGResults =
5119 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5120 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5121 Results.push_back(
5122 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5123 Results.push_back(CPUCFGResults.getValue(1));
5124 break;
5125 }
5126 case Intrinsic::loongarch_lddir_d: {
5127 if (!Subtarget.is64Bit()) {
5128 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5129 return;
5130 }
5131 break;
5132 }
5133 }
5134 break;
5135 }
5136 case ISD::READ_REGISTER: {
// Reaching here means the register width does not match GRLen; diagnose and
// replace with UNDEF + chain.
5137 if (Subtarget.is64Bit())
5138 DAG.getContext()->emitError(
5139 "On LA64, only 64-bit registers can be read.");
5140 else
5141 DAG.getContext()->emitError(
5142 "On LA32, only 32-bit registers can be read.");
5143 Results.push_back(DAG.getUNDEF(VT));
5144 Results.push_back(N->getOperand(0));
5145 break;
5146 }
5148 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5149 break;
5150 }
5151 case ISD::LROUND: {
// Soft-float path: call the f32/f64 lround libcall returning i64, then
// truncate to the i32 result.
5152 SDValue Op0 = N->getOperand(0);
5153 EVT OpVT = Op0.getValueType();
5154 RTLIB::Libcall LC =
5155 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5156 MakeLibCallOptions CallOptions;
5157 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5158 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5159 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5160 Results.push_back(Result);
5161 break;
5162 }
5163 case ISD::ATOMIC_CMP_SWAP: {
5165 break;
5166 }
5167 case ISD::TRUNCATE: {
// Vector truncate of a type that will be widened: express the truncate as a
// shuffle that gathers every Scale-th lane of the (bitcast) wide input.
5168 MVT VT = N->getSimpleValueType(0);
5169 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5170 return;
5171
5172 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5173 SDValue In = N->getOperand(0);
5174 EVT InVT = In.getValueType();
5175 EVT InEltVT = InVT.getVectorElementType();
5176 EVT EltVT = VT.getVectorElementType();
5177 unsigned MinElts = VT.getVectorNumElements();
5178 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5179 unsigned InBits = InVT.getSizeInBits();
5180
5181 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5182 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
// Each output lane comes from lane Scale*I of the input; remaining lanes of
// the widened result are undef (-1).
5183 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5184 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5185 for (unsigned I = 0; I < MinElts; ++I)
5186 TruncMask[I] = Scale * I;
5187
// Widen the input into a full 128-bit register before the shuffle. (These
// inner declarations intentionally shadow the outer WidenNumElts/VT.)
5188 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5189 MVT SVT = In.getSimpleValueType().getScalarType();
5190 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5191 SDValue WidenIn =
5192 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5193 DAG.getVectorIdxConstant(0, DL));
5194 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5195 "Illegal vector type in truncation");
5196 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5197 Results.push_back(
5198 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5199 return;
5200 }
5201 }
5202
5203 break;
5204 }
5205 }
5206}
5207
5208/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5210 SelectionDAG &DAG) {
5211 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5212
// Only vector types that fit LSX (128-bit) or LASX (256-bit) registers.
5213 MVT VT = N->getSimpleValueType(0);
5214 if (!VT.is128BitVector() && !VT.is256BitVector())
5215 return SDValue();
5216
5217 SDValue X, Y;
5218 SDValue N0 = N->getOperand(0);
5219 SDValue N1 = N->getOperand(1);
5220
// AND is commutative: accept the NOT on either operand. isNOT peels the
// (xor X, -1) and returns X on success.
5221 if (SDValue Not = isNOT(N0, DAG)) {
5222 X = Not;
5223 Y = N1;
5224 } else if (SDValue Not = isNOT(N1, DAG)) {
5225 X = Not;
5226 Y = N0;
5227 } else
5228 return SDValue();
5229
// Normalize both operands to the AND's result type before emitting VANDN.
5230 X = DAG.getBitcast(VT, X);
5231 Y = DAG.getBitcast(VT, Y);
5232 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5233}
5234
// DAG combine for ISD::AND: first tries the vector (and (not X), Y) -> VANDN
// fold, then the scalar BSTRPICK patterns documented inline below.
5237 const LoongArchSubtarget &Subtarget) {
5238 if (DCI.isBeforeLegalizeOps())
5239 return SDValue();
5240
5241 SDValue FirstOperand = N->getOperand(0);
5242 SDValue SecondOperand = N->getOperand(1);
5243 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5244 EVT ValTy = N->getValueType(0);
5245 SDLoc DL(N);
5246 uint64_t lsb, msb;
5247 unsigned SMIdx, SMLen;
5248 ConstantSDNode *CN;
5249 SDValue NewOperand;
5250 MVT GRLenVT = Subtarget.getGRLenVT();
5251
5252 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5253 return R;
5254
5255 // BSTRPICK requires the 32S feature.
5256 if (!Subtarget.has32S())
5257 return SDValue();
5258
5259 // Op's second operand must be a shifted mask.
// isShiftedMask_64 fills SMIdx (mask start bit) and SMLen (mask length).
5260 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5261 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5262 return SDValue();
5263
5264 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5265 // Pattern match BSTRPICK.
5266 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5267 // => BSTRPICK $dst, $src, msb, lsb
5268 // where msb = lsb + len - 1
5269
5270 // The second operand of the shift must be an immediate.
5271 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5272 return SDValue();
5273
5274 lsb = CN->getZExtValue();
5275
5276 // Return if the shifted mask does not start at bit 0 or the sum of its
5277 // length and lsb exceeds the word's size.
5278 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5279 return SDValue();
5280
5281 NewOperand = FirstOperand.getOperand(0);
5282 } else {
5283 // Pattern match BSTRPICK.
5284 // $dst = and $src, (2**len- 1) , if len > 12
5285 // => BSTRPICK $dst, $src, msb, lsb
5286 // where lsb = 0 and msb = len - 1
5287
5288 // If the mask is <= 0xfff, andi can be used instead.
5289 if (CN->getZExtValue() <= 0xfff)
5290 return SDValue();
5291
5292 // Return if the MSB exceeds.
5293 if (SMIdx + SMLen > ValTy.getSizeInBits())
5294 return SDValue();
5295
5296 if (SMIdx > 0) {
5297 // Omit if the constant has more than 2 uses. This a conservative
5298 // decision. Whether it is a win depends on the HW microarchitecture.
5299 // However it should always be better for 1 and 2 uses.
5300 if (CN->use_size() > 2)
5301 return SDValue();
5302 // Return if the constant can be composed by a single LU12I.W.
5303 if ((CN->getZExtValue() & 0xfff) == 0)
5304 return SDValue();
5305 // Return if the constand can be composed by a single ADDI with
5306 // the zero register.
5307 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5308 return SDValue();
5309 }
5310
5311 lsb = SMIdx;
5312 NewOperand = FirstOperand;
5313 }
5314
5315 msb = lsb + SMLen - 1;
5316 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5317 DAG.getConstant(msb, DL, GRLenVT),
5318 DAG.getConstant(lsb, DL, GRLenVT));
// For the shift patterns (or a mask starting at bit 0) the BSTRPICK alone is
// the result; otherwise re-shift the picked field back into position.
5319 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5320 return NR0;
5321 // Try to optimize to
5322 // bstrpick $Rd, $Rs, msb, lsb
5323 // slli $Rd, $Rd, lsb
5324 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5325 DAG.getConstant(lsb, DL, GRLenVT));
5326}
5327
5330 const LoongArchSubtarget &Subtarget) {
5331 // BSTRPICK requires the 32S feature.
5332 if (!Subtarget.has32S())
5333 return SDValue();
5334
5335 if (DCI.isBeforeLegalizeOps())
5336 return SDValue();
5337
5338 // $dst = srl (and $src, Mask), Shamt
5339 // =>
5340 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5341 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5342 //
5343
5344 SDValue FirstOperand = N->getOperand(0);
5345 ConstantSDNode *CN;
5346 EVT ValTy = N->getValueType(0);
5347 SDLoc DL(N);
5348 MVT GRLenVT = Subtarget.getGRLenVT();
5349 unsigned MaskIdx, MaskLen;
5350 uint64_t Shamt;
5351
5352 // The first operand must be an AND and the second operand of the AND must be
5353 // a shifted mask.
5354 if (FirstOperand.getOpcode() != ISD::AND ||
5355 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5356 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5357 return SDValue();
5358
5359 // The second operand (shift amount) must be an immediate.
5360 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5361 return SDValue();
5362
5363 Shamt = CN->getZExtValue();
5364 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5365 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5366 FirstOperand->getOperand(0),
5367 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5368 DAG.getConstant(Shamt, DL, GRLenVT));
5369
5370 return SDValue();
5371}
5372
5373// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5374// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
5375static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5376 unsigned Depth) {
5377 // Limit recursion.
5379 return false;
5380 switch (Src.getOpcode()) {
// Leaves: the comparison/truncation input fixes the source vector size.
5381 case ISD::SETCC:
5382 case ISD::TRUNCATE:
5383 return Src.getOperand(0).getValueSizeInBits() == Size;
5384 case ISD::FREEZE:
5385 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
// Bitwise ops: both sides must trace back to a vector of the required size.
5386 case ISD::AND:
5387 case ISD::XOR:
5388 case ISD::OR:
5389 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5390 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
// Selects: only look through when the condition is i1-per-element, and both
// selected values must match the size.
5391 case ISD::SELECT:
5392 case ISD::VSELECT:
5393 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5394 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5395 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
// All-zeros/all-ones constants are size-agnostic and always acceptable.
5396 case ISD::BUILD_VECTOR:
5397 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5398 ISD::isBuildVectorAllOnes(Src.getNode());
5399 }
5400 return false;
5401}
5402
5403// Helper to push sign extension of vXi1 SETCC result through bitops.
// Mirrors the node kinds accepted by checkBitcastSrcVectorSize: leaves are
// sign-extended directly, while bitops/selects are rebuilt in SExtVT with
// recursively-extended operands.
5405 SDValue Src, const SDLoc &DL) {
5406 switch (Src.getOpcode()) {
5407 case ISD::SETCC:
5408 case ISD::FREEZE:
5409 case ISD::TRUNCATE:
5410 case ISD::BUILD_VECTOR:
5411 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5412 case ISD::AND:
5413 case ISD::XOR:
5414 case ISD::OR:
5415 return DAG.getNode(
5416 Src.getOpcode(), DL, SExtVT,
5417 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5418 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5419 case ISD::SELECT:
5420 case ISD::VSELECT:
// The select condition is kept as-is; only the selected values are widened.
5421 return DAG.getSelect(
5422 DL, SExtVT, Src.getOperand(0),
5423 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5424 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5425 }
5426 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5427}
5428
// Combines (bitcast (setcc V, C, cc)) into a single [X]VMSK* mask-move node
// when the comparison is against all-zeros/all-ones with a supported element
// type. Returns SDValue() when no direct VMSK mapping applies.
5429static SDValue
5432 const LoongArchSubtarget &Subtarget) {
5433 SDLoc DL(N);
5434 EVT VT = N->getValueType(0);
5435 SDValue Src = N->getOperand(0);
5436 EVT SrcVT = Src.getValueType();
5437
// Only fire when the bitcast is the sole user of the SETCC.
5438 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5439 return SDValue();
5440
5441 bool UseLASX;
// DELETED_NODE doubles as a "no mapping found" sentinel.
5442 unsigned Opc = ISD::DELETED_NODE;
5443 EVT CmpVT = Src.getOperand(0).getValueType();
5444 EVT EltVT = CmpVT.getVectorElementType();
5445
// 128-bit compares use the LSX forms, 256-bit the LASX forms; anything else
// is unsupported.
5446 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5447 UseLASX = false;
5448 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5449 CmpVT.getSizeInBits() == 256)
5450 UseLASX = true;
5451 else
5452 return SDValue();
5453
5454 SDValue SrcN1 = Src.getOperand(1);
5455 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5456 default:
5457 break;
5458 case ISD::SETEQ:
5459 // x == 0 => not (vmsknez.b x)
5460 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5461 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5462 break;
5463 case ISD::SETGT:
5464 // x > -1 => vmskgez.b x
5465 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5466 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5467 break;
5468 case ISD::SETGE:
5469 // x >= 0 => vmskgez.b x
5470 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5471 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5472 break;
5473 case ISD::SETLT:
5474 // x < 0 => vmskltz.{b,h,w,d} x
5475 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5476 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5477 EltVT == MVT::i64))
5478 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5479 break;
5480 case ISD::SETLE:
5481 // x <= -1 => vmskltz.{b,h,w,d} x
5482 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5483 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5484 EltVT == MVT::i64))
5485 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5486 break;
5487 case ISD::SETNE:
5488 // x != 0 => vmsknez.b x
5489 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5490 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5491 break;
5492 }
5493
5494 if (Opc == ISD::DELETED_NODE)
5495 return SDValue();
5496
// Emit the mask move on GRLen, size the integer down/up, and bitcast to the
// original result type.
5497 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5499 V = DAG.getZExtOrTrunc(V, DL, T);
5500 return DAG.getBitcast(VT, V);
5502
5505 const LoongArchSubtarget &Subtarget) {
5506 SDLoc DL(N);
5507 EVT VT = N->getValueType(0);
5508 SDValue Src = N->getOperand(0);
5509 EVT SrcVT = Src.getValueType();
5510 MVT GRLenVT = Subtarget.getGRLenVT();
5511
5512 if (!DCI.isBeforeLegalizeOps())
5513 return SDValue();
5514
5515 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5516 return SDValue();
5517
5518 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5519 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5520 if (Res)
5521 return Res;
5522
5523 // Generate vXi1 using [X]VMSKLTZ
5524 MVT SExtVT;
5525 unsigned Opc;
5526 bool UseLASX = false;
5527 bool PropagateSExt = false;
5528
5529 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5530 EVT CmpVT = Src.getOperand(0).getValueType();
5531 if (CmpVT.getSizeInBits() > 256)
5532 return SDValue();
5533 }
5534
5535 switch (SrcVT.getSimpleVT().SimpleTy) {
5536 default:
5537 return SDValue();
5538 case MVT::v2i1:
5539 SExtVT = MVT::v2i64;
5540 break;
5541 case MVT::v4i1:
5542 SExtVT = MVT::v4i32;
5543 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5544 SExtVT = MVT::v4i64;
5545 UseLASX = true;
5546 PropagateSExt = true;
5547 }
5548 break;
5549 case MVT::v8i1:
5550 SExtVT = MVT::v8i16;
5551 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5552 SExtVT = MVT::v8i32;
5553 UseLASX = true;
5554 PropagateSExt = true;
5555 }
5556 break;
5557 case MVT::v16i1:
5558 SExtVT = MVT::v16i8;
5559 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5560 SExtVT = MVT::v16i16;
5561 UseLASX = true;
5562 PropagateSExt = true;
5563 }
5564 break;
5565 case MVT::v32i1:
5566 SExtVT = MVT::v32i8;
5567 UseLASX = true;
5568 break;
5569 };
5570 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5571 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5572
5573 SDValue V;
5574 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5575 if (Src.getSimpleValueType() == MVT::v32i8) {
5576 SDValue Lo, Hi;
5577 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5578 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5579 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5580 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5581 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5582 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5583 } else if (UseLASX) {
5584 return SDValue();
5585 }
5586 }
5587
5588 if (!V) {
5589 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5590 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5591 }
5592
5594 V = DAG.getZExtOrTrunc(V, DL, T);
5595 return DAG.getBitcast(VT, V);
5596}
5597
5600 const LoongArchSubtarget &Subtarget) {
5601 MVT GRLenVT = Subtarget.getGRLenVT();
5602 EVT ValTy = N->getValueType(0);
5603 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5604 ConstantSDNode *CN0, *CN1;
5605 SDLoc DL(N);
5606 unsigned ValBits = ValTy.getSizeInBits();
5607 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5608 unsigned Shamt;
5609 bool SwapAndRetried = false;
5610
5611 // BSTRPICK requires the 32S feature.
5612 if (!Subtarget.has32S())
5613 return SDValue();
5614
5615 if (DCI.isBeforeLegalizeOps())
5616 return SDValue();
5617
5618 if (ValBits != 32 && ValBits != 64)
5619 return SDValue();
5620
5621Retry:
5622 // 1st pattern to match BSTRINS:
5623 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5624 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5625 // =>
5626 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5627 if (N0.getOpcode() == ISD::AND &&
5628 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5629 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5630 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5631 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5632 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5633 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5634 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5635 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5636 (MaskIdx0 + MaskLen0 <= ValBits)) {
5637 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5638 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5639 N1.getOperand(0).getOperand(0),
5640 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5641 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5642 }
5643
5644 // 2nd pattern to match BSTRINS:
5645 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5646 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5647 // =>
5648 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5649 if (N0.getOpcode() == ISD::AND &&
5650 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5651 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5652 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5653 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5654 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5655 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5656 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5657 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5658 (MaskIdx0 + MaskLen0 <= ValBits)) {
5659 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5660 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5661 N1.getOperand(0).getOperand(0),
5662 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5663 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5664 }
5665
5666 // 3rd pattern to match BSTRINS:
5667 // R = or (and X, mask0), (and Y, mask1)
5668 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5669 // =>
5670 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5671 // where msb = lsb + size - 1
5672 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5673 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5674 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5675 (MaskIdx0 + MaskLen0 <= 64) &&
5676 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5677 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5678 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5679 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5680 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5681 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5682 DAG.getConstant(ValBits == 32
5683 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5684 : (MaskIdx0 + MaskLen0 - 1),
5685 DL, GRLenVT),
5686 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5687 }
5688
5689 // 4th pattern to match BSTRINS:
5690 // R = or (and X, mask), (shl Y, shamt)
5691 // where mask = (2**shamt - 1)
5692 // =>
5693 // R = BSTRINS X, Y, ValBits - 1, shamt
5694 // where ValBits = 32 or 64
5695 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5696 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5697 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5698 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5699 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5700 (MaskIdx0 + MaskLen0 <= ValBits)) {
5701 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5702 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5703 N1.getOperand(0),
5704 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5705 DAG.getConstant(Shamt, DL, GRLenVT));
5706 }
5707
5708 // 5th pattern to match BSTRINS:
5709 // R = or (and X, mask), const
5710 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5711 // =>
5712 // R = BSTRINS X, (const >> lsb), msb, lsb
5713 // where msb = lsb + size - 1
5714 if (N0.getOpcode() == ISD::AND &&
5715 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5716 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5717 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5718 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5719 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5720 return DAG.getNode(
5721 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5722 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5723 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5724 : (MaskIdx0 + MaskLen0 - 1),
5725 DL, GRLenVT),
5726 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5727 }
5728
5729 // 6th pattern.
5730 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5731 // by the incoming bits are known to be zero.
5732 // =>
5733 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5734 //
5735 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5736 // pattern is more common than the 1st. So we put the 1st before the 6th in
5737 // order to match as many nodes as possible.
5738 ConstantSDNode *CNMask, *CNShamt;
5739 unsigned MaskIdx, MaskLen;
5740 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5741 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5742 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5743 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5744 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5745 Shamt = CNShamt->getZExtValue();
5746 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5747 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5748 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5749 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5750 N1.getOperand(0).getOperand(0),
5751 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5752 DAG.getConstant(Shamt, DL, GRLenVT));
5753 }
5754 }
5755
5756 // 7th pattern.
5757 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5758 // overwritten by the incoming bits are known to be zero.
5759 // =>
5760 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5761 //
5762 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5763 // before the 7th in order to match as many nodes as possible.
5764 if (N1.getOpcode() == ISD::AND &&
5765 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5766 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5767 N1.getOperand(0).getOpcode() == ISD::SHL &&
5768 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5769 CNShamt->getZExtValue() == MaskIdx) {
5770 APInt ShMask(ValBits, CNMask->getZExtValue());
5771 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5773 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5774 N1.getOperand(0).getOperand(0),
5775 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5776 DAG.getConstant(MaskIdx, DL, GRLenVT));
5777 }
5778 }
5779
5780 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5781 if (!SwapAndRetried) {
5782 std::swap(N0, N1);
5783 SwapAndRetried = true;
5784 goto Retry;
5785 }
5786
5787 SwapAndRetried = false;
5788Retry2:
5789 // 8th pattern.
5790 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5791 // the incoming bits are known to be zero.
5792 // =>
5793 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5794 //
5795 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5796 // we put it here in order to match as many nodes as possible or generate less
5797 // instructions.
5798 if (N1.getOpcode() == ISD::AND &&
5799 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5800 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5801 APInt ShMask(ValBits, CNMask->getZExtValue());
5802 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5803 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5804 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5805 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5806 N1->getOperand(0),
5807 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5808 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5809 DAG.getConstant(MaskIdx, DL, GRLenVT));
5810 }
5811 }
5812 // Swap N0/N1 and retry.
5813 if (!SwapAndRetried) {
5814 std::swap(N0, N1);
5815 SwapAndRetried = true;
5816 goto Retry2;
5817 }
5818
5819 return SDValue();
5820}
5821
5822static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5823 ExtType = ISD::NON_EXTLOAD;
5824
5825 switch (V.getNode()->getOpcode()) {
5826 case ISD::LOAD: {
5827 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5828 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5829 (LoadNode->getMemoryVT() == MVT::i16)) {
5830 ExtType = LoadNode->getExtensionType();
5831 return true;
5832 }
5833 return false;
5834 }
5835 case ISD::AssertSext: {
5836 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5837 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5838 ExtType = ISD::SEXTLOAD;
5839 return true;
5840 }
5841 return false;
5842 }
5843 case ISD::AssertZext: {
5844 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5845 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5846 ExtType = ISD::ZEXTLOAD;
5847 return true;
5848 }
5849 return false;
5850 }
5851 default:
5852 return false;
5853 }
5854
5855 return false;
5856}
5857
5858// Eliminate redundant truncation and zero-extension nodes.
5859// * Case 1:
5860// +------------+ +------------+ +------------+
5861// | Input1 | | Input2 | | CC |
5862// +------------+ +------------+ +------------+
5863// | | |
5864// V V +----+
5865// +------------+ +------------+ |
5866// | TRUNCATE | | TRUNCATE | |
5867// +------------+ +------------+ |
5868// | | |
5869// V V |
5870// +------------+ +------------+ |
5871// | ZERO_EXT | | ZERO_EXT | |
5872// +------------+ +------------+ |
5873// | | |
5874// | +-------------+ |
5875// V V | |
5876// +----------------+ | |
5877// | AND | | |
5878// +----------------+ | |
5879// | | |
5880// +---------------+ | |
5881// | | |
5882// V V V
5883// +-------------+
5884// | CMP |
5885// +-------------+
5886// * Case 2:
5887// +------------+ +------------+ +-------------+ +------------+ +------------+
5888// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5889// +------------+ +------------+ +-------------+ +------------+ +------------+
5890// | | | | |
5891// V | | | |
5892// +------------+ | | | |
5893// | XOR |<---------------------+ | |
5894// +------------+ | | |
5895// | | | |
5896// V V +---------------+ |
5897// +------------+ +------------+ | |
5898// | TRUNCATE | | TRUNCATE | | +-------------------------+
5899// +------------+ +------------+ | |
5900// | | | |
5901// V V | |
5902// +------------+ +------------+ | |
5903// | ZERO_EXT | | ZERO_EXT | | |
5904// +------------+ +------------+ | |
5905// | | | |
5906// V V | |
5907// +----------------+ | |
5908// | AND | | |
5909// +----------------+ | |
5910// | | |
5911// +---------------+ | |
5912// | | |
5913// V V V
5914// +-------------+
5915// | CMP |
5916// +-------------+
5919 const LoongArchSubtarget &Subtarget) {
5920 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5921
5922 SDNode *AndNode = N->getOperand(0).getNode();
5923 if (AndNode->getOpcode() != ISD::AND)
5924 return SDValue();
5925
5926 SDValue AndInputValue2 = AndNode->getOperand(1);
5927 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5928 return SDValue();
5929
5930 SDValue CmpInputValue = N->getOperand(1);
5931 SDValue AndInputValue1 = AndNode->getOperand(0);
5932 if (AndInputValue1.getOpcode() == ISD::XOR) {
5933 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5934 return SDValue();
5935 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5936 if (!CN || !CN->isAllOnes())
5937 return SDValue();
5938 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5939 if (!CN || !CN->isZero())
5940 return SDValue();
5941 AndInputValue1 = AndInputValue1.getOperand(0);
5942 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5943 return SDValue();
5944 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5945 if (AndInputValue2 != CmpInputValue)
5946 return SDValue();
5947 } else {
5948 return SDValue();
5949 }
5950
5951 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5952 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5953 return SDValue();
5954
5955 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5956 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5957 return SDValue();
5958
5959 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5960 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5961 ISD::LoadExtType ExtType1;
5962 ISD::LoadExtType ExtType2;
5963
5964 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5965 !checkValueWidth(TruncInputValue2, ExtType2))
5966 return SDValue();
5967
5968 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5969 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5970 return SDValue();
5971
5972 if ((ExtType2 != ISD::ZEXTLOAD) &&
5973 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5974 return SDValue();
5975
5976 // These truncation and zero-extension nodes are not necessary, remove them.
5977 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5978 TruncInputValue1, TruncInputValue2);
5979 SDValue NewSetCC =
5980 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5981 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5982 return SDValue(N, 0);
5983}
5984
5985// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5988 const LoongArchSubtarget &Subtarget) {
5989 if (DCI.isBeforeLegalizeOps())
5990 return SDValue();
5991
5992 SDValue Src = N->getOperand(0);
5993 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5994 return SDValue();
5995
5996 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5997 Src.getOperand(0));
5998}
5999
// Perform common combines for BR_CC and SELECT_CC conditions.
//
// Simplifies the (LHS, RHS, CC) condition triple in place. Returns true when
// any of LHS/RHS/CC was rewritten, in which case the caller must rebuild its
// node from the updated operands; returns false when nothing changed.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // The remaining folds only apply to integer equality comparisons.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      // The mask must be a single bit and the shift must extract exactly
      // that bit into position 0.
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        // Move the tested bit into the sign-bit position instead.
        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  return false;
}
6072
6075 const LoongArchSubtarget &Subtarget) {
6076 SDValue LHS = N->getOperand(1);
6077 SDValue RHS = N->getOperand(2);
6078 SDValue CC = N->getOperand(3);
6079 SDLoc DL(N);
6080
6081 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6082 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6083 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6084
6085 return SDValue();
6086}
6087
6090 const LoongArchSubtarget &Subtarget) {
6091 // Transform
6092 SDValue LHS = N->getOperand(0);
6093 SDValue RHS = N->getOperand(1);
6094 SDValue CC = N->getOperand(2);
6095 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6096 SDValue TrueV = N->getOperand(3);
6097 SDValue FalseV = N->getOperand(4);
6098 SDLoc DL(N);
6099 EVT VT = N->getValueType(0);
6100
6101 // If the True and False values are the same, we don't need a select_cc.
6102 if (TrueV == FalseV)
6103 return TrueV;
6104
6105 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6106 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6107 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6109 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6110 if (CCVal == ISD::CondCode::SETGE)
6111 std::swap(TrueV, FalseV);
6112
6113 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6114 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6115 // Only handle simm12, if it is not in this range, it can be considered as
6116 // register.
6117 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6118 isInt<12>(TrueSImm - FalseSImm)) {
6119 SDValue SRA =
6120 DAG.getNode(ISD::SRA, DL, VT, LHS,
6121 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6122 SDValue AND =
6123 DAG.getNode(ISD::AND, DL, VT, SRA,
6124 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6125 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6126 }
6127
6128 if (CCVal == ISD::CondCode::SETGE)
6129 std::swap(TrueV, FalseV);
6130 }
6131
6132 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6133 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6134 {LHS, RHS, CC, TrueV, FalseV});
6135
6136 return SDValue();
6137}
6138
6139template <unsigned N>
6141 SelectionDAG &DAG,
6142 const LoongArchSubtarget &Subtarget,
6143 bool IsSigned = false) {
6144 SDLoc DL(Node);
6145 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6146 // Check the ImmArg.
6147 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6148 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6149 DAG.getContext()->emitError(Node->getOperationName(0) +
6150 ": argument out of range.");
6151 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6152 }
6153 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6154}
6155
6156template <unsigned N>
6157static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6158 SelectionDAG &DAG, bool IsSigned = false) {
6159 SDLoc DL(Node);
6160 EVT ResTy = Node->getValueType(0);
6161 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6162
6163 // Check the ImmArg.
6164 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6165 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6166 DAG.getContext()->emitError(Node->getOperationName(0) +
6167 ": argument out of range.");
6168 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6169 }
6170 return DAG.getConstant(
6172 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6173 DL, ResTy);
6174}
6175
6177 SDLoc DL(Node);
6178 EVT ResTy = Node->getValueType(0);
6179 SDValue Vec = Node->getOperand(2);
6180 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6181 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6182}
6183
6185 SDLoc DL(Node);
6186 EVT ResTy = Node->getValueType(0);
6187 SDValue One = DAG.getConstant(1, DL, ResTy);
6188 SDValue Bit =
6189 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6190
6191 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6192 DAG.getNOT(DL, Bit, ResTy));
6193}
6194
6195template <unsigned N>
6197 SDLoc DL(Node);
6198 EVT ResTy = Node->getValueType(0);
6199 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6200 // Check the unsigned ImmArg.
6201 if (!isUInt<N>(CImm->getZExtValue())) {
6202 DAG.getContext()->emitError(Node->getOperationName(0) +
6203 ": argument out of range.");
6204 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6205 }
6206
6207 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6208 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6209
6210 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6211}
6212
6213template <unsigned N>
6215 SDLoc DL(Node);
6216 EVT ResTy = Node->getValueType(0);
6217 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6218 // Check the unsigned ImmArg.
6219 if (!isUInt<N>(CImm->getZExtValue())) {
6220 DAG.getContext()->emitError(Node->getOperationName(0) +
6221 ": argument out of range.");
6222 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6223 }
6224
6225 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6226 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6227 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6228}
6229
6230template <unsigned N>
6232 SDLoc DL(Node);
6233 EVT ResTy = Node->getValueType(0);
6234 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6235 // Check the unsigned ImmArg.
6236 if (!isUInt<N>(CImm->getZExtValue())) {
6237 DAG.getContext()->emitError(Node->getOperationName(0) +
6238 ": argument out of range.");
6239 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6240 }
6241
6242 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6243 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6244 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6245}
6246
6247template <unsigned W>
6249 unsigned ResOp) {
6250 unsigned Imm = N->getConstantOperandVal(2);
6251 if (!isUInt<W>(Imm)) {
6252 const StringRef ErrorMsg = "argument out of range";
6253 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6254 return DAG.getUNDEF(N->getValueType(0));
6255 }
6256 SDLoc DL(N);
6257 SDValue Vec = N->getOperand(1);
6258 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6260 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6261}
6262
6263static SDValue
6266 const LoongArchSubtarget &Subtarget) {
6267 SDLoc DL(N);
6268 switch (N->getConstantOperandVal(0)) {
6269 default:
6270 break;
6271 case Intrinsic::loongarch_lsx_vadd_b:
6272 case Intrinsic::loongarch_lsx_vadd_h:
6273 case Intrinsic::loongarch_lsx_vadd_w:
6274 case Intrinsic::loongarch_lsx_vadd_d:
6275 case Intrinsic::loongarch_lasx_xvadd_b:
6276 case Intrinsic::loongarch_lasx_xvadd_h:
6277 case Intrinsic::loongarch_lasx_xvadd_w:
6278 case Intrinsic::loongarch_lasx_xvadd_d:
6279 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6280 N->getOperand(2));
6281 case Intrinsic::loongarch_lsx_vaddi_bu:
6282 case Intrinsic::loongarch_lsx_vaddi_hu:
6283 case Intrinsic::loongarch_lsx_vaddi_wu:
6284 case Intrinsic::loongarch_lsx_vaddi_du:
6285 case Intrinsic::loongarch_lasx_xvaddi_bu:
6286 case Intrinsic::loongarch_lasx_xvaddi_hu:
6287 case Intrinsic::loongarch_lasx_xvaddi_wu:
6288 case Intrinsic::loongarch_lasx_xvaddi_du:
6289 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6290 lowerVectorSplatImm<5>(N, 2, DAG));
6291 case Intrinsic::loongarch_lsx_vsub_b:
6292 case Intrinsic::loongarch_lsx_vsub_h:
6293 case Intrinsic::loongarch_lsx_vsub_w:
6294 case Intrinsic::loongarch_lsx_vsub_d:
6295 case Intrinsic::loongarch_lasx_xvsub_b:
6296 case Intrinsic::loongarch_lasx_xvsub_h:
6297 case Intrinsic::loongarch_lasx_xvsub_w:
6298 case Intrinsic::loongarch_lasx_xvsub_d:
6299 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6300 N->getOperand(2));
6301 case Intrinsic::loongarch_lsx_vsubi_bu:
6302 case Intrinsic::loongarch_lsx_vsubi_hu:
6303 case Intrinsic::loongarch_lsx_vsubi_wu:
6304 case Intrinsic::loongarch_lsx_vsubi_du:
6305 case Intrinsic::loongarch_lasx_xvsubi_bu:
6306 case Intrinsic::loongarch_lasx_xvsubi_hu:
6307 case Intrinsic::loongarch_lasx_xvsubi_wu:
6308 case Intrinsic::loongarch_lasx_xvsubi_du:
6309 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6310 lowerVectorSplatImm<5>(N, 2, DAG));
6311 case Intrinsic::loongarch_lsx_vneg_b:
6312 case Intrinsic::loongarch_lsx_vneg_h:
6313 case Intrinsic::loongarch_lsx_vneg_w:
6314 case Intrinsic::loongarch_lsx_vneg_d:
6315 case Intrinsic::loongarch_lasx_xvneg_b:
6316 case Intrinsic::loongarch_lasx_xvneg_h:
6317 case Intrinsic::loongarch_lasx_xvneg_w:
6318 case Intrinsic::loongarch_lasx_xvneg_d:
6319 return DAG.getNode(
6320 ISD::SUB, DL, N->getValueType(0),
6321 DAG.getConstant(
6322 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6323 /*isSigned=*/true),
6324 SDLoc(N), N->getValueType(0)),
6325 N->getOperand(1));
6326 case Intrinsic::loongarch_lsx_vmax_b:
6327 case Intrinsic::loongarch_lsx_vmax_h:
6328 case Intrinsic::loongarch_lsx_vmax_w:
6329 case Intrinsic::loongarch_lsx_vmax_d:
6330 case Intrinsic::loongarch_lasx_xvmax_b:
6331 case Intrinsic::loongarch_lasx_xvmax_h:
6332 case Intrinsic::loongarch_lasx_xvmax_w:
6333 case Intrinsic::loongarch_lasx_xvmax_d:
6334 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6335 N->getOperand(2));
6336 case Intrinsic::loongarch_lsx_vmax_bu:
6337 case Intrinsic::loongarch_lsx_vmax_hu:
6338 case Intrinsic::loongarch_lsx_vmax_wu:
6339 case Intrinsic::loongarch_lsx_vmax_du:
6340 case Intrinsic::loongarch_lasx_xvmax_bu:
6341 case Intrinsic::loongarch_lasx_xvmax_hu:
6342 case Intrinsic::loongarch_lasx_xvmax_wu:
6343 case Intrinsic::loongarch_lasx_xvmax_du:
6344 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6345 N->getOperand(2));
6346 case Intrinsic::loongarch_lsx_vmaxi_b:
6347 case Intrinsic::loongarch_lsx_vmaxi_h:
6348 case Intrinsic::loongarch_lsx_vmaxi_w:
6349 case Intrinsic::loongarch_lsx_vmaxi_d:
6350 case Intrinsic::loongarch_lasx_xvmaxi_b:
6351 case Intrinsic::loongarch_lasx_xvmaxi_h:
6352 case Intrinsic::loongarch_lasx_xvmaxi_w:
6353 case Intrinsic::loongarch_lasx_xvmaxi_d:
6354 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6355 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6356 case Intrinsic::loongarch_lsx_vmaxi_bu:
6357 case Intrinsic::loongarch_lsx_vmaxi_hu:
6358 case Intrinsic::loongarch_lsx_vmaxi_wu:
6359 case Intrinsic::loongarch_lsx_vmaxi_du:
6360 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6361 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6362 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6363 case Intrinsic::loongarch_lasx_xvmaxi_du:
6364 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6365 lowerVectorSplatImm<5>(N, 2, DAG));
6366 case Intrinsic::loongarch_lsx_vmin_b:
6367 case Intrinsic::loongarch_lsx_vmin_h:
6368 case Intrinsic::loongarch_lsx_vmin_w:
6369 case Intrinsic::loongarch_lsx_vmin_d:
6370 case Intrinsic::loongarch_lasx_xvmin_b:
6371 case Intrinsic::loongarch_lasx_xvmin_h:
6372 case Intrinsic::loongarch_lasx_xvmin_w:
6373 case Intrinsic::loongarch_lasx_xvmin_d:
6374 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6375 N->getOperand(2));
6376 case Intrinsic::loongarch_lsx_vmin_bu:
6377 case Intrinsic::loongarch_lsx_vmin_hu:
6378 case Intrinsic::loongarch_lsx_vmin_wu:
6379 case Intrinsic::loongarch_lsx_vmin_du:
6380 case Intrinsic::loongarch_lasx_xvmin_bu:
6381 case Intrinsic::loongarch_lasx_xvmin_hu:
6382 case Intrinsic::loongarch_lasx_xvmin_wu:
6383 case Intrinsic::loongarch_lasx_xvmin_du:
6384 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6385 N->getOperand(2));
6386 case Intrinsic::loongarch_lsx_vmini_b:
6387 case Intrinsic::loongarch_lsx_vmini_h:
6388 case Intrinsic::loongarch_lsx_vmini_w:
6389 case Intrinsic::loongarch_lsx_vmini_d:
6390 case Intrinsic::loongarch_lasx_xvmini_b:
6391 case Intrinsic::loongarch_lasx_xvmini_h:
6392 case Intrinsic::loongarch_lasx_xvmini_w:
6393 case Intrinsic::loongarch_lasx_xvmini_d:
6394 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6395 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6396 case Intrinsic::loongarch_lsx_vmini_bu:
6397 case Intrinsic::loongarch_lsx_vmini_hu:
6398 case Intrinsic::loongarch_lsx_vmini_wu:
6399 case Intrinsic::loongarch_lsx_vmini_du:
6400 case Intrinsic::loongarch_lasx_xvmini_bu:
6401 case Intrinsic::loongarch_lasx_xvmini_hu:
6402 case Intrinsic::loongarch_lasx_xvmini_wu:
6403 case Intrinsic::loongarch_lasx_xvmini_du:
6404 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6405 lowerVectorSplatImm<5>(N, 2, DAG));
6406 case Intrinsic::loongarch_lsx_vmul_b:
6407 case Intrinsic::loongarch_lsx_vmul_h:
6408 case Intrinsic::loongarch_lsx_vmul_w:
6409 case Intrinsic::loongarch_lsx_vmul_d:
6410 case Intrinsic::loongarch_lasx_xvmul_b:
6411 case Intrinsic::loongarch_lasx_xvmul_h:
6412 case Intrinsic::loongarch_lasx_xvmul_w:
6413 case Intrinsic::loongarch_lasx_xvmul_d:
6414 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6415 N->getOperand(2));
6416 case Intrinsic::loongarch_lsx_vmadd_b:
6417 case Intrinsic::loongarch_lsx_vmadd_h:
6418 case Intrinsic::loongarch_lsx_vmadd_w:
6419 case Intrinsic::loongarch_lsx_vmadd_d:
6420 case Intrinsic::loongarch_lasx_xvmadd_b:
6421 case Intrinsic::loongarch_lasx_xvmadd_h:
6422 case Intrinsic::loongarch_lasx_xvmadd_w:
6423 case Intrinsic::loongarch_lasx_xvmadd_d: {
6424 EVT ResTy = N->getValueType(0);
6425 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6426 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6427 N->getOperand(3)));
6428 }
6429 case Intrinsic::loongarch_lsx_vmsub_b:
6430 case Intrinsic::loongarch_lsx_vmsub_h:
6431 case Intrinsic::loongarch_lsx_vmsub_w:
6432 case Intrinsic::loongarch_lsx_vmsub_d:
6433 case Intrinsic::loongarch_lasx_xvmsub_b:
6434 case Intrinsic::loongarch_lasx_xvmsub_h:
6435 case Intrinsic::loongarch_lasx_xvmsub_w:
6436 case Intrinsic::loongarch_lasx_xvmsub_d: {
6437 EVT ResTy = N->getValueType(0);
6438 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6439 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6440 N->getOperand(3)));
6441 }
6442 case Intrinsic::loongarch_lsx_vdiv_b:
6443 case Intrinsic::loongarch_lsx_vdiv_h:
6444 case Intrinsic::loongarch_lsx_vdiv_w:
6445 case Intrinsic::loongarch_lsx_vdiv_d:
6446 case Intrinsic::loongarch_lasx_xvdiv_b:
6447 case Intrinsic::loongarch_lasx_xvdiv_h:
6448 case Intrinsic::loongarch_lasx_xvdiv_w:
6449 case Intrinsic::loongarch_lasx_xvdiv_d:
6450 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6451 N->getOperand(2));
6452 case Intrinsic::loongarch_lsx_vdiv_bu:
6453 case Intrinsic::loongarch_lsx_vdiv_hu:
6454 case Intrinsic::loongarch_lsx_vdiv_wu:
6455 case Intrinsic::loongarch_lsx_vdiv_du:
6456 case Intrinsic::loongarch_lasx_xvdiv_bu:
6457 case Intrinsic::loongarch_lasx_xvdiv_hu:
6458 case Intrinsic::loongarch_lasx_xvdiv_wu:
6459 case Intrinsic::loongarch_lasx_xvdiv_du:
6460 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6461 N->getOperand(2));
6462 case Intrinsic::loongarch_lsx_vmod_b:
6463 case Intrinsic::loongarch_lsx_vmod_h:
6464 case Intrinsic::loongarch_lsx_vmod_w:
6465 case Intrinsic::loongarch_lsx_vmod_d:
6466 case Intrinsic::loongarch_lasx_xvmod_b:
6467 case Intrinsic::loongarch_lasx_xvmod_h:
6468 case Intrinsic::loongarch_lasx_xvmod_w:
6469 case Intrinsic::loongarch_lasx_xvmod_d:
6470 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6471 N->getOperand(2));
6472 case Intrinsic::loongarch_lsx_vmod_bu:
6473 case Intrinsic::loongarch_lsx_vmod_hu:
6474 case Intrinsic::loongarch_lsx_vmod_wu:
6475 case Intrinsic::loongarch_lsx_vmod_du:
6476 case Intrinsic::loongarch_lasx_xvmod_bu:
6477 case Intrinsic::loongarch_lasx_xvmod_hu:
6478 case Intrinsic::loongarch_lasx_xvmod_wu:
6479 case Intrinsic::loongarch_lasx_xvmod_du:
6480 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6481 N->getOperand(2));
6482 case Intrinsic::loongarch_lsx_vand_v:
6483 case Intrinsic::loongarch_lasx_xvand_v:
6484 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6485 N->getOperand(2));
6486 case Intrinsic::loongarch_lsx_vor_v:
6487 case Intrinsic::loongarch_lasx_xvor_v:
6488 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6489 N->getOperand(2));
6490 case Intrinsic::loongarch_lsx_vxor_v:
6491 case Intrinsic::loongarch_lasx_xvxor_v:
6492 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6493 N->getOperand(2));
6494 case Intrinsic::loongarch_lsx_vnor_v:
6495 case Intrinsic::loongarch_lasx_xvnor_v: {
6496 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6497 N->getOperand(2));
6498 return DAG.getNOT(DL, Res, Res->getValueType(0));
6499 }
6500 case Intrinsic::loongarch_lsx_vandi_b:
6501 case Intrinsic::loongarch_lasx_xvandi_b:
6502 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6503 lowerVectorSplatImm<8>(N, 2, DAG));
6504 case Intrinsic::loongarch_lsx_vori_b:
6505 case Intrinsic::loongarch_lasx_xvori_b:
6506 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6507 lowerVectorSplatImm<8>(N, 2, DAG));
6508 case Intrinsic::loongarch_lsx_vxori_b:
6509 case Intrinsic::loongarch_lasx_xvxori_b:
6510 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6511 lowerVectorSplatImm<8>(N, 2, DAG));
6512 case Intrinsic::loongarch_lsx_vsll_b:
6513 case Intrinsic::loongarch_lsx_vsll_h:
6514 case Intrinsic::loongarch_lsx_vsll_w:
6515 case Intrinsic::loongarch_lsx_vsll_d:
6516 case Intrinsic::loongarch_lasx_xvsll_b:
6517 case Intrinsic::loongarch_lasx_xvsll_h:
6518 case Intrinsic::loongarch_lasx_xvsll_w:
6519 case Intrinsic::loongarch_lasx_xvsll_d:
6520 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6521 truncateVecElts(N, DAG));
6522 case Intrinsic::loongarch_lsx_vslli_b:
6523 case Intrinsic::loongarch_lasx_xvslli_b:
6524 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6525 lowerVectorSplatImm<3>(N, 2, DAG));
6526 case Intrinsic::loongarch_lsx_vslli_h:
6527 case Intrinsic::loongarch_lasx_xvslli_h:
6528 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6529 lowerVectorSplatImm<4>(N, 2, DAG));
6530 case Intrinsic::loongarch_lsx_vslli_w:
6531 case Intrinsic::loongarch_lasx_xvslli_w:
6532 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6533 lowerVectorSplatImm<5>(N, 2, DAG));
6534 case Intrinsic::loongarch_lsx_vslli_d:
6535 case Intrinsic::loongarch_lasx_xvslli_d:
6536 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6537 lowerVectorSplatImm<6>(N, 2, DAG));
6538 case Intrinsic::loongarch_lsx_vsrl_b:
6539 case Intrinsic::loongarch_lsx_vsrl_h:
6540 case Intrinsic::loongarch_lsx_vsrl_w:
6541 case Intrinsic::loongarch_lsx_vsrl_d:
6542 case Intrinsic::loongarch_lasx_xvsrl_b:
6543 case Intrinsic::loongarch_lasx_xvsrl_h:
6544 case Intrinsic::loongarch_lasx_xvsrl_w:
6545 case Intrinsic::loongarch_lasx_xvsrl_d:
6546 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6547 truncateVecElts(N, DAG));
6548 case Intrinsic::loongarch_lsx_vsrli_b:
6549 case Intrinsic::loongarch_lasx_xvsrli_b:
6550 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6551 lowerVectorSplatImm<3>(N, 2, DAG));
6552 case Intrinsic::loongarch_lsx_vsrli_h:
6553 case Intrinsic::loongarch_lasx_xvsrli_h:
6554 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6555 lowerVectorSplatImm<4>(N, 2, DAG));
6556 case Intrinsic::loongarch_lsx_vsrli_w:
6557 case Intrinsic::loongarch_lasx_xvsrli_w:
6558 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6559 lowerVectorSplatImm<5>(N, 2, DAG));
6560 case Intrinsic::loongarch_lsx_vsrli_d:
6561 case Intrinsic::loongarch_lasx_xvsrli_d:
6562 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6563 lowerVectorSplatImm<6>(N, 2, DAG));
6564 case Intrinsic::loongarch_lsx_vsra_b:
6565 case Intrinsic::loongarch_lsx_vsra_h:
6566 case Intrinsic::loongarch_lsx_vsra_w:
6567 case Intrinsic::loongarch_lsx_vsra_d:
6568 case Intrinsic::loongarch_lasx_xvsra_b:
6569 case Intrinsic::loongarch_lasx_xvsra_h:
6570 case Intrinsic::loongarch_lasx_xvsra_w:
6571 case Intrinsic::loongarch_lasx_xvsra_d:
6572 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6573 truncateVecElts(N, DAG));
6574 case Intrinsic::loongarch_lsx_vsrai_b:
6575 case Intrinsic::loongarch_lasx_xvsrai_b:
6576 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6577 lowerVectorSplatImm<3>(N, 2, DAG));
6578 case Intrinsic::loongarch_lsx_vsrai_h:
6579 case Intrinsic::loongarch_lasx_xvsrai_h:
6580 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6581 lowerVectorSplatImm<4>(N, 2, DAG));
6582 case Intrinsic::loongarch_lsx_vsrai_w:
6583 case Intrinsic::loongarch_lasx_xvsrai_w:
6584 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6585 lowerVectorSplatImm<5>(N, 2, DAG));
6586 case Intrinsic::loongarch_lsx_vsrai_d:
6587 case Intrinsic::loongarch_lasx_xvsrai_d:
6588 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6589 lowerVectorSplatImm<6>(N, 2, DAG));
6590 case Intrinsic::loongarch_lsx_vclz_b:
6591 case Intrinsic::loongarch_lsx_vclz_h:
6592 case Intrinsic::loongarch_lsx_vclz_w:
6593 case Intrinsic::loongarch_lsx_vclz_d:
6594 case Intrinsic::loongarch_lasx_xvclz_b:
6595 case Intrinsic::loongarch_lasx_xvclz_h:
6596 case Intrinsic::loongarch_lasx_xvclz_w:
6597 case Intrinsic::loongarch_lasx_xvclz_d:
6598 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6599 case Intrinsic::loongarch_lsx_vpcnt_b:
6600 case Intrinsic::loongarch_lsx_vpcnt_h:
6601 case Intrinsic::loongarch_lsx_vpcnt_w:
6602 case Intrinsic::loongarch_lsx_vpcnt_d:
6603 case Intrinsic::loongarch_lasx_xvpcnt_b:
6604 case Intrinsic::loongarch_lasx_xvpcnt_h:
6605 case Intrinsic::loongarch_lasx_xvpcnt_w:
6606 case Intrinsic::loongarch_lasx_xvpcnt_d:
6607 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6608 case Intrinsic::loongarch_lsx_vbitclr_b:
6609 case Intrinsic::loongarch_lsx_vbitclr_h:
6610 case Intrinsic::loongarch_lsx_vbitclr_w:
6611 case Intrinsic::loongarch_lsx_vbitclr_d:
6612 case Intrinsic::loongarch_lasx_xvbitclr_b:
6613 case Intrinsic::loongarch_lasx_xvbitclr_h:
6614 case Intrinsic::loongarch_lasx_xvbitclr_w:
6615 case Intrinsic::loongarch_lasx_xvbitclr_d:
6616 return lowerVectorBitClear(N, DAG);
6617 case Intrinsic::loongarch_lsx_vbitclri_b:
6618 case Intrinsic::loongarch_lasx_xvbitclri_b:
6619 return lowerVectorBitClearImm<3>(N, DAG);
6620 case Intrinsic::loongarch_lsx_vbitclri_h:
6621 case Intrinsic::loongarch_lasx_xvbitclri_h:
6622 return lowerVectorBitClearImm<4>(N, DAG);
6623 case Intrinsic::loongarch_lsx_vbitclri_w:
6624 case Intrinsic::loongarch_lasx_xvbitclri_w:
6625 return lowerVectorBitClearImm<5>(N, DAG);
6626 case Intrinsic::loongarch_lsx_vbitclri_d:
6627 case Intrinsic::loongarch_lasx_xvbitclri_d:
6628 return lowerVectorBitClearImm<6>(N, DAG);
6629 case Intrinsic::loongarch_lsx_vbitset_b:
6630 case Intrinsic::loongarch_lsx_vbitset_h:
6631 case Intrinsic::loongarch_lsx_vbitset_w:
6632 case Intrinsic::loongarch_lsx_vbitset_d:
6633 case Intrinsic::loongarch_lasx_xvbitset_b:
6634 case Intrinsic::loongarch_lasx_xvbitset_h:
6635 case Intrinsic::loongarch_lasx_xvbitset_w:
6636 case Intrinsic::loongarch_lasx_xvbitset_d: {
6637 EVT VecTy = N->getValueType(0);
6638 SDValue One = DAG.getConstant(1, DL, VecTy);
6639 return DAG.getNode(
6640 ISD::OR, DL, VecTy, N->getOperand(1),
6641 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6642 }
6643 case Intrinsic::loongarch_lsx_vbitseti_b:
6644 case Intrinsic::loongarch_lasx_xvbitseti_b:
6645 return lowerVectorBitSetImm<3>(N, DAG);
6646 case Intrinsic::loongarch_lsx_vbitseti_h:
6647 case Intrinsic::loongarch_lasx_xvbitseti_h:
6648 return lowerVectorBitSetImm<4>(N, DAG);
6649 case Intrinsic::loongarch_lsx_vbitseti_w:
6650 case Intrinsic::loongarch_lasx_xvbitseti_w:
6651 return lowerVectorBitSetImm<5>(N, DAG);
6652 case Intrinsic::loongarch_lsx_vbitseti_d:
6653 case Intrinsic::loongarch_lasx_xvbitseti_d:
6654 return lowerVectorBitSetImm<6>(N, DAG);
6655 case Intrinsic::loongarch_lsx_vbitrev_b:
6656 case Intrinsic::loongarch_lsx_vbitrev_h:
6657 case Intrinsic::loongarch_lsx_vbitrev_w:
6658 case Intrinsic::loongarch_lsx_vbitrev_d:
6659 case Intrinsic::loongarch_lasx_xvbitrev_b:
6660 case Intrinsic::loongarch_lasx_xvbitrev_h:
6661 case Intrinsic::loongarch_lasx_xvbitrev_w:
6662 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6663 EVT VecTy = N->getValueType(0);
6664 SDValue One = DAG.getConstant(1, DL, VecTy);
6665 return DAG.getNode(
6666 ISD::XOR, DL, VecTy, N->getOperand(1),
6667 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6668 }
6669 case Intrinsic::loongarch_lsx_vbitrevi_b:
6670 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6671 return lowerVectorBitRevImm<3>(N, DAG);
6672 case Intrinsic::loongarch_lsx_vbitrevi_h:
6673 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6674 return lowerVectorBitRevImm<4>(N, DAG);
6675 case Intrinsic::loongarch_lsx_vbitrevi_w:
6676 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6677 return lowerVectorBitRevImm<5>(N, DAG);
6678 case Intrinsic::loongarch_lsx_vbitrevi_d:
6679 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6680 return lowerVectorBitRevImm<6>(N, DAG);
6681 case Intrinsic::loongarch_lsx_vfadd_s:
6682 case Intrinsic::loongarch_lsx_vfadd_d:
6683 case Intrinsic::loongarch_lasx_xvfadd_s:
6684 case Intrinsic::loongarch_lasx_xvfadd_d:
6685 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6686 N->getOperand(2));
6687 case Intrinsic::loongarch_lsx_vfsub_s:
6688 case Intrinsic::loongarch_lsx_vfsub_d:
6689 case Intrinsic::loongarch_lasx_xvfsub_s:
6690 case Intrinsic::loongarch_lasx_xvfsub_d:
6691 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6692 N->getOperand(2));
6693 case Intrinsic::loongarch_lsx_vfmul_s:
6694 case Intrinsic::loongarch_lsx_vfmul_d:
6695 case Intrinsic::loongarch_lasx_xvfmul_s:
6696 case Intrinsic::loongarch_lasx_xvfmul_d:
6697 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6698 N->getOperand(2));
6699 case Intrinsic::loongarch_lsx_vfdiv_s:
6700 case Intrinsic::loongarch_lsx_vfdiv_d:
6701 case Intrinsic::loongarch_lasx_xvfdiv_s:
6702 case Intrinsic::loongarch_lasx_xvfdiv_d:
6703 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6704 N->getOperand(2));
6705 case Intrinsic::loongarch_lsx_vfmadd_s:
6706 case Intrinsic::loongarch_lsx_vfmadd_d:
6707 case Intrinsic::loongarch_lasx_xvfmadd_s:
6708 case Intrinsic::loongarch_lasx_xvfmadd_d:
6709 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6710 N->getOperand(2), N->getOperand(3));
6711 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6712 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6713 N->getOperand(1), N->getOperand(2),
6714 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6715 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6716 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6717 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6718 N->getOperand(1), N->getOperand(2),
6719 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6720 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6721 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6722 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6723 N->getOperand(1), N->getOperand(2),
6724 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6725 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6726 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6727 N->getOperand(1), N->getOperand(2),
6728 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6729 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6730 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6731 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6732 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6733 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6734 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6735 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6736 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6737 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6738 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6739 N->getOperand(1)));
6740 case Intrinsic::loongarch_lsx_vreplve_b:
6741 case Intrinsic::loongarch_lsx_vreplve_h:
6742 case Intrinsic::loongarch_lsx_vreplve_w:
6743 case Intrinsic::loongarch_lsx_vreplve_d:
6744 case Intrinsic::loongarch_lasx_xvreplve_b:
6745 case Intrinsic::loongarch_lasx_xvreplve_h:
6746 case Intrinsic::loongarch_lasx_xvreplve_w:
6747 case Intrinsic::loongarch_lasx_xvreplve_d:
6748 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6749 N->getOperand(1),
6750 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6751 N->getOperand(2)));
6752 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6753 if (!Subtarget.is64Bit())
6754 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6755 break;
6756 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6757 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6758 if (!Subtarget.is64Bit())
6759 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6760 break;
6761 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6762 if (!Subtarget.is64Bit())
6763 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6764 break;
6765 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6766 if (!Subtarget.is64Bit())
6767 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6768 break;
6769 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6770 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6771 if (!Subtarget.is64Bit())
6772 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6773 break;
6774 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6775 if (!Subtarget.is64Bit())
6776 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6777 break;
6778 case Intrinsic::loongarch_lsx_bz_b:
6779 case Intrinsic::loongarch_lsx_bz_h:
6780 case Intrinsic::loongarch_lsx_bz_w:
6781 case Intrinsic::loongarch_lsx_bz_d:
6782 case Intrinsic::loongarch_lasx_xbz_b:
6783 case Intrinsic::loongarch_lasx_xbz_h:
6784 case Intrinsic::loongarch_lasx_xbz_w:
6785 case Intrinsic::loongarch_lasx_xbz_d:
6786 if (!Subtarget.is64Bit())
6787 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6788 N->getOperand(1));
6789 break;
6790 case Intrinsic::loongarch_lsx_bz_v:
6791 case Intrinsic::loongarch_lasx_xbz_v:
6792 if (!Subtarget.is64Bit())
6793 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6794 N->getOperand(1));
6795 break;
6796 case Intrinsic::loongarch_lsx_bnz_b:
6797 case Intrinsic::loongarch_lsx_bnz_h:
6798 case Intrinsic::loongarch_lsx_bnz_w:
6799 case Intrinsic::loongarch_lsx_bnz_d:
6800 case Intrinsic::loongarch_lasx_xbnz_b:
6801 case Intrinsic::loongarch_lasx_xbnz_h:
6802 case Intrinsic::loongarch_lasx_xbnz_w:
6803 case Intrinsic::loongarch_lasx_xbnz_d:
6804 if (!Subtarget.is64Bit())
6805 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6806 N->getOperand(1));
6807 break;
6808 case Intrinsic::loongarch_lsx_bnz_v:
6809 case Intrinsic::loongarch_lasx_xbnz_v:
6810 if (!Subtarget.is64Bit())
6811 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6812 N->getOperand(1));
6813 break;
6814 case Intrinsic::loongarch_lasx_concat_128_s:
6815 case Intrinsic::loongarch_lasx_concat_128_d:
6816 case Intrinsic::loongarch_lasx_concat_128:
6817 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6818 N->getOperand(1), N->getOperand(2));
6819 }
6820 return SDValue();
6821}
6822
// DAG combine on LoongArchISD::MOVGR2FR_W_LA64 (GPR -> FPR move on LA64).
// NOTE(review): the declaration line naming this function is missing from this
// listing (embedded numbering jumps 6822 -> 6825); confirm the signature
// against the original file.
6825 const LoongArchSubtarget &Subtarget) {
6826 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6827 // conversion is unnecessary and can be replaced with the
6828 // MOVFR2GR_S_LA64 operand.
6829 SDValue Op0 = N->getOperand(0);
6830 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6831 return Op0.getOperand(0); // Forward the original FPR value; both moves become dead.
6832 return SDValue(); // No combine performed.
6833 }
6834
// DAG combine on LoongArchISD::MOVFR2GR_S_LA64 (FPR -> GPR move on LA64);
// the inverse of the MOVGR2FR_W combine above it in the original file.
// NOTE(review): the declaration line naming this function is missing from this
// listing (embedded numbering jumps 6834 -> 6837); confirm the signature
// against the original file.
6837 const LoongArchSubtarget &Subtarget) {
6838 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6839 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6840 // operand.
6841 SDValue Op0 = N->getOperand(0);
6842 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6843 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6844 "Unexpected value type!");
6845 return Op0.getOperand(0); // Forward the original GPR value; both moves become dead.
6846 }
6847 return SDValue(); // No combine performed.
6848 }
6849
// DAG combine on LoongArchISD::VMSKLTZ / XVMSKLTZ: ask SimplifyDemandedBits
// to prune computation feeding the mask node (all result bits are demanded).
// NOTE(review): the declaration line naming this function is missing from this
// listing (embedded numbering jumps 6849 -> 6852); confirm the signature
// against the original file.
6852 const LoongArchSubtarget &Subtarget) {
6853 MVT VT = N->getSimpleValueType(0);
6854 unsigned NumBits = VT.getScalarSizeInBits();
6855
6856 // Simplify the inputs.
6857 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6858 APInt DemandedMask(APInt::getAllOnes(NumBits));
6859 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6860 return SDValue(N, 0); // Node was updated in place; returning it signals a change.
6861
6862 return SDValue(); // No combine performed.
6863 }
6864
// DAG combine on LoongArchISD::SPLIT_PAIR_F64: elide the f64 -> (i32,i32)
// split when the source is a BUILD_PAIR_F64, undef, or a constant FP.
6865 static SDValue
// NOTE(review): the line carrying this function's name and first parameters is
// missing from this listing (embedded numbering jumps 6865 -> 6868); confirm
// the signature against the original file.
6868 const LoongArchSubtarget &Subtarget) {
6869 SDValue Op0 = N->getOperand(0);
6870 SDLoc DL(N);
6871
6872 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6873 // redundant. Instead, use BuildPairF64's operands directly.
6874 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6875 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6876
6877 if (Op0->isUndef()) {
6878 SDValue Lo = DAG.getUNDEF(MVT::i32); // Undef double splits into two undef halves.
6879 SDValue Hi = DAG.getUNDEF(MVT::i32);
6880 return DCI.CombineTo(N, Lo, Hi);
6881 }
6882
6883 // It's cheaper to materialise two 32-bit integers than to load a double
6884 // from the constant pool and transfer it to integer registers through the
6885 // stack.
// NOTE(review): a guard line is missing here (embedded numbering jumps
// 6885 -> 6887); given the use of C below it is likely
// `if (auto *C = dyn_cast<ConstantFPSDNode>(Op0)) {` -- confirm upstream.
6887 APInt V = C->getValueAPF().bitcastToAPInt();
6888 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); // Low 32 bits of the f64 pattern.
6889 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); // High 32 bits.
6890 return DCI.CombineTo(N, Lo, Hi);
6891 }
6892
6893 return SDValue(); // No combine performed.
6894 }
6895
6896 /// Do target-specific dag combines on LoongArchISD::VANDN nodes.
// NOTE(review): the line carrying this function's name and first parameters is
// missing from this listing (embedded numbering jumps 6896 -> 6899); confirm
// the signature against the original file.
6899 const LoongArchSubtarget &Subtarget) {
6900 SDValue N0 = N->getOperand(0);
6901 SDValue N1 = N->getOperand(1);
6902 MVT VT = N->getSimpleValueType(0);
6903 SDLoc DL(N);
6904
6905 // VANDN(undef, x) -> 0
6906 // VANDN(x, undef) -> 0
6907 if (N0.isUndef() || N1.isUndef())
6908 return DAG.getConstant(0, DL, VT);
6909
6910 // VANDN(0, x) -> x
// NOTE(review): the guard for this fold is missing from the listing (numbering
// jumps 6910 -> 6912); presumably a check that N0 is an all-zeros build vector
// -- confirm upstream.
6912 return N1;
6913
6914 // VANDN(x, 0) -> 0
// NOTE(review): guard line 6915 is missing from the listing; presumably a
// check that N1 is an all-zeros build vector -- confirm upstream.
6916 return DAG.getConstant(0, DL, VT);
6917
6918 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
// NOTE(review): guard line 6919 is missing from the listing; presumably a
// check that N1 is an all-ones build vector -- confirm upstream.
6920 return DAG.getNOT(DL, N0, VT);
6921
6922 // Turn VANDN back to AND if input is inverted.
6923 if (SDValue Not = isNOT(N0, DAG))
6924 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6925
6926 // Folds for better commutativity:
6927 if (N1->hasOneUse()) {
6928 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6929 if (SDValue Not = isNOT(N1, DAG))
6930 return DAG.getNOT(
6931 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6932
6933 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6934 // -> NOT(OR(x, SplatVector(-Imm))
6935 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6936 // gain benefits.
6937 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6938 N1.getOpcode() == ISD::BUILD_VECTOR) {
6939 if (SDValue SplatValue =
6940 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
6941 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6942 return SDValue(); // Splat value feeds other users; bail out of this fold.
6943
6944 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
6945 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue())); // ~Imm, truncated to 8 bits.
6946 SDValue Not =
6947 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6948 return DAG.getNOT(
6949 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6950 VT);
6951 }
6952 }
6953 }
6954 }
6955
6956 return SDValue(); // No combine performed.
6957 }
6958
// DAG combine on ISD::SINT_TO_FP: when a same-width integer load feeds only
// the conversion, load directly into an FP register instead (avoids movgr2fr).
// NOTE(review): the declaration line naming this function is missing from this
// listing (embedded numbering jumps 6958 -> 6961); confirm the signature
// against the original file.
6961 const LoongArchSubtarget &Subtarget) {
6962 SDLoc DL(N);
6963 EVT VT = N->getValueType(0);
6964
6965 if (VT != MVT::f32 && VT != MVT::f64)
6966 return SDValue(); // Only scalar f32/f64 results are handled.
6967 if (VT == MVT::f32 && !Subtarget.hasBasicF())
6968 return SDValue(); // f32 path requires the basic F extension.
6969 if (VT == MVT::f64 && !Subtarget.hasBasicD())
6970 return SDValue(); // f64 path requires the basic D extension.
6971
6972 // Only optimize when the source and destination types have the same width.
6973 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
6974 return SDValue();
6975
6976 SDValue Src = N->getOperand(0);
6977 // If the result of an integer load is only used by an integer-to-float
6978 // conversion, use a fp load instead. This eliminates an integer-to-float-move
6979 // (movgr2fr) instruction.
6980 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
6981 // Do not change the width of a volatile load. This condition check is
6982 // inspired by AArch64.
6983 !cast<LoadSDNode>(Src)->isVolatile()) {
6984 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
6985 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
6986 LN0->getPointerInfo(), LN0->getAlign(),
6987 LN0->getMemOperand()->getFlags());
6988
6989 // Make sure successors of the original load stay after it by updating them
6990 // to use the new Chain.
6991 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
6992 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
6993 }
6994
6995 return SDValue(); // No combine performed.
6996 }
6997
// Top-level DAG-combine dispatcher: routes each node opcode to its dedicated
// perform*Combine helper; returns an empty SDValue when no combine applies.
// NOTE(review): the line naming this method (LoongArchTargetLowering::
// PerformDAGCombine) is missing from this listing (embedded numbering jumps
// 6997 -> 6999); confirm the signature against the original file.
6999 DAGCombinerInfo &DCI) const {
7000 SelectionDAG &DAG = DCI.DAG;
7001 switch (N->getOpcode()) {
7002 default:
7003 break;
7004 case ISD::AND:
7005 return performANDCombine(N, DAG, DCI, Subtarget);
7006 case ISD::OR:
7007 return performORCombine(N, DAG, DCI, Subtarget);
7008 case ISD::SETCC:
7009 return performSETCCCombine(N, DAG, DCI, Subtarget);
7010 case ISD::SRL:
7011 return performSRLCombine(N, DAG, DCI, Subtarget);
7012 case ISD::BITCAST:
7013 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7014 case ISD::SINT_TO_FP:
7015 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7016 case LoongArchISD::BITREV_W:
7017 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7018 case LoongArchISD::BR_CC:
7019 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7020 case LoongArchISD::SELECT_CC:
7021 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
// NOTE(review): the case label for this return is missing from the listing
// (numbering jumps 7021 -> 7023); given the callee it is presumably
// `case ISD::INTRINSIC_WO_CHAIN:` -- confirm upstream.
7023 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7024 case LoongArchISD::MOVGR2FR_W_LA64:
7025 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7026 case LoongArchISD::MOVFR2GR_S_LA64:
7027 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7028 case LoongArchISD::VMSKLTZ:
7029 case LoongArchISD::XVMSKLTZ:
7030 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7031 case LoongArchISD::SPLIT_PAIR_F64:
7032 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7033 case LoongArchISD::VANDN:
7034 return performVANDNCombine(N, DAG, DCI, Subtarget);
7035 }
7036 return SDValue(); // No combine performed for this node.
7037 }
7038
// Inserts a runtime divide-by-zero check after a div/mod instruction when the
// -loongarch-check-zero-division option is enabled: branch over a BREAK 7
// (BRK_DIVZERO) trap when the divisor is nonzero.
// NOTE(review): the declaration line naming this function is missing from this
// listing (embedded numbering jumps 7038 -> 7041); confirm the signature
// against the original file.
7041 if (!ZeroDivCheck)
7042 return MBB; // Checking disabled: leave the block untouched.
7043
7044 // Build instructions:
7045 // MBB:
7046 // div(or mod) $dst, $dividend, $divisor
7047 // bne $divisor, $zero, SinkMBB
7048 // BreakMBB:
7049 // break 7 // BRK_DIVZERO
7050 // SinkMBB:
7051 // fallthrough
7052 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7053 MachineFunction::iterator It = ++MBB->getIterator();
7054 MachineFunction *MF = MBB->getParent();
7055 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7056 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7057 MF->insert(It, BreakMBB);
7058 MF->insert(It, SinkMBB);
7059
7060 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7061 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7062 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7063
7064 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7065 DebugLoc DL = MI.getDebugLoc();
7066 MachineOperand &Divisor = MI.getOperand(2);
7067 Register DivisorReg = Divisor.getReg();
7068
7069 // MBB:
7070 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7071 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7072 .addReg(LoongArch::R0)
7073 .addMBB(SinkMBB)
7074 MBB->addSuccessor(BreakMBB);
7075 MBB->addSuccessor(SinkMBB);
7076
7077 // BreakMBB:
7078 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7079 // definition of BRK_DIVZERO.
7080 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7081 BreakMBB->addSuccessor(SinkMBB);
7082
7083 // Clear Divisor's kill flag.
7084 Divisor.setIsKill(false);
7085
7086 return SinkMBB;
7087 }
7088
// Expands the Pseudo[X]VB[N]Z* vector-condition pseudos into a VSET*/XVSET*
// condition-flag write, a BCNEZ branch, and a diamond producing 0/1 merged
// with a PHI.
7089 static MachineBasicBlock *
// NOTE(review): the line carrying this function's name and first parameters is
// missing from this listing (embedded numbering jumps 7089 -> 7091); confirm
// the signature against the original file.
7091 const LoongArchSubtarget &Subtarget) {
7092 unsigned CondOpc;
7093 switch (MI.getOpcode()) {
7094 default:
7095 llvm_unreachable("Unexpected opcode");
7096 case LoongArch::PseudoVBZ:
7097 CondOpc = LoongArch::VSETEQZ_V;
7098 break;
7099 case LoongArch::PseudoVBZ_B:
7100 CondOpc = LoongArch::VSETANYEQZ_B;
7101 break;
7102 case LoongArch::PseudoVBZ_H:
7103 CondOpc = LoongArch::VSETANYEQZ_H;
7104 break;
7105 case LoongArch::PseudoVBZ_W:
7106 CondOpc = LoongArch::VSETANYEQZ_W;
7107 break;
7108 case LoongArch::PseudoVBZ_D:
7109 CondOpc = LoongArch::VSETANYEQZ_D;
7110 break;
7111 case LoongArch::PseudoVBNZ:
7112 CondOpc = LoongArch::VSETNEZ_V;
7113 break;
7114 case LoongArch::PseudoVBNZ_B:
7115 CondOpc = LoongArch::VSETALLNEZ_B;
7116 break;
7117 case LoongArch::PseudoVBNZ_H:
7118 CondOpc = LoongArch::VSETALLNEZ_H;
7119 break;
7120 case LoongArch::PseudoVBNZ_W:
7121 CondOpc = LoongArch::VSETALLNEZ_W;
7122 break;
7123 case LoongArch::PseudoVBNZ_D:
7124 CondOpc = LoongArch::VSETALLNEZ_D;
7125 break;
7126 case LoongArch::PseudoXVBZ:
7127 CondOpc = LoongArch::XVSETEQZ_V;
7128 break;
7129 case LoongArch::PseudoXVBZ_B:
7130 CondOpc = LoongArch::XVSETANYEQZ_B;
7131 break;
7132 case LoongArch::PseudoXVBZ_H:
7133 CondOpc = LoongArch::XVSETANYEQZ_H;
7134 break;
7135 case LoongArch::PseudoXVBZ_W:
7136 CondOpc = LoongArch::XVSETANYEQZ_W;
7137 break;
7138 case LoongArch::PseudoXVBZ_D:
7139 CondOpc = LoongArch::XVSETANYEQZ_D;
7140 break;
7141 case LoongArch::PseudoXVBNZ:
7142 CondOpc = LoongArch::XVSETNEZ_V;
7143 break;
7144 case LoongArch::PseudoXVBNZ_B:
7145 CondOpc = LoongArch::XVSETALLNEZ_B;
7146 break;
7147 case LoongArch::PseudoXVBNZ_H:
7148 CondOpc = LoongArch::XVSETALLNEZ_H;
7149 break;
7150 case LoongArch::PseudoXVBNZ_W:
7151 CondOpc = LoongArch::XVSETALLNEZ_W;
7152 break;
7153 case LoongArch::PseudoXVBNZ_D:
7154 CondOpc = LoongArch::XVSETALLNEZ_D;
7155 break;
7156 }
7157
7158 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7159 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7160 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): two declaration lines are missing here (embedded numbering
// jumps 7160 -> 7163); MRI and It are used below, so they are presumably a
// MachineRegisterInfo reference and a MachineFunction::iterator -- confirm
// upstream.
7163
7164 MachineFunction *F = BB->getParent();
7165 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7166 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7167 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7168
7169 F->insert(It, FalseBB);
7170 F->insert(It, TrueBB);
7171 F->insert(It, SinkBB);
7172
7173 // Transfer the remainder of MBB and its successor edges to Sink.
7174 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
// NOTE(review): line 7175 is missing from this listing; presumably the
// matching transferSuccessorsAndUpdatePHIs call -- confirm upstream.
7176
7177 // Insert the real instruction to BB.
7178 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7179 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7180
7181 // Insert branch.
7182 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7183 BB->addSuccessor(FalseBB);
7184 BB->addSuccessor(TrueBB);
7185
7186 // FalseBB.
7187 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7188 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7189 .addReg(LoongArch::R0)
7190 .addImm(0)
7191 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7192 FalseBB->addSuccessor(SinkBB);
7193
7194 // TrueBB.
7195 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7196 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7197 .addReg(LoongArch::R0)
7198 .addImm(1)
7199 TrueBB->addSuccessor(SinkBB);
7200
7201 // SinkBB: merge the results.
7202 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7203 MI.getOperand(0).getReg())
7204 .addReg(RD1)
7205 .addMBB(FalseBB)
7206 .addReg(RD2)
7207 .addMBB(TrueBB)
7208
7209 // The pseudo instruction is gone now.
7210 MI.eraseFromParent();
7211 return SinkBB;
7212 }
7213
// Expands PseudoXVINSGR2VR_{B,H}: insert a GPR element into a 256-bit LASX
// vector. When the source vector is IMPLICIT_DEF and the index is in the low
// half, a cheap 128-bit VINSGR2VR + SUBREG_TO_REG suffices; otherwise
// broadcast the element and splice it in with XVPERMI_Q + XVEXTRINS.
7214 static MachineBasicBlock *
// NOTE(review): the line carrying this function's name and first parameters is
// missing from this listing (embedded numbering jumps 7214 -> 7216); confirm
// the signature against the original file.
7216 const LoongArchSubtarget &Subtarget) {
7217 unsigned InsOp;
7218 unsigned BroadcastOp;
7219 unsigned HalfSize;
7220 switch (MI.getOpcode()) {
7221 default:
7222 llvm_unreachable("Unexpected opcode");
7223 case LoongArch::PseudoXVINSGR2VR_B:
7224 HalfSize = 16; // 16 byte elements per 128-bit half.
7225 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7226 InsOp = LoongArch::XVEXTRINS_B;
7227 break;
7228 case LoongArch::PseudoXVINSGR2VR_H:
7229 HalfSize = 8; // 8 halfword elements per 128-bit half.
7230 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7231 InsOp = LoongArch::XVEXTRINS_H;
7232 break;
7233 }
7234 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7235 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7236 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7237 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): line 7238 is missing from this listing; MRI is used below, so
// it is presumably a MachineRegisterInfo reference -- confirm upstream.
7239 // XDst = vector_insert XSrc, Elt, Idx
7240 Register XDst = MI.getOperand(0).getReg();
7241 Register XSrc = MI.getOperand(1).getReg();
7242 Register Elt = MI.getOperand(2).getReg();
7243 unsigned Idx = MI.getOperand(3).getImm();
7244
7245 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7246 Idx < HalfSize) {
7247 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7248 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7249
7250 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7251 .addReg(XSrc, {}, LoongArch::sub_128)
7252 BuildMI(*BB, MI, DL,
7253 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7254 : LoongArch::VINSGR2VR_B),
7255 ScratchSubReg2)
7256 .addReg(ScratchSubReg1)
7257 .addReg(Elt)
7258 .addImm(Idx)
7259
7260 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7261 .addReg(ScratchSubReg2)
7262 .addImm(LoongArch::sub_128)
7263 } else {
7264 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7265 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7266
7267 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7268
7269 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7270 .addReg(ScratchReg1)
7271 .addReg(XSrc)
7272 .addImm(Idx >= HalfSize ? 48 : 18)
7273
7274 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7275 .addReg(XSrc)
7276 .addReg(ScratchReg2)
7277 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17)
7278 }
7279
7280 MI.eraseFromParent(); // The pseudo has been fully expanded.
7281 return BB;
7282 }
7283
// Expands a scalar CTPOP pseudo via LSX: zero a vector (VLDI 0), insert the
// GPR into lane 0, run VPCNT, and extract lane 0 back to the GPR. Uses the
// D-width variants on 64-bit subtargets and W-width otherwise.
// NOTE(review): the declaration lines naming this function are missing from
// this listing (embedded numbering jumps 7283 -> 7286); confirm the signature
// against the original file.
7286 const LoongArchSubtarget &Subtarget) {
7287 assert(Subtarget.hasExtLSX());
7288 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7289 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7290 DebugLoc DL = MI.getDebugLoc();
// NOTE(review): line 7291 is missing from this listing; MRI is used below, so
// it is presumably a MachineRegisterInfo reference -- confirm upstream.
7292 Register Dst = MI.getOperand(0).getReg();
7293 Register Src = MI.getOperand(1).getReg();
7294 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7295 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7296 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7297
7298 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7299 BuildMI(*BB, MI, DL,
7300 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7301 : LoongArch::VINSGR2VR_W),
7302 ScratchReg2)
7303 .addReg(ScratchReg1)
7304 .addReg(Src)
7305 .addImm(0)
7306 BuildMI(
7307 *BB, MI, DL,
7308 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7309 ScratchReg3)
7310 .addReg(ScratchReg2)
7311 BuildMI(*BB, MI, DL,
7312 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7313 : LoongArch::VPICKVE2GR_W),
7314 Dst)
7315 .addReg(ScratchReg3)
7316 .addImm(0)
7317
7318 MI.eraseFromParent(); // The pseudo has been fully expanded.
7319 return BB;
7320 }
7321
7322static MachineBasicBlock *
7324 const LoongArchSubtarget &Subtarget) {
7325 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7326 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7327 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7329 Register Dst = MI.getOperand(0).getReg();
7330 Register Src = MI.getOperand(1).getReg();
7331 DebugLoc DL = MI.getDebugLoc();
7332 unsigned EleBits = 8;
7333 unsigned NotOpc = 0;
7334 unsigned MskOpc;
7335
7336 switch (MI.getOpcode()) {
7337 default:
7338 llvm_unreachable("Unexpected opcode");
7339 case LoongArch::PseudoVMSKLTZ_B:
7340 MskOpc = LoongArch::VMSKLTZ_B;
7341 break;
7342 case LoongArch::PseudoVMSKLTZ_H:
7343 MskOpc = LoongArch::VMSKLTZ_H;
7344 EleBits = 16;
7345 break;
7346 case LoongArch::PseudoVMSKLTZ_W:
7347 MskOpc = LoongArch::VMSKLTZ_W;
7348 EleBits = 32;
7349 break;
7350 case LoongArch::PseudoVMSKLTZ_D:
7351 MskOpc = LoongArch::VMSKLTZ_D;
7352 EleBits = 64;
7353 break;
7354 case LoongArch::PseudoVMSKGEZ_B:
7355 MskOpc = LoongArch::VMSKGEZ_B;
7356 break;
7357 case LoongArch::PseudoVMSKEQZ_B:
7358 MskOpc = LoongArch::VMSKNZ_B;
7359 NotOpc = LoongArch::VNOR_V;
7360 break;
7361 case LoongArch::PseudoVMSKNEZ_B:
7362 MskOpc = LoongArch::VMSKNZ_B;
7363 break;
7364 case LoongArch::PseudoXVMSKLTZ_B:
7365 MskOpc = LoongArch::XVMSKLTZ_B;
7366 RC = &LoongArch::LASX256RegClass;
7367 break;
7368 case LoongArch::PseudoXVMSKLTZ_H:
7369 MskOpc = LoongArch::XVMSKLTZ_H;
7370 RC = &LoongArch::LASX256RegClass;
7371 EleBits = 16;
7372 break;
7373 case LoongArch::PseudoXVMSKLTZ_W:
7374 MskOpc = LoongArch::XVMSKLTZ_W;
7375 RC = &LoongArch::LASX256RegClass;
7376 EleBits = 32;
7377 break;
7378 case LoongArch::PseudoXVMSKLTZ_D:
7379 MskOpc = LoongArch::XVMSKLTZ_D;
7380 RC = &LoongArch::LASX256RegClass;
7381 EleBits = 64;
7382 break;
7383 case LoongArch::PseudoXVMSKGEZ_B:
7384 MskOpc = LoongArch::XVMSKGEZ_B;
7385 RC = &LoongArch::LASX256RegClass;
7386 break;
7387 case LoongArch::PseudoXVMSKEQZ_B:
7388 MskOpc = LoongArch::XVMSKNZ_B;
7389 NotOpc = LoongArch::XVNOR_V;
7390 RC = &LoongArch::LASX256RegClass;
7391 break;
7392 case LoongArch::PseudoXVMSKNEZ_B:
7393 MskOpc = LoongArch::XVMSKNZ_B;
7394 RC = &LoongArch::LASX256RegClass;
7395 break;
7396 }
7397
7398 Register Msk = MRI.createVirtualRegister(RC);
7399 if (NotOpc) {
7400 Register Tmp = MRI.createVirtualRegister(RC);
7401 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7402 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7403 .addReg(Tmp, RegState::Kill)
7404 .addReg(Tmp, RegState::Kill);
7405 } else {
7406 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7407 }
7408
7409 if (TRI->getRegSizeInBits(*RC) > 128) {
7410 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7411 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7412 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7413 .addReg(Msk)
7414 .addImm(0);
7415 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7416 .addReg(Msk, RegState::Kill)
7417 .addImm(4);
7418 BuildMI(*BB, MI, DL,
7419 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7420 : LoongArch::BSTRINS_W),
7421 Dst)
7424 .addImm(256 / EleBits - 1)
7425 .addImm(128 / EleBits);
7426 } else {
7427 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7428 .addReg(Msk, RegState::Kill)
7429 .addImm(0);
7430 }
7431
7432 MI.eraseFromParent();
7433 return BB;
7434}
7435
7436static MachineBasicBlock *
7438 const LoongArchSubtarget &Subtarget) {
7439 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7440 "Unexpected instruction");
7441
7442 MachineFunction &MF = *BB->getParent();
7443 DebugLoc DL = MI.getDebugLoc();
7445 Register LoReg = MI.getOperand(0).getReg();
7446 Register HiReg = MI.getOperand(1).getReg();
7447 Register SrcReg = MI.getOperand(2).getReg();
7448
7449 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7450 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7451 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7452 MI.eraseFromParent(); // The pseudo instruction is gone now.
7453 return BB;
7454}
7455
7456static MachineBasicBlock *
7458 const LoongArchSubtarget &Subtarget) {
7459 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7460 "Unexpected instruction");
7461
7462 MachineFunction &MF = *BB->getParent();
7463 DebugLoc DL = MI.getDebugLoc();
7466 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7467 Register DstReg = MI.getOperand(0).getReg();
7468 Register LoReg = MI.getOperand(1).getReg();
7469 Register HiReg = MI.getOperand(2).getReg();
7470
7471 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7472 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7473 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7474 .addReg(TmpReg, RegState::Kill)
7475 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7476 MI.eraseFromParent(); // The pseudo instruction is gone now.
7477 return BB;
7478}
7479
7481 switch (MI.getOpcode()) {
7482 default:
7483 return false;
7484 case LoongArch::Select_GPR_Using_CC_GPR:
7485 return true;
7486 }
7487}
7488
7489static MachineBasicBlock *
7491 const LoongArchSubtarget &Subtarget) {
7492 // To "insert" Select_* instructions, we actually have to insert the triangle
7493 // control-flow pattern. The incoming instructions know the destination vreg
7494 // to set, the condition code register to branch on, the true/false values to
7495 // select between, and the condcode to use to select the appropriate branch.
7496 //
7497 // We produce the following control flow:
7498 // HeadMBB
7499 // | \
7500 // | IfFalseMBB
7501 // | /
7502 // TailMBB
7503 //
7504 // When we find a sequence of selects we attempt to optimize their emission
7505 // by sharing the control flow. Currently we only handle cases where we have
7506 // multiple selects with the exact same condition (same LHS, RHS and CC).
7507 // The selects may be interleaved with other instructions if the other
7508 // instructions meet some requirements we deem safe:
7509 // - They are not pseudo instructions.
7510 // - They are debug instructions. Otherwise,
7511 // - They do not have side-effects, do not access memory and their inputs do
7512 // not depend on the results of the select pseudo-instructions.
7513 // The TrueV/FalseV operands of the selects cannot depend on the result of
7514 // previous selects in the sequence.
7515 // These conditions could be further relaxed. See the X86 target for a
7516 // related approach and more information.
7517
7518 Register LHS = MI.getOperand(1).getReg();
7519 Register RHS;
7520 if (MI.getOperand(2).isReg())
7521 RHS = MI.getOperand(2).getReg();
7522 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7523
7524 SmallVector<MachineInstr *, 4> SelectDebugValues;
7525 SmallSet<Register, 4> SelectDests;
7526 SelectDests.insert(MI.getOperand(0).getReg());
7527
7528 MachineInstr *LastSelectPseudo = &MI;
7529 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7530 SequenceMBBI != E; ++SequenceMBBI) {
7531 if (SequenceMBBI->isDebugInstr())
7532 continue;
7533 if (isSelectPseudo(*SequenceMBBI)) {
7534 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7535 !SequenceMBBI->getOperand(2).isReg() ||
7536 SequenceMBBI->getOperand(2).getReg() != RHS ||
7537 SequenceMBBI->getOperand(3).getImm() != CC ||
7538 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7539 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7540 break;
7541 LastSelectPseudo = &*SequenceMBBI;
7542 SequenceMBBI->collectDebugValues(SelectDebugValues);
7543 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7544 continue;
7545 }
7546 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7547 SequenceMBBI->mayLoadOrStore() ||
7548 SequenceMBBI->usesCustomInsertionHook())
7549 break;
7550 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7551 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7552 }))
7553 break;
7554 }
7555
7556 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7557 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7558 DebugLoc DL = MI.getDebugLoc();
7560
7561 MachineBasicBlock *HeadMBB = BB;
7562 MachineFunction *F = BB->getParent();
7563 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7564 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7565
7566 F->insert(I, IfFalseMBB);
7567 F->insert(I, TailMBB);
7568
7569 // Set the call frame size on entry to the new basic blocks.
7570 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7571 IfFalseMBB->setCallFrameSize(CallFrameSize);
7572 TailMBB->setCallFrameSize(CallFrameSize);
7573
7574 // Transfer debug instructions associated with the selects to TailMBB.
7575 for (MachineInstr *DebugInstr : SelectDebugValues) {
7576 TailMBB->push_back(DebugInstr->removeFromParent());
7577 }
7578
7579 // Move all instructions after the sequence to TailMBB.
7580 TailMBB->splice(TailMBB->end(), HeadMBB,
7581 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7582 // Update machine-CFG edges by transferring all successors of the current
7583 // block to the new block which will contain the Phi nodes for the selects.
7584 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7585 // Set the successors for HeadMBB.
7586 HeadMBB->addSuccessor(IfFalseMBB);
7587 HeadMBB->addSuccessor(TailMBB);
7588
7589 // Insert appropriate branch.
7590 if (MI.getOperand(2).isImm())
7591 BuildMI(HeadMBB, DL, TII.get(CC))
7592 .addReg(LHS)
7593 .addImm(MI.getOperand(2).getImm())
7594 .addMBB(TailMBB);
7595 else
7596 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7597
7598 // IfFalseMBB just falls through to TailMBB.
7599 IfFalseMBB->addSuccessor(TailMBB);
7600
7601 // Create PHIs for all of the select pseudo-instructions.
7602 auto SelectMBBI = MI.getIterator();
7603 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7604 auto InsertionPoint = TailMBB->begin();
7605 while (SelectMBBI != SelectEnd) {
7606 auto Next = std::next(SelectMBBI);
7607 if (isSelectPseudo(*SelectMBBI)) {
7608 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7609 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7610 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7611 .addReg(SelectMBBI->getOperand(4).getReg())
7612 .addMBB(HeadMBB)
7613 .addReg(SelectMBBI->getOperand(5).getReg())
7614 .addMBB(IfFalseMBB);
7615 SelectMBBI->eraseFromParent();
7616 }
7617 SelectMBBI = Next;
7618 }
7619
7620 F->getProperties().resetNoPHIs();
7621 return TailMBB;
7622}
7623
7624MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7625 MachineInstr &MI, MachineBasicBlock *BB) const {
7626 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7627 DebugLoc DL = MI.getDebugLoc();
7628
7629 switch (MI.getOpcode()) {
7630 default:
7631 llvm_unreachable("Unexpected instr type to insert");
7632 case LoongArch::DIV_W:
7633 case LoongArch::DIV_WU:
7634 case LoongArch::MOD_W:
7635 case LoongArch::MOD_WU:
7636 case LoongArch::DIV_D:
7637 case LoongArch::DIV_DU:
7638 case LoongArch::MOD_D:
7639 case LoongArch::MOD_DU:
7640 return insertDivByZeroTrap(MI, BB);
7641 break;
7642 case LoongArch::WRFCSR: {
7643 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7644 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7645 .addReg(MI.getOperand(1).getReg());
7646 MI.eraseFromParent();
7647 return BB;
7648 }
7649 case LoongArch::RDFCSR: {
7650 MachineInstr *ReadFCSR =
7651 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7652 MI.getOperand(0).getReg())
7653 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7654 ReadFCSR->getOperand(1).setIsUndef();
7655 MI.eraseFromParent();
7656 return BB;
7657 }
7658 case LoongArch::Select_GPR_Using_CC_GPR:
7659 return emitSelectPseudo(MI, BB, Subtarget);
7660 case LoongArch::BuildPairF64Pseudo:
7661 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7662 case LoongArch::SplitPairF64Pseudo:
7663 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7664 case LoongArch::PseudoVBZ:
7665 case LoongArch::PseudoVBZ_B:
7666 case LoongArch::PseudoVBZ_H:
7667 case LoongArch::PseudoVBZ_W:
7668 case LoongArch::PseudoVBZ_D:
7669 case LoongArch::PseudoVBNZ:
7670 case LoongArch::PseudoVBNZ_B:
7671 case LoongArch::PseudoVBNZ_H:
7672 case LoongArch::PseudoVBNZ_W:
7673 case LoongArch::PseudoVBNZ_D:
7674 case LoongArch::PseudoXVBZ:
7675 case LoongArch::PseudoXVBZ_B:
7676 case LoongArch::PseudoXVBZ_H:
7677 case LoongArch::PseudoXVBZ_W:
7678 case LoongArch::PseudoXVBZ_D:
7679 case LoongArch::PseudoXVBNZ:
7680 case LoongArch::PseudoXVBNZ_B:
7681 case LoongArch::PseudoXVBNZ_H:
7682 case LoongArch::PseudoXVBNZ_W:
7683 case LoongArch::PseudoXVBNZ_D:
7684 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7685 case LoongArch::PseudoXVINSGR2VR_B:
7686 case LoongArch::PseudoXVINSGR2VR_H:
7687 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7688 case LoongArch::PseudoCTPOP:
7689 return emitPseudoCTPOP(MI, BB, Subtarget);
7690 case LoongArch::PseudoVMSKLTZ_B:
7691 case LoongArch::PseudoVMSKLTZ_H:
7692 case LoongArch::PseudoVMSKLTZ_W:
7693 case LoongArch::PseudoVMSKLTZ_D:
7694 case LoongArch::PseudoVMSKGEZ_B:
7695 case LoongArch::PseudoVMSKEQZ_B:
7696 case LoongArch::PseudoVMSKNEZ_B:
7697 case LoongArch::PseudoXVMSKLTZ_B:
7698 case LoongArch::PseudoXVMSKLTZ_H:
7699 case LoongArch::PseudoXVMSKLTZ_W:
7700 case LoongArch::PseudoXVMSKLTZ_D:
7701 case LoongArch::PseudoXVMSKGEZ_B:
7702 case LoongArch::PseudoXVMSKEQZ_B:
7703 case LoongArch::PseudoXVMSKNEZ_B:
7704 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7705 case TargetOpcode::STATEPOINT:
7706 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7707 // while bl call instruction (where statepoint will be lowered at the
7708 // end) has implicit def. This def is early-clobber as it will be set at
7709 // the moment of the call and earlier than any use is read.
7710 // Add this implicit dead def here as a workaround.
7711 MI.addOperand(*MI.getMF(),
7713 LoongArch::R1, /*isDef*/ true,
7714 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7715 /*isUndef*/ false, /*isEarlyClobber*/ true));
7716 if (!Subtarget.is64Bit())
7717 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7718 return emitPatchPoint(MI, BB);
7719 }
7720}
7721
7723 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7724 unsigned *Fast) const {
7725 if (!Subtarget.hasUAL())
7726 return false;
7727
7728 // TODO: set reasonable speed number.
7729 if (Fast)
7730 *Fast = 1;
7731 return true;
7732}
7733
7734//===----------------------------------------------------------------------===//
7735// Calling Convention Implementation
7736//===----------------------------------------------------------------------===//
7737
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
// (a0-a7 are the ABI names of R4-R11.)
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
7745
7746// PreserveNone calling convention:
7747// Arguments may be passed in any general-purpose registers except:
7748// - R1 : return address register
7749// - R22 : frame pointer
7750// - R31 : base pointer
7751//
7752// All general-purpose registers are treated as caller-saved,
7753// except R1 (RA) and R22 (FP).
7754//
7755// Non-volatile registers are allocated first so that a function
7756// can call normal functions without having to spill and reload
7757// argument registers.
7759 LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26,
7760 LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30,
7761 LoongArch::R4, LoongArch::R5, LoongArch::R6, LoongArch::R7,
7762 LoongArch::R8, LoongArch::R9, LoongArch::R10, LoongArch::R11,
7763 LoongArch::R12, LoongArch::R13, LoongArch::R14, LoongArch::R15,
7764 LoongArch::R16, LoongArch::R17, LoongArch::R18, LoongArch::R19,
7765 LoongArch::R20};
7766
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
// (fa0-fa7 are the ABI names of F0-F7, here in their 32-bit view.)
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
7772// FPR32 and FPR64 alias each other.
7774 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7775 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7776
// Eight LSX vector registers vr0-vr7 used for passing 128-bit vector
// arguments (see the 128-bit-vector case in CC_LoongArch).
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};
7780
// Eight LASX vector registers xr0-xr7 used for passing 256-bit vector
// arguments (see the 256-bit-vector case in CC_LoongArch).
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
7784
7786 switch (State.getCallingConv()) {
7788 if (!State.isVarArg())
7789 return State.AllocateReg(PreserveNoneArgGPRs);
7790 [[fallthrough]];
7791 default:
7792 return State.AllocateReg(ArgGPRs);
7793 }
7794}
7795
7796// Pass a 2*GRLen argument that has been split into two GRLen values through
7797// registers or the stack as necessary.
7798static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7799 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7800 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7801 ISD::ArgFlagsTy ArgFlags2) {
7802 unsigned GRLenInBytes = GRLen / 8;
7803 if (Register Reg = allocateArgGPR(State)) {
7804 // At least one half can be passed via register.
7805 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7806 VA1.getLocVT(), CCValAssign::Full));
7807 } else {
7808 // Both halves must be passed on the stack, with proper alignment.
7809 Align StackAlign =
7810 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7811 State.addLoc(
7813 State.AllocateStack(GRLenInBytes, StackAlign),
7814 VA1.getLocVT(), CCValAssign::Full));
7815 State.addLoc(CCValAssign::getMem(
7816 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7817 LocVT2, CCValAssign::Full));
7818 return false;
7819 }
7820 if (Register Reg = allocateArgGPR(State)) {
7821 // The second half can also be passed via register.
7822 State.addLoc(
7823 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7824 } else {
7825 // The second half is passed via the stack, without additional alignment.
7826 State.addLoc(CCValAssign::getMem(
7827 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7828 LocVT2, CCValAssign::Full));
7829 }
7830 return false;
7831}
7832
7833// Implements the LoongArch calling convention. Returns true upon failure.
7835 unsigned ValNo, MVT ValVT,
7836 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7837 CCState &State, bool IsRet, Type *OrigTy) {
7838 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7839 assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
7840 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7841 MVT LocVT = ValVT;
7842
7843 // Any return value split into more than two values can't be returned
7844 // directly.
7845 if (IsRet && ValNo > 1)
7846 return true;
7847
7848 // If passing a variadic argument, or if no FPR is available.
7849 bool UseGPRForFloat = true;
7850
7851 switch (ABI) {
7852 default:
7853 llvm_unreachable("Unexpected ABI");
7854 break;
7859 UseGPRForFloat = ArgFlags.isVarArg();
7860 break;
7863 break;
7864 }
7865
7866 // If this is a variadic argument, the LoongArch calling convention requires
7867 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7868 // byte alignment. An aligned register should be used regardless of whether
7869 // the original argument was split during legalisation or not. The argument
7870 // will not be passed by registers if the original type is larger than
7871 // 2*GRLen, so the register alignment rule does not apply.
7872 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7873 if (ArgFlags.isVarArg() &&
7874 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7875 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7876 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7877 // Skip 'odd' register if necessary.
7878 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7879 State.AllocateReg(ArgGPRs);
7880 }
7881
7882 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7883 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7884 State.getPendingArgFlags();
7885
7886 assert(PendingLocs.size() == PendingArgFlags.size() &&
7887 "PendingLocs and PendingArgFlags out of sync");
7888
7889 // FPR32 and FPR64 alias each other.
7890 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7891 UseGPRForFloat = true;
7892
7893 if (UseGPRForFloat && ValVT == MVT::f32) {
7894 LocVT = GRLenVT;
7895 LocInfo = CCValAssign::BCvt;
7896 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7897 LocVT = MVT::i64;
7898 LocInfo = CCValAssign::BCvt;
7899 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7900 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7901 // registers are exhausted.
7902 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7903 // Depending on available argument GPRS, f64 may be passed in a pair of
7904 // GPRs, split between a GPR and the stack, or passed completely on the
7905 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7906 // cases.
7907 MCRegister Reg = allocateArgGPR(State);
7908 if (!Reg) {
7909 int64_t StackOffset = State.AllocateStack(8, Align(8));
7910 State.addLoc(
7911 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7912 return false;
7913 }
7914 LocVT = MVT::i32;
7915 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7916 MCRegister HiReg = allocateArgGPR(State);
7917 if (HiReg) {
7918 State.addLoc(
7919 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7920 } else {
7921 int64_t StackOffset = State.AllocateStack(4, Align(4));
7922 State.addLoc(
7923 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7924 }
7925 return false;
7926 }
7927
7928 // Split arguments might be passed indirectly, so keep track of the pending
7929 // values.
7930 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7931 LocVT = GRLenVT;
7932 LocInfo = CCValAssign::Indirect;
7933 PendingLocs.push_back(
7934 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7935 PendingArgFlags.push_back(ArgFlags);
7936 if (!ArgFlags.isSplitEnd()) {
7937 return false;
7938 }
7939 }
7940
7941 // If the split argument only had two elements, it should be passed directly
7942 // in registers or on the stack.
7943 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7944 PendingLocs.size() <= 2) {
7945 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7946 // Apply the normal calling convention rules to the first half of the
7947 // split argument.
7948 CCValAssign VA = PendingLocs[0];
7949 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7950 PendingLocs.clear();
7951 PendingArgFlags.clear();
7952 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7953 ArgFlags);
7954 }
7955
7956 // Allocate to a register if possible, or else a stack slot.
7957 Register Reg;
7958 unsigned StoreSizeBytes = GRLen / 8;
7959 Align StackAlign = Align(GRLen / 8);
7960
7961 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7962 Reg = State.AllocateReg(ArgFPR32s);
7963 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7964 Reg = State.AllocateReg(ArgFPR64s);
7965 } else if (ValVT.is128BitVector()) {
7966 Reg = State.AllocateReg(ArgVRs);
7967 UseGPRForFloat = false;
7968 StoreSizeBytes = 16;
7969 StackAlign = Align(16);
7970 } else if (ValVT.is256BitVector()) {
7971 Reg = State.AllocateReg(ArgXRs);
7972 UseGPRForFloat = false;
7973 StoreSizeBytes = 32;
7974 StackAlign = Align(32);
7975 } else {
7976 Reg = allocateArgGPR(State);
7977 }
7978
7979 unsigned StackOffset =
7980 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7981
7982 // If we reach this point and PendingLocs is non-empty, we must be at the
7983 // end of a split argument that must be passed indirectly.
7984 if (!PendingLocs.empty()) {
7985 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7986 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7987 for (auto &It : PendingLocs) {
7988 if (Reg)
7989 It.convertToReg(Reg);
7990 else
7991 It.convertToMem(StackOffset);
7992 State.addLoc(It);
7993 }
7994 PendingLocs.clear();
7995 PendingArgFlags.clear();
7996 return false;
7997 }
7998 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7999 "Expected an GRLenVT at this stage");
8000
8001 if (Reg) {
8002 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8003 return false;
8004 }
8005
8006 // When a floating-point value is passed on the stack, no bit-cast is needed.
8007 if (ValVT.isFloatingPoint()) {
8008 LocVT = ValVT;
8009 LocInfo = CCValAssign::Full;
8010 }
8011
8012 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
8013 return false;
8014}
8015
8016void LoongArchTargetLowering::analyzeInputArgs(
8017 MachineFunction &MF, CCState &CCInfo,
8018 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
8019 LoongArchCCAssignFn Fn) const {
8020 FunctionType *FType = MF.getFunction().getFunctionType();
8021 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8022 MVT ArgVT = Ins[i].VT;
8023 Type *ArgTy = nullptr;
8024 if (IsRet)
8025 ArgTy = FType->getReturnType();
8026 else if (Ins[i].isOrigArg())
8027 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8029 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8030 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8031 CCInfo, IsRet, ArgTy)) {
8032 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8033 << '\n');
8034 llvm_unreachable("");
8035 }
8036 }
8037}
8038
8039void LoongArchTargetLowering::analyzeOutputArgs(
8040 MachineFunction &MF, CCState &CCInfo,
8041 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8042 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8043 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8044 MVT ArgVT = Outs[i].VT;
8045 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8047 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8048 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8049 CCInfo, IsRet, OrigTy)) {
8050 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8051 << "\n");
8052 llvm_unreachable("");
8053 }
8054 }
8055}
8056
8057// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8058// values.
8060 const CCValAssign &VA, const SDLoc &DL) {
8061 switch (VA.getLocInfo()) {
8062 default:
8063 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8064 case CCValAssign::Full:
8066 break;
8067 case CCValAssign::BCvt:
8068 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8069 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8070 else
8071 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8072 break;
8073 }
8074 return Val;
8075}
8076
8078 const CCValAssign &VA, const SDLoc &DL,
8079 const ISD::InputArg &In,
8080 const LoongArchTargetLowering &TLI) {
8083 EVT LocVT = VA.getLocVT();
8084 SDValue Val;
8085 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8086 Register VReg = RegInfo.createVirtualRegister(RC);
8087 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8088 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8089
8090 // If input is sign extended from 32 bits, note it for the OptW pass.
8091 if (In.isOrigArg()) {
8092 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8093 if (OrigArg->getType()->isIntegerTy()) {
8094 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8095 // An input zero extended from i31 can also be considered sign extended.
8096 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8097 (BitWidth < 32 && In.Flags.isZExt())) {
8100 LAFI->addSExt32Register(VReg);
8101 }
8102 }
8103 }
8104
8105 return convertLocVTToValVT(DAG, Val, VA, DL);
8106}
8107
8108// The caller is responsible for loading the full value if the argument is
8109// passed with CCValAssign::Indirect.
8111 const CCValAssign &VA, const SDLoc &DL) {
8113 MachineFrameInfo &MFI = MF.getFrameInfo();
8114 EVT ValVT = VA.getValVT();
8115 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8116 /*IsImmutable=*/true);
8117 SDValue FIN = DAG.getFrameIndex(
8119
8120 ISD::LoadExtType ExtType;
8121 switch (VA.getLocInfo()) {
8122 default:
8123 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8124 case CCValAssign::Full:
8126 case CCValAssign::BCvt:
8127 ExtType = ISD::NON_EXTLOAD;
8128 break;
8129 }
8130 return DAG.getExtLoad(
8131 ExtType, DL, VA.getLocVT(), Chain, FIN,
8133}
8134
8136 const CCValAssign &VA,
8137 const CCValAssign &HiVA,
8138 const SDLoc &DL) {
8139 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8140 "Unexpected VA");
8142 MachineFrameInfo &MFI = MF.getFrameInfo();
8144
8145 assert(VA.isRegLoc() && "Expected register VA assignment");
8146
8147 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8148 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8149 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8150 SDValue Hi;
8151 if (HiVA.isMemLoc()) {
8152 // Second half of f64 is passed on the stack.
8153 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8154 /*IsImmutable=*/true);
8155 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8156 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8158 } else {
8159 // Second half of f64 is passed in another GPR.
8160 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8161 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8162 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8163 }
8164 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8165}
8166
8168 const CCValAssign &VA, const SDLoc &DL) {
8169 EVT LocVT = VA.getLocVT();
8170
8171 switch (VA.getLocInfo()) {
8172 default:
8173 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8174 case CCValAssign::Full:
8175 break;
8176 case CCValAssign::BCvt:
8177 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8178 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8179 else
8180 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8181 break;
8182 }
8183 return Val;
8184}
8185
8186static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8187 CCValAssign::LocInfo LocInfo,
8188 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8189 CCState &State) {
8190 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8191 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8192 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8193 static const MCPhysReg GPRList[] = {
8194 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8195 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8196 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8197 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8198 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8199 return false;
8200 }
8201 }
8202
8203 if (LocVT == MVT::f32) {
8204 // Pass in STG registers: F1, F2, F3, F4
8205 // fs0,fs1,fs2,fs3
8206 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8207 LoongArch::F26, LoongArch::F27};
8208 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8209 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8210 return false;
8211 }
8212 }
8213
8214 if (LocVT == MVT::f64) {
8215 // Pass in STG registers: D1, D2, D3, D4
8216 // fs4,fs5,fs6,fs7
8217 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8218 LoongArch::F30_64, LoongArch::F31_64};
8219 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8220 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8221 return false;
8222 }
8223 }
8224
8225 report_fatal_error("No registers left in GHC calling convention");
8226 return true;
8227}
8228
8229// Transform physical registers into virtual registers.
// Lowers the incoming (formal) arguments described by `Ins` into DAG values,
// appending one SDValue per argument to `InVals`, and returns the (possibly
// updated) Chain. Also sets up the varargs save area and records per-function
// state (byval args, argument stack size) used later by tail-call lowering.
 8231    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 8232    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
 8233    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
 8234
 8236  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
 8237
 8238  switch (CallConv) {
 8239  default:
 8240    llvm_unreachable("Unsupported calling convention");
 8241  case CallingConv::C:
 8242  case CallingConv::Fast:
 8245    break;
 8246  case CallingConv::GHC:
    // GHC passes FP values in FPRs unconditionally, so both F and D must be
    // available.
 8247    if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
 8248        !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
 8250          "GHC calling convention requires the F and D extensions");
 8251  }
 8252
 8253  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 8254  MVT GRLenVT = Subtarget.getGRLenVT();
 8255  unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
 8256  // Used with varargs to accumulate store chains.
 8257  std::vector<SDValue> OutChains;
 8258
 8259  // Assign locations to all of the incoming arguments.
 8261  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 8262
 8263  if (CallConv == CallingConv::GHC)
 8265  else
 8266    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
 8267
 8268  for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
 8269    CCValAssign &VA = ArgLocs[i];
 8270    SDValue ArgValue;
 8271    // Passing f64 on LA32D with a soft float ABI must be handled as a special
 8272    // case.
 8273    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 8274      assert(VA.needsCustom());
      // Consumes two i32 locations (lo/hi), hence the extra ++i.
 8275      ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
 8276    } else if (VA.isRegLoc())
 8277      ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
 8278    else
 8279      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
 8280    if (VA.getLocInfo() == CCValAssign::Indirect) {
 8281      // If the original argument was split and passed by reference, we need to
 8282      // load all parts of it here (using the same address).
 8283      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
 8285      unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
 8286      unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
 8287      assert(ArgPartOffset == 0);
 8288      while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
 8289        CCValAssign &PartVA = ArgLocs[i + 1];
 8290        unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
 8291        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
 8292        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
 8293        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
 8295        ++i;
 8296        ++InsIdx;
 8297      }
 8298      continue;
 8299    }
 8300    InVals.push_back(ArgValue);
    // Remember incoming byval arguments (densely, in order) so that tail-call
    // lowering in LowerCall can reuse the caller's copies.
 8301    if (Ins[InsIdx].Flags.isByVal())
 8302      LoongArchFI->addIncomingByValArgs(ArgValue);
 8303  }
 8304
 8305  if (IsVarArg) {
 8307    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
 8308    const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
 8309    MachineFrameInfo &MFI = MF.getFrameInfo();
 8310    MachineRegisterInfo &RegInfo = MF.getRegInfo();
 8311
 8312    // Offset of the first variable argument from stack pointer, and size of
 8313    // the vararg save area. For now, the varargs save area is either zero or
 8314    // large enough to hold a0-a7.
 8315    int VaArgOffset, VarArgsSaveSize;
 8316
 8317    // If all registers are allocated, then all varargs must be passed on the
 8318    // stack and we don't need to save any argregs.
 8319    if (ArgRegs.size() == Idx) {
 8320      VaArgOffset = CCInfo.getStackSize();
 8321      VarArgsSaveSize = 0;
 8322    } else {
 8323      VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
 8324      VaArgOffset = -VarArgsSaveSize;
 8325    }
 8326
 8327    // Record the frame index of the first variable argument
 8328    // which is a value necessary to VASTART.
 8329    int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
 8330    LoongArchFI->setVarArgsFrameIndex(FI);
 8331
 8332    // If saving an odd number of registers then create an extra stack slot to
 8333    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
 8334    // offsets to even-numbered registers remain 2*GRLen-aligned.
 8335    if (Idx % 2) {
 8336      MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
 8337                            true);
 8338      VarArgsSaveSize += GRLenInBytes;
 8339    }
 8340
 8341    // Copy the integer registers that may have been used for passing varargs
 8342    // to the vararg save area.
 8343    for (unsigned I = Idx; I < ArgRegs.size();
 8344         ++I, VaArgOffset += GRLenInBytes) {
 8345      const Register Reg = RegInfo.createVirtualRegister(RC);
 8346      RegInfo.addLiveIn(ArgRegs[I], Reg);
 8347      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
 8348      FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
 8349      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
 8350      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
      // Clear the IR Value attached to the store's memory operand: the slot
      // has no IR-level object backing it.
 8352      cast<StoreSDNode>(Store.getNode())
 8353          ->getMemOperand()
 8354          ->setValue((Value *)nullptr);
 8355      OutChains.push_back(Store);
 8356    }
 8357    LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
 8358  }
 8359
  // Recorded so isEligibleForTailCallOptimization can compare the callee's
  // stack-argument footprint against our own.
 8360  LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
 8361
 8362  // All stores are grouped in one node to allow the matching between
 8363  // the size of Ins and InVals. This only happens for vararg functions.
 8364  if (!OutChains.empty()) {
 8365    OutChains.push_back(Chain);
 8366    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
 8367  }
 8368
 8369  return Chain;
 8370}
8371
// Whether this call may be emitted as a tail call: simply honors the
// IR-level `tail` marking on the call instruction.
 8373  return CI->isTailCall();
 8374}
8375
8376// Check if the return value is used as only a return value, as otherwise
8377// we can't perform a tail-call.
// On success, `Chain` is set to the chain operand entering the CopyToReg so
// the caller can splice the copy out of the sequence.
 8379    SDValue &Chain) const {
  // The node must produce exactly one value with exactly one use.
 8380  if (N->getNumValues() != 1)
 8381    return false;
 8382  if (!N->hasNUsesOfValue(1, 0))
 8383    return false;
 8384
  // That single use must be a CopyToReg feeding the return registers.
 8385  SDNode *Copy = *N->user_begin();
 8386  if (Copy->getOpcode() != ISD::CopyToReg)
 8387    return false;
 8388
 8389  // If the ISD::CopyToReg has a glue operand, we conservatively assume it
 8390  // isn't safe to perform a tail call.
 8391  if (Copy->getGluedNode())
 8392    return false;
 8393
 8394  // The copy must be used by a LoongArchISD::RET, and nothing else.
 8395  bool HasRet = false;
 8396  for (SDNode *Node : Copy->users()) {
 8397    if (Node->getOpcode() != LoongArchISD::RET)
 8398      return false;
 8399    HasRet = true;
 8400  }
 8401
 8402  if (!HasRet)
 8403    return false;
 8404
 8405  Chain = Copy->getOperand(0);
 8406  return true;
 8407}
8408
8409// Check whether the call is eligible for tail call optimization.
8410bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8411 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8412 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8413
8414 auto CalleeCC = CLI.CallConv;
8415 auto &Outs = CLI.Outs;
8416 auto &Caller = MF.getFunction();
8417 auto CallerCC = Caller.getCallingConv();
8418 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8419
8420 // If the stack arguments for this call do not fit into our own save area then
8421 // the call cannot be made tail.
8422 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8423 return false;
8424
8425 // Do not tail call opt if any parameters need to be passed indirectly.
8426 for (auto &VA : ArgLocs)
8427 if (VA.getLocInfo() == CCValAssign::Indirect)
8428 return false;
8429
8430 // Do not tail call opt if either caller or callee uses struct return
8431 // semantics.
8432 auto IsCallerStructRet = Caller.hasStructRetAttr();
8433 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8434 if (IsCallerStructRet != IsCalleeStructRet)
8435 return false;
8436
8437 // Do not tail call opt if caller's and callee's byval arguments do not match.
8438 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8439 if (!Outs[i].Flags.isByVal())
8440 continue;
8441 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8442 return false;
8443 if (LoongArchFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
8444 return false;
8445 }
8446
8447 // The callee has to preserve all registers the caller needs to preserve.
8448 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8449 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8450 if (CalleeCC != CallerCC) {
8451 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8452 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8453 return false;
8454 }
8455
8456 // If the callee takes no arguments then go on to check the results of the
8457 // call.
8458 const MachineRegisterInfo &MRI = MF.getRegInfo();
8459 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8460 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
8461 return false;
8462
8463 return true;
8464}
8465
// Preferred ABI alignment for a value of type VT, per the module data layout.
 8467  return DAG.getDataLayout().getPrefTypeAlign(
 8468      VT.getTypeForEVT(*DAG.getContext()));
 8469}
8470
8471// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8472// and output parameter nodes.
// Handles regular and GHC calling conventions, tail calls (including byval
// reuse of the caller's incoming arguments), the LA32D soft-float f64 split,
// indirect (split-by-reference) arguments, and all three code models.
8473SDValue
 8475    SmallVectorImpl<SDValue> &InVals) const {
 8476  SelectionDAG &DAG = CLI.DAG;
 8477  SDLoc &DL = CLI.DL;
 8479  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
 8481  SDValue Chain = CLI.Chain;
 8482  SDValue Callee = CLI.Callee;
 8483  CallingConv::ID CallConv = CLI.CallConv;
 8484  bool IsVarArg = CLI.IsVarArg;
 8485  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 8486  MVT GRLenVT = Subtarget.getGRLenVT();
 8487  bool &IsTailCall = CLI.IsTailCall;
 8488
 8490  auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
 8491
 8492  // Analyze the operands of the call, assigning locations to each operand.
 8494  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
 8495
 8496  if (CallConv == CallingConv::GHC)
 8497    ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
 8498  else
 8499    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
 8500
 8501  // Check if it's really possible to do a tail call.
 8502  if (IsTailCall)
 8503    IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
 8504
 8505  if (IsTailCall)
 8506    ++NumTailCalls;
 8507  else if (CLI.CB && CLI.CB->isMustTailCall())
 8508    report_fatal_error("failed to perform tail call elimination on a call "
 8509                       "site marked musttail");
 8510
 8511  // Get a count of how many bytes are to be pushed on the stack.
 8512  unsigned NumBytes = ArgCCInfo.getStackSize();
 8513
 8514  // Create local copies for byval args.
 8515  SmallVector<SDValue> ByValArgs;
 8516  for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
 8517    ISD::ArgFlagsTy Flags = Outs[i].Flags;
 8518    if (!Flags.isByVal())
 8519      continue;
 8520
 8521    SDValue Arg = OutVals[i];
 8522    unsigned Size = Flags.getByValSize();
 8523    Align Alignment = Flags.getNonZeroByValAlign();
 8524    SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
 8525    SDValue Dst;
 8526
 8527    if (IsTailCall) {
      // For tail calls, overwrite the caller's own incoming byval slot
      // instead of allocating a fresh one (the frame is being reused).
 8528      SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++);
 8531        Dst = CallerArg;
 8532    } else {
 8533      int FI =
 8534          MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
 8535      Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
 8536    }
 8537    if (Dst) {
 8538      Chain =
 8539          DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
 8540                        /*IsVolatile=*/false,
 8541                        /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
 8543      ByValArgs.push_back(Dst);
 8544    }
 8545  }
 8546
 8547  if (!IsTailCall)
 8548    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
 8549
 8550  // During a tail call, stores to the argument area must happen after all of
 8551  // the function's incoming arguments have been loaded because they may alias.
 8552  // This is done by folding in a TokenFactor from LowerFormalArguments, but
 8553  // there's no point in doing so repeatedly so this tracks whether that's
 8554  // happened yet.
 8555  bool AfterFormalArgLoads = false;
 8556
 8557  // Copy argument values to their designated locations.
 8559  SmallVector<SDValue> MemOpChains;
 8560  SDValue StackPtr;
 8561  for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
 8562       ++i, ++OutIdx) {
 8563    CCValAssign &VA = ArgLocs[i];
 8564    SDValue ArgValue = OutVals[OutIdx];
 8565    ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
 8566
 8567    // Handle passing f64 on LA32D with a soft float ABI as a special case.
 8568    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 8569      assert(VA.isRegLoc() && "Expected register VA assignment");
 8570      assert(VA.needsCustom());
 8571      SDValue SplitF64 =
 8572          DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
 8573                      DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
 8574      SDValue Lo = SplitF64.getValue(0);
 8575      SDValue Hi = SplitF64.getValue(1);
 8576
 8577      Register RegLo = VA.getLocReg();
 8578      RegsToPass.push_back(std::make_pair(RegLo, Lo));
 8579
 8580      // Get the CCValAssign for the Hi part.
 8581      CCValAssign &HiVA = ArgLocs[++i];
 8582
 8583      if (HiVA.isMemLoc()) {
 8584        // Second half of f64 is passed on the stack.
 8585        if (!StackPtr.getNode())
 8586          StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
 8588            DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
 8589                        DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
 8590        // Emit the store.
 8591        MemOpChains.push_back(DAG.getStore(
 8592            Chain, DL, Hi, Address,
 8594      } else {
 8595        // Second half of f64 is passed in another GPR.
 8596        Register RegHigh = HiVA.getLocReg();
 8597        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
 8598      }
 8599      continue;
 8600    }
 8601
 8602    // Promote the value if needed.
 8603    // For now, only handle fully promoted and indirect arguments.
 8604    if (VA.getLocInfo() == CCValAssign::Indirect) {
 8605      // Store the argument in a stack slot and pass its address.
 8606      Align StackAlign =
 8607          std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
 8608                   getPrefTypeAlign(ArgValue.getValueType(), DAG));
 8609      TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
 8610      // If the original argument was split and passed by reference, we need to
 8611      // store the required parts of it here (and pass just one address).
 8612      unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
 8613      unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
 8614      assert(ArgPartOffset == 0);
 8615      // Calculate the total size to store. We don't have access to what we're
 8616      // actually storing other than performing the loop and collecting the
 8617      // info.
 8619      while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
 8620        SDValue PartValue = OutVals[OutIdx + 1];
 8621        unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
 8622        SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
 8623        EVT PartVT = PartValue.getValueType();
 8624
 8625        StoredSize += PartVT.getStoreSize();
 8626        StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
 8627        Parts.push_back(std::make_pair(PartValue, Offset));
 8628        ++i;
 8629        ++OutIdx;
 8630      }
 8631      SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
 8632      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
 8633      MemOpChains.push_back(
 8634          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
 8636      for (const auto &Part : Parts) {
 8637        SDValue PartValue = Part.first;
 8638        SDValue PartOffset = Part.second;
 8640            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
 8641        MemOpChains.push_back(
 8642            DAG.getStore(Chain, DL, PartValue, Address,
 8644      }
      // Pass the spill slot's address instead of the value itself.
 8645      ArgValue = SpillSlot;
 8646    } else {
 8647      ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
 8648    }
 8649
 8650    // Use local copy if it is a byval arg.
 8651    if (Flags.isByVal()) {
 8652      if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
 8653                          isa<ExternalSymbolSDNode>(ArgValue) ||
 8654                          isa<FrameIndexSDNode>(ArgValue)))
 8655        ArgValue = ByValArgs[j++];
 8656    }
 8657
 8658    if (VA.isRegLoc()) {
 8659      // Queue up the argument copies and emit them at the end.
 8660      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
 8661    } else {
 8662      assert(VA.isMemLoc() && "Argument not register or memory");
 8663      SDValue DstAddr;
 8664      MachinePointerInfo DstInfo;
 8665      int32_t Offset = VA.getLocMemOffset();
 8666
 8667      // Work out the address of the stack slot.
 8668      if (!StackPtr.getNode())
 8669        StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
 8670
 8671      if (IsTailCall) {
        // Tail calls store into our own (fixed) incoming-argument area.
 8672        unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
 8673        int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
 8674        DstAddr = DAG.getFrameIndex(FI, PtrVT);
 8675        DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
 8676        if (!AfterFormalArgLoads) {
 8677          Chain = DAG.getStackArgumentTokenFactor(Chain);
 8678          AfterFormalArgLoads = true;
 8679        }
 8680      } else {
 8681        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
 8682        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
 8683        DstInfo = MachinePointerInfo::getStack(MF, Offset);
 8684      }
 8685
 8686      // Emit the store.
 8687      MemOpChains.push_back(
 8688          DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
 8689    }
 8690  }
 8691
 8692  // Join the stores, which are independent of one another.
 8693  if (!MemOpChains.empty())
 8694    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
 8695
 8696  SDValue Glue;
 8697
 8698  // Build a sequence of copy-to-reg nodes, chained and glued together.
 8699  for (auto &Reg : RegsToPass) {
 8700    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
 8701    Glue = Chain.getValue(1);
 8702  }
 8703
 8704  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
 8705  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
 8706  // split it and then direct call can be matched by PseudoCALL_SMALL.
 8708    const GlobalValue *GV = S->getGlobal();
 8709    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
 8712    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
 8713  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
 8714    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
 8717    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
 8718  }
 8719
 8720  // The first call operand is the chain and the second is the target address.
 8722  Ops.push_back(Chain);
 8723  Ops.push_back(Callee);
 8724
 8725  // Add argument registers to the end of the list so that they are
 8726  // known live into the call.
 8727  for (auto &Reg : RegsToPass)
 8728    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
 8729
 8730  if (!IsTailCall) {
 8731    // Add a register mask operand representing the call-preserved registers.
 8732    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
 8733    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
 8734    assert(Mask && "Missing call preserved mask for calling convention");
 8735    Ops.push_back(DAG.getRegisterMask(Mask));
 8736  }
 8737
 8738  // Glue the call to the argument copies, if any.
 8739  if (Glue.getNode())
 8740    Ops.push_back(Glue);
 8741
 8742  // Emit the call.
 8743  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 8744  unsigned Op;
  // Pick the call/tail pseudo matching the code model's relocation range.
 8745  switch (DAG.getTarget().getCodeModel()) {
 8746  default:
 8747    report_fatal_error("Unsupported code model");
 8748  case CodeModel::Small:
 8749    Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
 8750    break;
 8751  case CodeModel::Medium:
 8752    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
 8753    break;
 8754  case CodeModel::Large:
 8755    assert(Subtarget.is64Bit() && "Large code model requires LA64");
 8756    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
 8757    break;
 8758  }
 8759
 8760  if (IsTailCall) {
 8762    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
 8763    DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
 8764    return Ret;
 8765  }
 8766
 8767  Chain = DAG.getNode(Op, DL, NodeTys, Ops);
 8768  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
 8769  Glue = Chain.getValue(1);
 8770
 8771  // Mark the end of the call, which is glued to the call itself.
 8772  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
 8773  Glue = Chain.getValue(1);
 8774
 8775  // Assign locations to each value returned by this call.
 8777  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
 8778  analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
 8779
 8780  // Copy all of the result registers out of their specified physreg.
 8781  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
 8782    auto &VA = RVLocs[i];
 8783    // Copy the value out.
 8784    SDValue RetValue =
 8785        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
 8786    // Glue the RetValue to the end of the call sequence.
 8787    Chain = RetValue.getValue(1);
 8788    Glue = RetValue.getValue(2);
 8789
    // f64 returned as two i32 halves on LA32D soft-float: fetch the second
    // half and rebuild the pair.
 8790    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 8791      assert(VA.needsCustom());
 8792      SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
 8793                                             MVT::i32, Glue);
 8794      Chain = RetValue2.getValue(1);
 8795      Glue = RetValue2.getValue(2);
 8796      RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
 8797                             RetValue, RetValue2);
 8798    } else
 8799      RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
 8800
 8801    InVals.push_back(RetValue);
 8802  }
 8803
 8804  return Chain;
 8805}
8806
// Returns true iff every return value in `Outs` can be assigned a register
// location under CC_LoongArch (no value spills to the stack / fails to fit).
 8808    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
 8809    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
 8810    const Type *RetTy) const {
 8812  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
 8813
 8814  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    // NOTE(review): the ABI lookup is loop-invariant and could be hoisted
    // above the loop.
 8815    LoongArchABI::ABI ABI =
 8816        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    // CC_LoongArch returns true on allocation failure.
 8817    if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
 8818                     Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
 8819      return false;
 8820  }
 8821  return true;
 8822}
8823
// Lowers the function's return: copies each return value into its assigned
// physical register (splitting f64 into two i32 GPRs on LA32D soft-float)
// and emits the terminating LoongArchISD::RET node.
 8825    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 8827    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
 8828    SelectionDAG &DAG) const {
 8829  // Stores the assignment of the return value to a location.
 8831
 8832  // Info about the registers and stack slot.
 8833  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 8834                 *DAG.getContext());
 8835
 8836  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
 8837                    nullptr, CC_LoongArch);
 8838  if (CallConv == CallingConv::GHC && !RVLocs.empty())
 8839    report_fatal_error("GHC functions return void only");
 8840  SDValue Glue;
 8841  SmallVector<SDValue, 4> RetOps(1, Chain);
 8842
 8843  // Copy the result values into the output registers.
 8844  for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
 8845    SDValue Val = OutVals[OutIdx];
 8846    CCValAssign &VA = RVLocs[i];
 8847    assert(VA.isRegLoc() && "Can only return in registers!");
 8848
 8849    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
 8850      // Handle returning f64 on LA32D with a soft float ABI.
 8851      assert(VA.isRegLoc() && "Expected return via registers");
 8852      assert(VA.needsCustom());
 8853      SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
 8854                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
 8855      SDValue Lo = SplitF64.getValue(0);
 8856      SDValue Hi = SplitF64.getValue(1);
      // The hi half occupies the next RVLoc, hence the extra ++i.
 8857      Register RegLo = VA.getLocReg();
 8858      Register RegHi = RVLocs[++i].getLocReg();
 8859
 8860      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
 8861      Glue = Chain.getValue(1);
 8862      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
 8863      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
 8864      Glue = Chain.getValue(1);
 8865      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
 8866    } else {
 8867      // Handle a 'normal' return.
 8868      Val = convertValVTToLocVT(DAG, Val, VA, DL);
 8869      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
 8870
 8871      // Guarantee that all emitted copies are stuck together.
 8872      Glue = Chain.getValue(1);
 8873      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
 8874    }
 8875  }
 8876
 8877  RetOps[0] = Chain; // Update chain.
 8878
 8879  // Add the glue node if we have it.
 8880  if (Glue.getNode())
 8881    RetOps.push_back(Glue);
 8882
 8883  return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
 8884}
8885
8886// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8887// Note: The following prefixes are excluded:
8888// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8889// as they can be represented using [x]vrepli.[whb]
// Returns {true, imm} where imm is the 13-bit vldi immediate (imm[12] set,
// pattern selector in imm[11:8], payload in imm[7:0]), or {false, 0} when no
// encoding matches the splat value.
 8891    const APInt &SplatValue, const unsigned SplatBitSize) const {
 8892  uint64_t RequiredImm = 0;
 8893  uint64_t V = SplatValue.getZExtValue();
  // Each case below checks that all bits outside the encodable field are the
  // required constant (0s or 1s) and then packs the payload byte/bits.
 8894  if (SplatBitSize == 16 && !(V & 0x00FF)) {
 8895    // 4'b0101
 8896    RequiredImm = (0b10101 << 8) | (V >> 8);
 8897    return {true, RequiredImm};
 8898  } else if (SplatBitSize == 32) {
 8899    // 4'b0001
 8900    if (!(V & 0xFFFF00FF)) {
 8901      RequiredImm = (0b10001 << 8) | (V >> 8);
 8902      return {true, RequiredImm};
 8903    }
 8904    // 4'b0010
 8905    if (!(V & 0xFF00FFFF)) {
 8906      RequiredImm = (0b10010 << 8) | (V >> 16);
 8907      return {true, RequiredImm};
 8908    }
 8909    // 4'b0011
 8910    if (!(V & 0x00FFFFFF)) {
 8911      RequiredImm = (0b10011 << 8) | (V >> 24);
 8912      return {true, RequiredImm};
 8913    }
 8914    // 4'b0110
 8915    if ((V & 0xFFFF00FF) == 0xFF) {
 8916      RequiredImm = (0b10110 << 8) | (V >> 8);
 8917      return {true, RequiredImm};
 8918    }
 8919    // 4'b0111
 8920    if ((V & 0xFF00FFFF) == 0xFFFF) {
 8921      RequiredImm = (0b10111 << 8) | (V >> 16);
 8922      return {true, RequiredImm};
 8923    }
 8924    // 4'b1010
 8925    if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
 8926      RequiredImm =
 8927          (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
 8928      return {true, RequiredImm};
 8929    }
 8930  } else if (SplatBitSize == 64) {
 8931    // 4'b1011
 8932    if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
 8933        (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
 8934      RequiredImm =
 8935          (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
 8936      return {true, RequiredImm};
 8937    }
 8938    // 4'b1100
 8939    if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
 8940        (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
 8941      RequiredImm =
 8942          (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
 8943      return {true, RequiredImm};
 8944    }
 8945    // 4'b1001
    // Succeeds only when every byte of x is 0x00 or 0xFF; bit i of the
    // returned suffix selects whether byte i is all-ones.
 8946    auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
 8947      uint8_t res = 0;
 8948      for (int i = 0; i < 8; ++i) {
 8949        uint8_t byte = x & 0xFF;
 8950        if (byte == 0 || byte == 0xFF)
 8951          res |= ((byte & 1) << i);
 8952        else
 8953          return {false, 0};
 8954        x >>= 8;
 8955      }
 8956      return {true, res};
 8957    };
 8958    auto [IsSame, Suffix] = sameBitsPreByte(V);
 8959    if (IsSame) {
 8960      RequiredImm = (0b11001 << 8) | Suffix;
 8961      return {true, RequiredImm};
 8962    }
 8963  }
 8964  return {false, RequiredImm};
 8965}
8966
// Whether the FP immediate can be materialized with a single [x]vldi.
// The masks mirror the 4'b1010 (f32) and 4'b1100 (f64) splat patterns in
// isImmVLDILegal above; requires LSX since vldi is a vector instruction.
 8968    EVT VT) const {
 8969  if (!Subtarget.hasExtLSX())
 8970    return false;
 8971
 8972  if (VT == MVT::f32) {
 8973    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
 8974    return (masked == 0x3e000000 || masked == 0x40000000);
 8975  }
 8976
 8977  if (VT == MVT::f64) {
 8978    uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
 8979    return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
 8980  }
 8981
 8982  return false;
 8983}
8984
8985bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8986 bool ForCodeSize) const {
8987 // TODO: Maybe need more checks here after vector extension is supported.
8988 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8989 return false;
8990 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8991 return false;
8992 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8993}
8994
// NOTE(review): hook unconditionally returns true; its signature line is not
// visible in this rendering — confirm which TargetLowering query this
// implements against the class declaration before relying on it.
 8996  return true;
 8997}
8998
// NOTE(review): hook unconditionally returns true; its signature line is not
// visible in this rendering — confirm which TargetLowering query this
// implements against the class declaration before relying on it.
 9000  return true;
 9001}
9002
9003bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
9004 const Instruction *I) const {
9005 if (!Subtarget.is64Bit())
9006 return isa<LoadInst>(I) || isa<StoreInst>(I);
9007
9008 if (isa<LoadInst>(I))
9009 return true;
9010
9011 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
9012 // require fences beacuse we can use amswap_db.[w/d].
9013 Type *Ty = I->getOperand(0)->getType();
9014 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
9015 unsigned Size = Ty->getIntegerBitWidth();
9016 return (Size == 8 || Size == 16);
9017 }
9018
9019 return false;
9020}
9021
// Result type for setcc/compare nodes.
 9023                                                  LLVMContext &Context,
 9024                                                  EVT VT) const {
  // Scalar comparisons produce a pointer-width (GRLen) integer result.
 9025  if (!VT.isVector())
 9026    return getPointerTy(DL);
 9028}
9029
// Whether (and x, (not y)) is preferable to the expanded form for operand Y.
 9031  EVT VT = Y.getValueType();
 9032
  // Vector andn is only available with LSX, and only for integer elements.
 9033  if (VT.isVector())
 9034    return Subtarget.hasExtLSX() && VT.isInteger();
 9035
  // Scalar: not worthwhile when Y is a constant (the NOT folds away anyway).
 9036  return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
 9037}
9038
// Describe the memory access performed by target memory intrinsics so the
// DAG builder can attach correct memory operands.
 9041                                                 MachineFunction &MF, unsigned Intrinsic) const {
 9042  switch (Intrinsic) {
 9043  default:
 9044    return;
 9045  case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
 9046  case Intrinsic::loongarch_masked_atomicrmw_add_i32:
 9047  case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
 9048  case Intrinsic::loongarch_masked_atomicrmw_nand_i32: {
    // All four are i32-wide, naturally-aligned read-modify-write accesses on
    // the pointer in operand 0.
 9049    IntrinsicInfo Info;
 9051    Info.memVT = MVT::i32;
 9052    Info.ptrVal = I.getArgOperand(0);
 9053    Info.offset = 0;
 9054    Info.align = Align(4);
 9057    Infos.push_back(Info);
 9058    return;
 9059    // TODO: Add more Intrinsics later.
 9060  }
 9061  }
 9062}
9063
9064// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
9065// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9066// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9067// regression, we need to implement it manually.
// Widens a sub-word and/or/xor atomicrmw to an aligned 32-bit atomicrmw by
// shifting the operand into its byte lane, then extracts the old sub-word
// value from the result.
 9070
 9072          Op == AtomicRMWInst::And) &&
 9073         "Unable to expand");
 9074  unsigned MinWordSize = 4;
 9075
 9076  IRBuilder<> Builder(AI);
 9077  LLVMContext &Ctx = Builder.getContext();
 9078  const DataLayout &DL = AI->getDataLayout();
 9079  Type *ValueType = AI->getType();
 9080  Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
 9081
 9082  Value *Addr = AI->getPointerOperand();
 9083  PointerType *PtrTy = cast<PointerType>(Addr->getType());
 9084  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
 9085
  // Round the address down to the containing aligned 32-bit word.
 9086  Value *AlignedAddr = Builder.CreateIntrinsic(
 9087      Intrinsic::ptrmask, {PtrTy, IntTy},
 9088      {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
 9089      "AlignedAddr");
 9090
  // Bit offset of the sub-word within the word: low address bits * 8.
 9091  Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
 9092  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
 9093  Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
 9094  ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
 9095  Value *Mask = Builder.CreateShl(
 9096      ConstantInt::get(WordType,
 9097                       (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
 9098      ShiftAmt, "Mask");
 9099  Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
 9100  Value *ValOperand_Shifted =
 9101      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
 9102                        ShiftAmt, "ValOperand_Shifted");
 9103  Value *NewOperand;
  // For AND, set all bits outside the target lane so the neighboring bytes
  // are left unchanged by the word-wide AND; OR/XOR with zeros is already a
  // no-op outside the lane.
 9104  if (Op == AtomicRMWInst::And)
 9105    NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
 9106  else
 9107    NewOperand = ValOperand_Shifted;
 9108
 9109  AtomicRMWInst *NewAI =
 9110      Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
 9111                              AI->getOrdering(), AI->getSyncScopeID());
 9112
  // Recover the original sub-word result from the word-wide old value.
 9113  Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
 9114  Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
 9115  Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
 9116  AI->replaceAllUsesWith(FinalOldResult);
 9117  AI->eraseFromParent();
 9118}
9119
// Choose the expansion strategy for an atomicrmw, based on its operation,
// width, and the available LAM_BH / LAMCAS subtarget features.
// NOTE(review): several condition lines are not visible in this rendering;
// the visible structure is: FP ops expand via cmpxchg; LAM_BH on LA64 handles
// sub-word add/sub natively; LAMCAS redirects narrow and/nand-style ops; and
// remaining 8/16-bit ops use the masked-intrinsic path. Confirm the elided
// branches against the full source.
 9122    const AtomicRMWInst *AI) const {
 9123  // TODO: Add more AtomicRMWInst that needs to be extended.
 9124
 9125  // Since floating-point operation requires a non-trivial set of data
 9126  // operations, use CmpXChg to expand.
 9127  if (AI->isFloatingPointOperation() ||
 9133
 9134  if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
 9137       AI->getOperation() == AtomicRMWInst::Sub)) {
 9139  }
 9140
 9141  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
 9142  if (Subtarget.hasLAMCAS()) {
 9143    if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
 9147    if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
 9149  }
 9150
 9151  if (Size == 8 || Size == 16)
 9154}
9155
// Map an AtomicRMW binop to the corresponding loongarch masked-atomicrmw
// intrinsic for the given GRLen (the i64 variants on LA64, i32 on LA32).
9156static Intrinsic::ID
 9158                                    AtomicRMWInst::BinOp BinOp) {
 9159  if (GRLen == 64) {
 9160    switch (BinOp) {
 9161    default:
 9162      llvm_unreachable("Unexpected AtomicRMW BinOp");
 9164      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
 9165    case AtomicRMWInst::Add:
 9166      return Intrinsic::loongarch_masked_atomicrmw_add_i64;
 9167    case AtomicRMWInst::Sub:
 9168      return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
 9170      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
 9172      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
 9174      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
 9175    case AtomicRMWInst::Max:
 9176      return Intrinsic::loongarch_masked_atomicrmw_max_i64;
 9177    case AtomicRMWInst::Min:
 9178      return Intrinsic::loongarch_masked_atomicrmw_min_i64;
 9179      // TODO: support other AtomicRMWInst.
 9180    }
 9181  }
 9182
 9183  if (GRLen == 32) {
 9184    switch (BinOp) {
 9185    default:
 9186      llvm_unreachable("Unexpected AtomicRMW BinOp");
 9188      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
 9189    case AtomicRMWInst::Add:
 9190      return Intrinsic::loongarch_masked_atomicrmw_add_i32;
 9191    case AtomicRMWInst::Sub:
 9192      return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
 9194      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
 9196      return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
 9198      return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
 9199    case AtomicRMWInst::Max:
 9200      return Intrinsic::loongarch_masked_atomicrmw_max_i32;
 9201    case AtomicRMWInst::Min:
 9202      return Intrinsic::loongarch_masked_atomicrmw_min_i32;
 9203      // TODO: support other AtomicRMWInst.
 9204    }
 9205  }
 9206
 9207  llvm_unreachable("Unexpected GRLen\n");
 9208}
9209
// Chooses the expansion for cmpxchg: with LAMCAS it can be handled natively;
// 8/16-bit compare-exchanges use the masked-intrinsic expansion.
// NOTE(review): the signature, the Size computation line, and the returned
// expansion kinds are elided in this rendering.
9212                                             const AtomicCmpXchgInst *CI) const {
9213
9214   if (Subtarget.hasLAMCAS())
9216
9218   if (Size == 8 || Size == 16)
9221 }
9222
// Emits a call to the loongarch_masked_cmpxchg_{i32,i64} intrinsic for a
// part-word compare-exchange on an aligned word. On LA64 the i32 operands
// are sign-extended to i64 before the call and the result is truncated back.
// NOTE(review): the function-name line is elided in this rendering.
9224     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9225     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9226   unsigned GRLen = Subtarget.getGRLen();
// The failure ordering is forwarded to the intrinsic as a GRLen-wide
// integer constant so the backend can pick the right barriers.
9227   AtomicOrdering FailOrd = CI->getFailureOrdering();
9228   Value *FailureOrdering =
9229       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9230   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9231   if (GRLen == 64) {
9232     CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9233     CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9234     NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9235     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9236   }
9237   Type *Tys[] = {AlignedAddr->getType()};
9238   Value *Result = Builder.CreateIntrinsic(
9239       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9240   if (GRLen == 64)
9241     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9242   return Result;
9243 }
9244
// Emits the masked atomicrmw intrinsic call for a part-word atomicrmw.
// Constant 0/-1 xchg is strength-reduced to And/Or; min/max additionally
// pass a sign-extension shift amount to the LL/SC loop.
// NOTE(review): the function-name line, the ConstantInt extraction for the
// xchg special case, and the intrinsic-declaration lookup lines are elided
// in this rendering.
9246     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9247     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9248   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9249   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9250   // mask, as this produces better code than the LL/SC loop emitted by
9251   // int_loongarch_masked_atomicrmw_xchg.
9252   if (AI->getOperation() == AtomicRMWInst::Xchg &&
// xchg with 0 clears the masked lanes; xchg with -1 sets them.
9255     if (CVal->isZero())
9256       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9257                                      Builder.CreateNot(Mask, "Inv_Mask"),
9258                                      AI->getAlign(), Ord);
9259     if (CVal->isMinusOne())
9260       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9261                                      AI->getAlign(), Ord);
9262   }
9263
9264   unsigned GRLen = Subtarget.getGRLen();
// Success ordering is passed as a GRLen-wide integer constant.
9265   Value *Ordering =
9266       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9267   Type *Tys[] = {AlignedAddr->getType()};
// The masked-atomicrmw intrinsic declaration is looked up here
// (lookup lines elided in this rendering).
9269       AI->getModule(),
9271
// On LA64, i32 operands are sign-extended to i64 for the intrinsic call.
9272   if (GRLen == 64) {
9273     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9274     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9275     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9276   }
9277
9278   Value *Result;
9279
9280   // Must pass the shift amount needed to sign extend the loaded value prior
9281   // to performing a signed comparison for min/max. ShiftAmt is the number of
9282   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9283   // is the number of bits to left+right shift the value in order to
9284   // sign-extend.
9285   if (AI->getOperation() == AtomicRMWInst::Min ||
9287     const DataLayout &DL = AI->getDataLayout();
9288     unsigned ValWidth =
9289         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9290     Value *SextShamt =
9291         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9292     Result = Builder.CreateCall(LlwOpScwLoop,
9293                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9294   } else {
9295     Result =
9296         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9297   }
9298
// Truncate the i64 intrinsic result back to the i32 the expansion expects.
9299   if (GRLen == 64)
9300     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9301   return Result;
9302 }
9303
// Fused multiply-add is profitable for scalar f32/f64 (fmadd.{s,d}); vector
// element types are checked via the scalar type. Non-simple or other types
// return false. NOTE(review): the signature line is elided in this rendering.
9305                                                      const MachineFunction &MF, EVT VT) const {
9306   VT = VT.getScalarType();
9307
9308   if (!VT.isSimple())
9309     return false;
9310
9311   switch (VT.getSimpleVT().SimpleTy) {
9312   case MVT::f32:
9313   case MVT::f64:
9314     return true;
9315   default:
9316     break;
9317   }
9318
9319   return false;
9320 }
9321
// The exception pointer is passed to landing pads in $r4 ($a0).
// NOTE(review): the signature line is elided in this rendering.
9323                                              const Constant *PersonalityFn) const {
9324   return LoongArch::R4;
9325 }
9326
// The exception selector is passed to landing pads in $r5 ($a1).
// NOTE(review): the signature line is elided in this rendering.
9328                                              const Constant *PersonalityFn) const {
9329   return LoongArch::R5;
9330 }
9331
9332//===----------------------------------------------------------------------===//
9333// Target Optimization Hooks
9334//===----------------------------------------------------------------------===//
9335
// Number of Newton-Raphson refinement steps required after an FRECIPE/FRSQRTE
// estimate to reach full precision for the given type.
// NOTE(review): the signature line is elided in this rendering.
9337                                        const LoongArchSubtarget &Subtarget) {
9338   // Feature FRECIPE instructions relative accuracy is 2^-14.
9339   // IEEE float has 23 significand bits and double has 52 significand bits.
9340   int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9341   return RefinementSteps;
9342 }
9343
// Provides a hardware square-root estimate via FRSQRTE when the FRECIPE
// feature is available and the type is supported (scalar f32/f64 and
// LSX/LASX vector types). Returns an empty SDValue to use the default
// expansion otherwise.
// NOTE(review): the signature line is elided in this rendering.
9345                                                  SelectionDAG &DAG, int Enabled,
9346                                                  int &RefinementSteps,
9347                                                  bool &UseOneConstNR,
9348                                                  bool Reciprocal) const {
9349   if (Subtarget.hasFrecipe()) {
9350     SDLoc DL(Operand);
9351     EVT VT = Operand.getValueType();
9352
9353     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9354         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9355         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9356         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9357         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9358
// Fill in the step count only if the caller left it unspecified.
9359       if (RefinementSteps == ReciprocalEstimate::Unspecified)
9360         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9361
// FRSQRTE estimates 1/sqrt(x); the FMUL by the operand adjusts the
// estimate for the Reciprocal form per the getSqrtEstimate contract.
9362       SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9363       if (Reciprocal)
9364         Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9365
9366       return Estimate;
9367     }
9368   }
9369
9370   return SDValue();
9371 }
9372
// Provides a hardware reciprocal estimate via FRECIPE when the feature is
// available and the type is supported (same type set as getSqrtEstimate).
// Returns an empty SDValue to use the default expansion otherwise.
// NOTE(review): the signature line is elided in this rendering.
9374                                                   SelectionDAG &DAG,
9375                                                   int Enabled,
9376                                                   int &RefinementSteps) const {
9377   if (Subtarget.hasFrecipe()) {
9378     SDLoc DL(Operand);
9379     EVT VT = Operand.getValueType();
9380
9381     if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9382         (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9383         (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9384         (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9385         (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9386
// Fill in the step count only if the caller left it unspecified.
9387       if (RefinementSteps == ReciprocalEstimate::Unspecified)
9388         RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9389
9390       return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9391     }
9392   }
9393
9394   return SDValue();
9395 }
9396
9397//===----------------------------------------------------------------------===//
9398// LoongArch Inline Assembly Support
9399//===----------------------------------------------------------------------===//
9400
// Classifies LoongArch inline-asm constraint letters into LLVM constraint
// categories; unhandled constraints (including 'm') defer to the base class.
// NOTE(review): the return-type line is elided in this rendering.
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9403   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9404   //
9405   // 'f': A floating-point register (if available).
9406   // 'k': A memory operand whose address is formed by a base register and
9407   //      (optionally scaled) index register.
9408   // 'l': A signed 16-bit constant.
9409   // 'm': A memory operand whose address is formed by a base register and
9410   //      offset that is suitable for use in instructions with the same
9411   //      addressing mode as st.w and ld.w.
9412   // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9413   //      instruction)
9414   // 'I': A signed 12-bit constant (for arithmetic instructions).
9415   // 'J': Integer zero.
9416   // 'K': An unsigned 12-bit constant (for logic instructions).
9417   // "ZB": An address that is held in a general-purpose register. The offset is
9418   //       zero.
9419   // "ZC": A memory operand whose address is formed by a base register and
9420   //       offset that is suitable for use in instructions with the same
9421   //       addressing mode as ll.w and sc.w.
9422   if (Constraint.size() == 1) {
9423     switch (Constraint[0]) {
9424     default:
9425       break;
9426     case 'f':
9427     case 'q':
9428       return C_RegisterClass;
9429     case 'l':
9430     case 'I':
9431     case 'J':
9432     case 'K':
9433       return C_Immediate;
9434     case 'k':
9435       return C_Memory;
9436     }
9437   }
9438
9439   if (Constraint == "ZC" || Constraint == "ZB")
9440     return C_Memory;
9441
9442   // 'm' is handled here.
9443   return TargetLowering::getConstraintType(Constraint);
9444 }
9445
// Maps LoongArch memory-constraint strings to InlineAsm constraint codes.
// NOTE(review): the `.Case(...)` mapping lines (for "k"/"ZB"/"ZC") are
// elided in this rendering; only the fallback to the base class is visible.
InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9447     StringRef ConstraintCode) const {
9448   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9452       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9453 }
9454
// Resolves an inline-asm register constraint to a (register, register class)
// pair: letter constraints ('r', 'q', 'f') map to GPR/FPR/vector classes,
// and explicit "{$rN}/{$fN}/{$vrN}/{$xrN}" names are stripped of the '$'
// before deferring to the generic resolver.
std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
9457     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9458   // First, see if this is a constraint that directly corresponds to a LoongArch
9459   // register class.
9460   if (Constraint.size() == 1) {
9461     switch (Constraint[0]) {
9462     case 'r':
9463       // TODO: Support fixed vectors up to GRLen?
9464       if (VT.isVector())
9465         break;
9466       return std::make_pair(0U, &LoongArch::GPRRegClass);
9467     case 'q':
// 'q' excludes $r0/$r1 because csrxchg cannot use them.
9468       return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9469     case 'f':
// Pick the narrowest FP/vector class that is legal for VT and enabled
// by the subtarget features.
9470       if (Subtarget.hasBasicF() && VT == MVT::f32)
9471         return std::make_pair(0U, &LoongArch::FPR32RegClass);
9472       if (Subtarget.hasBasicD() && VT == MVT::f64)
9473         return std::make_pair(0U, &LoongArch::FPR64RegClass);
9474       if (Subtarget.hasExtLSX() &&
9475           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9476         return std::make_pair(0U, &LoongArch::LSX128RegClass);
9477       if (Subtarget.hasExtLASX() &&
9478           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9479         return std::make_pair(0U, &LoongArch::LASX256RegClass);
9480       break;
9481     default:
9482       break;
9483     }
9484   }
9485
9486   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9487   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9488   // constraints while the official register name is prefixed with a '$'. So we
9489   // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9490   // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9491   // case insensitive, so no need to convert the constraint to upper case here.
9492   //
9493   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9494   // decode the usage of register name aliases into their official names. And
9495   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9496   // official register names.
9497   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9498       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9499     bool IsFP = Constraint[2] == 'f';
9500     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9501     std::pair<unsigned, const TargetRegisterClass *> R;
// NOTE(review): the call assigning R via the base-class resolver is
// partially elided in this rendering.
9503         TRI, join_items("", Temp.first, Temp.second), VT);
9504     // Match those names to the widest floating point register type available.
9505     if (IsFP) {
9506       unsigned RegNo = R.first;
9507       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9508         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
// Translate the 32-bit FPR number to its 64-bit counterpart.
9509           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9510           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9511         }
9512       }
9513     }
9514     return R;
9515   }
9516
9517   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9518 }
9519
// Validates immediate inline-asm operands for the 'l'/'I'/'J'/'K' constraints
// and appends the target constant when it fits; otherwise nothing is added so
// the operand is rejected. Unknown constraints fall through to the base class
// (NOTE(review): that fallback call line is elided in this rendering).
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9521     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9522     SelectionDAG &DAG) const {
9523   // Currently only support length 1 constraints.
9524   if (Constraint.size() == 1) {
9525     switch (Constraint[0]) {
9526     case 'l':
9527       // Validate & create a 16-bit signed immediate operand.
9528       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9529         uint64_t CVal = C->getSExtValue();
9530         if (isInt<16>(CVal))
9531           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9532                                                     Subtarget.getGRLenVT()));
9533       }
9534       return;
9535     case 'I':
9536       // Validate & create a 12-bit signed immediate operand.
9537       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9538         uint64_t CVal = C->getSExtValue();
9539         if (isInt<12>(CVal))
9540           Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9541                                                     Subtarget.getGRLenVT()));
9542       }
9543       return;
9544     case 'J':
9545       // Validate & create an integer zero operand.
9546       if (auto *C = dyn_cast<ConstantSDNode>(Op))
9547         if (C->getZExtValue() == 0)
9548           Ops.push_back(
9549               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9550       return;
9551     case 'K':
9552       // Validate & create a 12-bit unsigned immediate operand.
9553       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9554         uint64_t CVal = C->getZExtValue();
9555         if (isUInt<12>(CVal))
9556           Ops.push_back(
9557               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9558       }
9559       return;
9560     default:
9561       break;
9562     }
9563   }
9565 }
9566
9567#define GET_REGISTER_MATCHER
9568#include "LoongArchGenAsmMatcher.inc"
9569
// Resolves a '$'-prefixed register name (e.g. for llvm.read_register) to a
// physical register. Only reserved registers may be named this way; naming a
// non-reserved register is a fatal error.
// NOTE(review): the return-type and signature lines are elided in this
// rendering.
9572                                              const MachineFunction &MF) const {
// Strip the leading '$' before matching against TableGen register names.
9573   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9574   std::string NewRegName = Name.second.str();
9575   Register Reg = MatchRegisterAltName(NewRegName);
9576   if (!Reg)
9577     Reg = MatchRegisterName(NewRegName);
9578   if (!Reg)
9579     return Reg;
9580   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9581   if (!ReservedRegs.test(Reg))
9582     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9583                              StringRef(RegName) + "\"."));
9584   return Reg;
9585 }
9586
// Returns true when a multiply by the given constant is better decomposed
// into shift/add sequences (SLLI+ADD/SUB or ALSL) than kept as a MUL.
// Only scalar integers no wider than GRLen are considered.
// NOTE(review): the signature line is elided in this rendering.
9588                                                      EVT VT, SDValue C) const {
9589   // TODO: Support vectors.
9590   if (!VT.isScalarInteger())
9591     return false;
9592
9593   // Omit the optimization if the data size exceeds GRLen.
9594   if (VT.getSizeInBits() > Subtarget.getGRLen())
9595     return false;
9596
9597   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9598     const APInt &Imm = ConstNode->getAPIntValue();
9599     // Break MUL into (SLLI + ADD/SUB) or ALSL.
9600     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9601         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9602       return true;
9603     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9604     if (ConstNode->hasOneUse() &&
9605         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9606          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9607       return true;
9608     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9609     // in which the immediate has two set bits. Or Break (MUL x, imm)
9610     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9611     // equals to (1 << s0) - (1 << s1).
9612     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9613       unsigned Shifts = Imm.countr_zero();
9614       // Reject immediates which can be composed via a single LUI.
9615       if (Shifts >= 12)
9616         return false;
9617       // Reject multiplications can be optimized to
9618       // (SLLI (ALSL x, x, 1/2/3/4), s).
9619       APInt ImmPop = Imm.ashr(Shifts);
9620       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9621         return false;
9622       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9623       // since it needs one more instruction than other 3 cases.
9624       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9625       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9626           (ImmSmall - Imm).isPowerOf2())
9627         return true;
9628     }
9629   }
9630
9631   return false;
9632 }
9633
// Checks whether the candidate addressing mode matches one LoongArch can
// encode directly (see the enumerated forms below).
// NOTE(review): the signature line is elided in this rendering.
9635                                                 const AddrMode &AM,
9636                                                 Type *Ty, unsigned AS,
9637                                                 Instruction *I) const {
9638   // LoongArch has four basic addressing modes:
9639   //  1. reg
9640   //  2. reg + 12-bit signed offset
9641   //  3. reg + 14-bit signed offset left-shifted by 2
9642   //  4. reg1 + reg2
9643   // TODO: Add more checks after support vector extension.
9644
9645   // No global is ever allowed as a base.
9646   if (AM.BaseGV)
9647     return false;
9648
9649   // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9650   // with `UAL` feature.
9651   if (!isInt<12>(AM.BaseOffs) &&
9652       !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9653     return false;
9654
9655   switch (AM.Scale) {
9656   case 0:
9657     // "r+i" or just "i", depending on HasBaseReg.
9658     break;
9659   case 1:
9660     // "r+r+i" is not allowed.
9661     if (AM.HasBaseReg && AM.BaseOffs)
9662       return false;
9663     // Otherwise we have "r+r" or "r+i".
9664     break;
9665   case 2:
9666     // "2*r+r" or "2*r+i" is not allowed.
9667     if (AM.HasBaseReg || AM.BaseOffs)
9668       return false;
9669     // Allow "2*r" as "r+r".
9670     break;
9671   default:
9672     return false;
9673   }
9674
9675   return true;
9676 }
9677
// Compare immediates are legal iff they fit in a signed 12-bit field (slti).
// NOTE(review): the signature line is elided in this rendering.
9679   return isInt<12>(Imm);
9680 }
9681
// Add immediates are legal iff they fit in a signed 12-bit field (addi).
// NOTE(review): the signature line is elided in this rendering.
9683   return isInt<12>(Imm);
9684 }
9685
// Reports whether a zero-extension of Val is free.
// NOTE(review): the signature line is elided in this rendering.
9687   // Zexts are free if they can be combined with a load.
9688   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9689   // poorly with type legalization of compares preferring sext.
9690   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9691     EVT MemVT = LD->getMemoryVT();
// 8/16-bit plain or zero-extending loads already zero the upper bits.
9692     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9693         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9694          LD->getExtensionType() == ISD::ZEXTLOAD))
9695       return true;
9696   }
9697
9698   return TargetLowering::isZExtFree(Val, VT2);
9699 }
9700
// On LA64, i32->i64 sign-extension is cheaper than zero-extension (values
// live sign-extended in 64-bit registers).
// NOTE(review): the signature line is elided in this rendering.
9702                                                      EVT DstVT) const {
9703   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9704 }
9705
// Prefer materializing i32 constants in sign-extended form on LA64.
// NOTE(review): the signature line is elided in this rendering.
9707   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9708 }
9709
// Prefer the and-not form for scalar compares against non-constant values
// (LoongArch has an ANDN instruction); constants fold better as plain AND.
// NOTE(review): the signature line is elided in this rendering.
9711   // TODO: Support vectors.
9712   if (Y.getValueType().isVector())
9713     return false;
9714
9715   return !isa<ConstantSDNode>(Y);
9716 }
9717
// Selects how cmpxchg comparison arguments must be extended.
// NOTE(review): the signature line is elided in this rendering.
9719   // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9720   return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9721 }
9722
// i32 libcall arguments are always sign-extended on LA64 (the ABI keeps
// 32-bit values sign-extended in 64-bit registers); otherwise follow the
// value's own signedness.
// NOTE(review): the signature line is elided in this rendering.
9724                                                          Type *Ty, bool IsSigned) const {
9725   if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9726     return true;
9727
9728   return IsSigned;
9729 }
9730
// Decides whether libcall values of the given type should be extended.
// NOTE(review): the signature line is elided in this rendering.
9732   // Return false to suppress the unnecessary extensions if the LibCall
9733   // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9734   if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9735                                   Type.getSizeInBits() < Subtarget.getGRLen()))
9736     return false;
9737   return true;
9738 }
9739
// memcpy, and other memory intrinsics, typically tries to use wider load/store
// if the source/dest is aligned and the copy size is large enough. We therefore
// want to align such objects passed to memory intrinsics.
// Requests GRLen-sized alignment (8 bytes on LA64, 4 on LA32) for pointer
// arguments of memory intrinsics. NOTE(review): the signature line is elided
// in this rendering.
9744                                                     unsigned &MinSize,
9745                                                     Align &PrefAlign) const {
9746   if (!isa<MemIntrinsic>(CI))
9747     return false;
9748
9749   if (Subtarget.is64Bit()) {
9750     MinSize = 8;
9751     PrefAlign = Align(8);
9752   } else {
9753     MinSize = 4;
9754     PrefAlign = Align(4);
9755   }
9756
9757   return true;
9758 }
9759
9768
9769bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9770 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9771 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9772 bool IsABIRegCopy = CC.has_value();
9773 EVT ValueVT = Val.getValueType();
9774
9775 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9776 PartVT == MVT::f32) {
9777 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9778 // nan, and cast to f32.
9779 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9780 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9781 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9782 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9783 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9784 Parts[0] = Val;
9785 return true;
9786 }
9787
9788 return false;
9789}
9790
9791SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9792 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9793 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9794 bool IsABIRegCopy = CC.has_value();
9795
9796 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9797 PartVT == MVT::f32) {
9798 SDValue Val = Parts[0];
9799
9800 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9801 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9802 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9803 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9804 return Val;
9805 }
9806
9807 return SDValue();
9808}
9809
// f16 values are passed in f32 registers when BasicF is available; all other
// types use the default mapping. NOTE(review): the fallback `return
// TargetLowering::...` line is elided in this rendering.
MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9811                                                            CallingConv::ID CC,
9812                                                            EVT VT) const {
9813   // Use f32 to pass f16.
9814   if (VT == MVT::f16 && Subtarget.hasBasicF())
9815     return MVT::f32;
9816
9818 }
9819
// An f16 passed as f32 occupies a single register; other types use the
// default count. NOTE(review): the fallback `return TargetLowering::...`
// line is elided in this rendering.
unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9821     LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9822   // Use f32 to pass f16.
9823   if (VT == MVT::f16 && Subtarget.hasBasicF())
9824     return 1;
9825
9827 }
9828
// Target hook for demanded-bits simplification of LoongArch-specific nodes.
// [X]VMSKLTZ packs the sign bit of each vector element into the low bits of
// the scalar result, so only the demanded low lanes and each element's MSB
// matter. NOTE(review): the signature line and two call/return lines are
// elided in this rendering.
9830     SDValue Op, const APInt &OriginalDemandedBits,
9831     const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9832     unsigned Depth) const {
9833   EVT VT = Op.getValueType();
9834   unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9835   unsigned Opc = Op.getOpcode();
9836   switch (Opc) {
9837   default:
9838     break;
9839   case LoongArchISD::VMSKLTZ:
9840   case LoongArchISD::XVMSKLTZ: {
9841     SDValue Src = Op.getOperand(0);
9842     MVT SrcVT = Src.getSimpleValueType();
9843     unsigned SrcBits = SrcVT.getScalarSizeInBits();
9844     unsigned NumElts = SrcVT.getVectorNumElements();
9845
9846     // If we don't need the sign bits at all just return zero.
9847     if (OriginalDemandedBits.countr_zero() >= NumElts)
9848       return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9849
9850     // Only demand the vector elements of the sign bits we need.
9851     APInt KnownUndef, KnownZero;
9852     APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9853     if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9854                                    TLO, Depth + 1))
9855       return true;
9856
// Bits above the lane count are always zero in the packed result.
9857     Known.Zero = KnownZero.zext(BitWidth);
9858     Known.Zero.setHighBits(BitWidth - NumElts);
9859
9860     // [X]VMSKLTZ only uses the MSB from each vector element.
9861     KnownBits KnownSrc;
9862     APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9863     if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9864                              Depth + 1))
9865       return true;
9866
// Propagate a known-constant sign bit of the source into the result lanes.
9867     if (KnownSrc.One[SrcBits - 1])
9868       Known.One.setLowBits(NumElts);
9869     else if (KnownSrc.Zero[SrcBits - 1])
9870       Known.Zero.setLowBits(NumElts);
9871
9872     // Attempt to avoid multi-use ops if we don't need anything from it.
// NOTE(review): the SimplifyMultipleUseDemandedBits call line is elided here.
9874             Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9875       return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9876     return false;
9877   }
9878   }
9879
// NOTE(review): the fallback call to the base-class implementation is
// partially elided in this rendering.
9881       Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9882 }
9883
// Decides whether a binary op on an extracted vector element is better done
// as a scalar op. Target opcodes are never scalarized; otherwise scalarize
// when the vector form is unsupported, or when both forms are supported.
// NOTE(review): the signature line and the vector-legality check line are
// elided in this rendering — presumably `shouldScalarizeBinop(SDValue VecOp)`;
// confirm against the full source.
9885   unsigned Opc = VecOp.getOpcode();
9886
9887   // Assume target opcodes can't be scalarized.
9888   // TODO - do we have any exceptions?
9889   if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9890     return false;
9891
9892   // If the vector op is not supported, try to convert to scalar.
9893   EVT VecVT = VecOp.getValueType();
9895     return true;
9896
9897   // If the vector op is supported, but the scalar op is not, the transform may
9898   // not be worthwhile.
9899   EVT ScalarVT = VecVT.getScalarType();
9900   return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9901 }
9902
// Extracting a subvector is cheap only at index 0 (e.g. the low 128 bits of
// a 256-bit vector). NOTE(review): the signature line and a guard condition
// line are elided in this rendering.
9904                                                      unsigned Index) const {
9906     return false;
9907
9908   // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
9909   return Index == 0;
9910 }
9911
// Extracting element 0 of an FP vector is free (the scalar aliases the low
// lane of the vector register). NOTE(review): the signature line is elided
// in this rendering.
9913                                                   unsigned Index) const {
9914   EVT EltVT = VT.getScalarType();
9915
9916   // Extract a scalar FP value from index 0 of a vector is free.
9917   return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9918 }
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
const MCPhysReg PreserveNoneArgGPRs[]
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static Register allocateArgGPR(CCState &State)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1495
bool isZero() const
Definition APFloat.h:1508
APInt bitcastToAPInt() const
Definition APFloat.h:1404
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1406
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1403
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
Argument * getArg(unsigned i) const
Definition Function.h:886
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
void getTgtMemIntrinsic(SmallVectorImpl< IntrinsicInfo > &Infos, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...