// LoongArchISelLowering.cpp (from LLVM 23.0.0git) — doxygen page header
// converted to a comment so the file remains valid C++.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
// Counter surfaced via -stats; incremented when a call is lowered as a tail
// call.
STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
// Hidden command-line flag: when set, the integer-division lowering emits an
// explicit divide-by-zero trap sequence. Off by default.
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));
77
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
407
413
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
421 Legal);
423 VT, Legal);
430 Expand);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
459 VT, Expand);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
473 }
474
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
485 }
486
487 // Compute derived properties from the register classes.
488 computeRegisterProperties(Subtarget.getRegisterInfo());
489
491
494
495 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
496
498
499 // Function alignments.
501 // Set preferred alignments.
502 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
503 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
504 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
505
506 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
507 if (Subtarget.hasLAMCAS())
509
510 if (Subtarget.hasSCQ()) {
513 }
514
515 // Disable strict node mutation.
516 IsStrictFPEnabled = true;
517}
518
520 const GlobalAddressSDNode *GA) const {
521 // In order to maximise the opportunity for common subexpression elimination,
522 // keep a separate ADD node for the global address offset instead of folding
523 // it in the global address node. Later peephole optimisations may choose to
524 // fold it back in when profitable.
525 return false;
526}
527
529 SelectionDAG &DAG) const {
530 switch (Op.getOpcode()) {
532 return lowerATOMIC_FENCE(Op, DAG);
534 return lowerEH_DWARF_CFA(Op, DAG);
536 return lowerGlobalAddress(Op, DAG);
538 return lowerGlobalTLSAddress(Op, DAG);
540 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
542 return lowerINTRINSIC_W_CHAIN(Op, DAG);
544 return lowerINTRINSIC_VOID(Op, DAG);
546 return lowerBlockAddress(Op, DAG);
547 case ISD::JumpTable:
548 return lowerJumpTable(Op, DAG);
549 case ISD::SHL_PARTS:
550 return lowerShiftLeftParts(Op, DAG);
551 case ISD::SRA_PARTS:
552 return lowerShiftRightParts(Op, DAG, true);
553 case ISD::SRL_PARTS:
554 return lowerShiftRightParts(Op, DAG, false);
556 return lowerConstantPool(Op, DAG);
557 case ISD::FP_TO_SINT:
558 return lowerFP_TO_SINT(Op, DAG);
559 case ISD::BITCAST:
560 return lowerBITCAST(Op, DAG);
561 case ISD::UINT_TO_FP:
562 return lowerUINT_TO_FP(Op, DAG);
563 case ISD::SINT_TO_FP:
564 return lowerSINT_TO_FP(Op, DAG);
565 case ISD::VASTART:
566 return lowerVASTART(Op, DAG);
567 case ISD::FRAMEADDR:
568 return lowerFRAMEADDR(Op, DAG);
569 case ISD::RETURNADDR:
570 return lowerRETURNADDR(Op, DAG);
572 return lowerWRITE_REGISTER(Op, DAG);
574 return lowerINSERT_VECTOR_ELT(Op, DAG);
576 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
578 return lowerBUILD_VECTOR(Op, DAG);
580 return lowerCONCAT_VECTORS(Op, DAG);
582 return lowerVECTOR_SHUFFLE(Op, DAG);
583 case ISD::BITREVERSE:
584 return lowerBITREVERSE(Op, DAG);
586 return lowerSCALAR_TO_VECTOR(Op, DAG);
587 case ISD::PREFETCH:
588 return lowerPREFETCH(Op, DAG);
589 case ISD::SELECT:
590 return lowerSELECT(Op, DAG);
591 case ISD::BRCOND:
592 return lowerBRCOND(Op, DAG);
593 case ISD::FP_TO_FP16:
594 return lowerFP_TO_FP16(Op, DAG);
595 case ISD::FP16_TO_FP:
596 return lowerFP16_TO_FP(Op, DAG);
597 case ISD::FP_TO_BF16:
598 return lowerFP_TO_BF16(Op, DAG);
599 case ISD::BF16_TO_FP:
600 return lowerBF16_TO_FP(Op, DAG);
602 return lowerVECREDUCE_ADD(Op, DAG);
603 case ISD::ROTL:
604 case ISD::ROTR:
605 return lowerRotate(Op, DAG);
613 return lowerVECREDUCE(Op, DAG);
614 case ISD::ConstantFP:
615 return lowerConstantFP(Op, DAG);
616 }
617 return SDValue();
618}
619
620// Helper to attempt to return a cheaper, bit-inverted version of \p V.
622 // TODO: don't always ignore oneuse constraints.
623 V = peekThroughBitcasts(V);
624 EVT VT = V.getValueType();
625
626 // Match not(xor X, -1) -> X.
627 if (V.getOpcode() == ISD::XOR &&
628 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
629 isAllOnesConstant(V.getOperand(1))))
630 return V.getOperand(0);
631
632 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
633 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
634 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
635 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
636 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
637 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
638 V.getOperand(1));
639 }
640 }
641
642 // Match not(SplatVector(not(X)) -> SplatVector(X).
643 if (V.getOpcode() == ISD::BUILD_VECTOR) {
644 if (SDValue SplatValue =
645 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
646 if (!V->isOnlyUserOf(SplatValue.getNode()))
647 return SDValue();
648
649 if (SDValue Not = isNOT(SplatValue, DAG)) {
650 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
651 return DAG.getSplat(VT, SDLoc(Not), Not);
652 }
653 }
654 }
655
656 // Match not(or(not(X),not(Y))) -> and(X, Y).
657 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
658 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
659 // TODO: Handle cases with single NOT operand -> VANDN
660 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
661 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
662 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
663 DAG.getBitcast(VT, Op1));
664 }
665
666 // TODO: Add more matching patterns. Such as,
667 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
668 // not(slt(C, X)) -> slt(X - 1, C)
669
670 return SDValue();
671}
672
673SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
674 SelectionDAG &DAG) const {
675 EVT VT = Op.getValueType();
676 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
677 const APFloat &FPVal = CFP->getValueAPF();
678 SDLoc DL(CFP);
679
680 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
681 (VT == MVT::f64 && Subtarget.hasBasicD()));
682
683 // If value is 0.0 or -0.0, just ignore it.
684 if (FPVal.isZero())
685 return SDValue();
686
687 // If lsx enabled, use cheaper 'vldi' instruction if possible.
688 if (isFPImmVLDILegal(FPVal, VT))
689 return SDValue();
690
691 // Construct as integer, and move to float register.
692 APInt INTVal = FPVal.bitcastToAPInt();
693
694 // If more than MaterializeFPImmInsNum instructions will be used to
695 // generate the INTVal and move it to float register, fallback to
696 // use floating point load from the constant pool.
698 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
699 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
700 return SDValue();
701
702 switch (VT.getSimpleVT().SimpleTy) {
703 default:
704 llvm_unreachable("Unexpected floating point type!");
705 break;
706 case MVT::f32: {
707 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
708 if (Subtarget.is64Bit())
709 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
710 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
711 : LoongArchISD::MOVGR2FR_W,
712 DL, VT, NewVal);
713 }
714 case MVT::f64: {
715 if (Subtarget.is64Bit()) {
716 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
717 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
718 }
719 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
720 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
721 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
722 }
723 }
724
725 return SDValue();
726}
727
// Lower vecreduce_add using vhaddw instructions.
// For Example:
// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
// can be lowered to:
// VHADDW_D_W vr0, vr0, vr0
// VHADDW_Q_D vr0, vr0, vr0
// VPICKVE2GR_D a0, vr0, 0
// ADDI_W a0, a0, 0
SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();
  SDValue Val = Op.getOperand(0);

  // Element count / element width of the (possibly illegal) input vector and
  // the scalar width of the reduction result.
  unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
  unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
  unsigned ResBits = OpVT.getScalarSizeInBits();

  unsigned LegalVecSize = 128;
  bool isLASX256Vector =
      Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;

  // Ensure operand type legal or enable it legal.
  while (!isTypeLegal(Val.getSimpleValueType())) {
    Val = DAG.WidenVector(Val, DL);
  }

  // NumEles is designed for iterations count, v4i32 for LSX
  // and v8i32 for LASX should have the same count.
  if (isLASX256Vector) {
    NumEles /= 2;
    LegalVecSize = 256;
  }

  // Pairwise horizontal add (VHADDW), doubling the element width each step,
  // until each register lane group has been folded into one accumulator.
  for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
    MVT IntTy = MVT::getIntegerVT(EleBits);
    MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
    Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
  }

  // For a 256-bit LASX vector, combine the two 128-bit halves with one more
  // add (XVPERMI with immediate 2 — presumably selects the high half; see the
  // LASX ISA manual to confirm).
  if (isLASX256Vector) {
    SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
                              DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
    Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
  }

  // Reinterpret as a vector of the result type and extract lane 0.
  Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
                     DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
}
779
780// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
781// For Example:
782// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
783// can be lowered to:
784// VBSRL_V vr1, vr0, 8
785// VMAX_W vr0, vr1, vr0
786// VBSRL_V vr1, vr0, 4
787// VMAX_W vr0, vr1, vr0
788// VPICKVE2GR_W a0, vr0, 0
789// For 256 bit vector, it is illegal and will be spilt into
790// two 128 bit vector by default then processed by this.
791SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
792 SelectionDAG &DAG) const {
793 SDLoc DL(Op);
794
795 MVT OpVT = Op.getSimpleValueType();
796 SDValue Val = Op.getOperand(0);
797
798 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
799 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
800
801 // Ensure operand type legal or enable it legal.
802 while (!isTypeLegal(Val.getSimpleValueType())) {
803 Val = DAG.WidenVector(Val, DL);
804 }
805
806 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
807 MVT VecTy = Val.getSimpleValueType();
808 MVT GRLenVT = Subtarget.getGRLenVT();
809
810 for (int i = NumEles; i > 1; i /= 2) {
811 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
812 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
813 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
814 }
815
816 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
817 DAG.getConstant(0, DL, GRLenVT));
818}
819
820SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
821 SelectionDAG &DAG) const {
822 unsigned IsData = Op.getConstantOperandVal(4);
823
824 // We don't support non-data prefetch.
825 // Just preserve the chain.
826 if (!IsData)
827 return Op.getOperand(0);
828
829 return Op;
830}
831
832SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
833 SelectionDAG &DAG) const {
834 MVT VT = Op.getSimpleValueType();
835 assert(VT.isVector() && "Unexpected type");
836
837 SDLoc DL(Op);
838 SDValue R = Op.getOperand(0);
839 SDValue Amt = Op.getOperand(1);
840 unsigned Opcode = Op.getOpcode();
841 unsigned EltSizeInBits = VT.getScalarSizeInBits();
842
843 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
844 if (V.getOpcode() != ISD::BUILD_VECTOR)
845 return false;
846 if (SDValue SplatValue =
847 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
848 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
849 CstSplatValue = C->getAPIntValue();
850 return true;
851 }
852 }
853 return false;
854 };
855
856 // Check for constant splat rotation amount.
857 APInt CstSplatValue;
858 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
859 bool isROTL = Opcode == ISD::ROTL;
860
861 // Check for splat rotate by zero.
862 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
863 return R;
864
865 // LoongArch targets always prefer ISD::ROTR.
866 if (isROTL) {
867 SDValue Zero = DAG.getConstant(0, DL, VT);
868 return DAG.getNode(ISD::ROTR, DL, VT, R,
869 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
870 }
871
872 // Rotate by a immediate.
873 if (IsCstSplat) {
874 // ISD::ROTR: Attemp to rotate by a positive immediate.
875 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
876 if (SDValue Urem =
877 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
878 return DAG.getNode(Opcode, DL, VT, R, Urem);
879 }
880
881 return Op;
882}
883
884// Return true if Val is equal to (setcc LHS, RHS, CC).
885// Return false if Val is the inverse of (setcc LHS, RHS, CC).
886// Otherwise, return std::nullopt.
887static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
888 ISD::CondCode CC, SDValue Val) {
889 assert(Val->getOpcode() == ISD::SETCC);
890 SDValue LHS2 = Val.getOperand(0);
891 SDValue RHS2 = Val.getOperand(1);
892 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
893
894 if (LHS == LHS2 && RHS == RHS2) {
895 if (CC == CC2)
896 return true;
897 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
898 return false;
899 } else if (LHS == RHS2 && RHS == LHS2) {
901 if (CC == CC2)
902 return true;
903 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
904 return false;
905 }
906
907 return std::nullopt;
908}
909
911 const LoongArchSubtarget &Subtarget) {
912 SDValue CondV = N->getOperand(0);
913 SDValue TrueV = N->getOperand(1);
914 SDValue FalseV = N->getOperand(2);
915 MVT VT = N->getSimpleValueType(0);
916 SDLoc DL(N);
917
918 // (select c, -1, y) -> -c | y
919 if (isAllOnesConstant(TrueV)) {
920 SDValue Neg = DAG.getNegative(CondV, DL, VT);
921 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
922 }
923 // (select c, y, -1) -> (c-1) | y
924 if (isAllOnesConstant(FalseV)) {
925 SDValue Neg =
926 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
927 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
928 }
929
930 // (select c, 0, y) -> (c-1) & y
931 if (isNullConstant(TrueV)) {
932 SDValue Neg =
933 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
934 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
935 }
936 // (select c, y, 0) -> -c & y
937 if (isNullConstant(FalseV)) {
938 SDValue Neg = DAG.getNegative(CondV, DL, VT);
939 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
940 }
941
942 // select c, ~x, x --> xor -c, x
943 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
944 const APInt &TrueVal = TrueV->getAsAPIntVal();
945 const APInt &FalseVal = FalseV->getAsAPIntVal();
946 if (~TrueVal == FalseVal) {
947 SDValue Neg = DAG.getNegative(CondV, DL, VT);
948 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
949 }
950 }
951
952 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
953 // when both truev and falsev are also setcc.
954 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
955 FalseV.getOpcode() == ISD::SETCC) {
956 SDValue LHS = CondV.getOperand(0);
957 SDValue RHS = CondV.getOperand(1);
958 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
959
960 // (select x, x, y) -> x | y
961 // (select !x, x, y) -> x & y
962 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
963 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
964 DAG.getFreeze(FalseV));
965 }
966 // (select x, y, x) -> x & y
967 // (select !x, y, x) -> x | y
968 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
969 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
970 DAG.getFreeze(TrueV), FalseV);
971 }
972 }
973
974 return SDValue();
975}
976
977// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
978// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
979// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
980// being `0` or `-1`. In such cases we can replace `select` with `and`.
981// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
982// than `c0`?
983static SDValue
985 const LoongArchSubtarget &Subtarget) {
986 unsigned SelOpNo = 0;
987 SDValue Sel = BO->getOperand(0);
988 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
989 SelOpNo = 1;
990 Sel = BO->getOperand(1);
991 }
992
993 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
994 return SDValue();
995
996 unsigned ConstSelOpNo = 1;
997 unsigned OtherSelOpNo = 2;
998 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
999 ConstSelOpNo = 2;
1000 OtherSelOpNo = 1;
1001 }
1002 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1003 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1004 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1005 return SDValue();
1006
1007 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1008 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1009 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1010 return SDValue();
1011
1012 SDLoc DL(Sel);
1013 EVT VT = BO->getValueType(0);
1014
1015 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1016 if (SelOpNo == 1)
1017 std::swap(NewConstOps[0], NewConstOps[1]);
1018
1019 SDValue NewConstOp =
1020 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1021 if (!NewConstOp)
1022 return SDValue();
1023
1024 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1025 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1026 return SDValue();
1027
1028 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1029 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1030 if (SelOpNo == 1)
1031 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1032 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1033
1034 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1035 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1036 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1037}
1038
1039// Changes the condition code and swaps operands if necessary, so the SetCC
1040// operation matches one of the comparisons supported directly by branches
1041// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1042// compare with 1/-1.
1044 ISD::CondCode &CC, SelectionDAG &DAG) {
1045 // If this is a single bit test that can't be handled by ANDI, shift the
1046 // bit to be tested to the MSB and perform a signed compare with 0.
1047 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1048 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1049 isa<ConstantSDNode>(LHS.getOperand(1))) {
1050 uint64_t Mask = LHS.getConstantOperandVal(1);
1051 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1052 unsigned ShAmt = 0;
1053 if (isPowerOf2_64(Mask)) {
1054 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1055 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1056 } else {
1057 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1058 }
1059
1060 LHS = LHS.getOperand(0);
1061 if (ShAmt != 0)
1062 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1063 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1064 return;
1065 }
1066 }
1067
1068 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1069 int64_t C = RHSC->getSExtValue();
1070 switch (CC) {
1071 default:
1072 break;
1073 case ISD::SETGT:
1074 // Convert X > -1 to X >= 0.
1075 if (C == -1) {
1076 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1077 CC = ISD::SETGE;
1078 return;
1079 }
1080 break;
1081 case ISD::SETLT:
1082 // Convert X < 1 to 0 >= X.
1083 if (C == 1) {
1084 RHS = LHS;
1085 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1086 CC = ISD::SETGE;
1087 return;
1088 }
1089 break;
1090 }
1091 }
1092
1093 switch (CC) {
1094 default:
1095 break;
1096 case ISD::SETGT:
1097 case ISD::SETLE:
1098 case ISD::SETUGT:
1099 case ISD::SETULE:
1101 std::swap(LHS, RHS);
1102 break;
1103 }
1104}
1105
// Custom lowering for ISD::SELECT.
//
// Tries, in order: folding the select into bitwise ops on the condition
// (combineSelectToBinOp); folding the select's single binary-op user through
// the select (foldBinOpIntoSelectIfProfitable); and finally emits a
// LoongArchISD::SELECT_CC, merging a GRLenVT integer SETCC condition into it
// where possible.
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Selects with constant 0/-1 arms (or matching setcc arms) become plain
  // bitwise arithmetic on the condition.
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If this select's only user is a speculatable binary op, try to push the
  // op into both arms of the select.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1204
1205SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1206 SelectionDAG &DAG) const {
1207 SDValue CondV = Op.getOperand(1);
1208 SDLoc DL(Op);
1209 MVT GRLenVT = Subtarget.getGRLenVT();
1210
1211 if (CondV.getOpcode() == ISD::SETCC) {
1212 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1213 SDValue LHS = CondV.getOperand(0);
1214 SDValue RHS = CondV.getOperand(1);
1215 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1216
1217 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1218
1219 SDValue TargetCC = DAG.getCondCode(CCVal);
1220 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1221 Op.getOperand(0), LHS, RHS, TargetCC,
1222 Op.getOperand(2));
1223 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1224 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1225 Op.getOperand(0), CondV, Op.getOperand(2));
1226 }
1227 }
1228
1229 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1230 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1231 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1232}
1233
1234SDValue
1235LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1236 SelectionDAG &DAG) const {
1237 SDLoc DL(Op);
1238 MVT OpVT = Op.getSimpleValueType();
1239
1240 SDValue Vector = DAG.getUNDEF(OpVT);
1241 SDValue Val = Op.getOperand(0);
1242 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1243
1244 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1245}
1246
1247SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1248 SelectionDAG &DAG) const {
1249 EVT ResTy = Op->getValueType(0);
1250 SDValue Src = Op->getOperand(0);
1251 SDLoc DL(Op);
1252
1253 // LoongArchISD::BITREV_8B is not supported on LA32.
1254 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1255 return SDValue();
1256
1257 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1258 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1259 unsigned int NewEltNum = NewVT.getVectorNumElements();
1260
1261 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1262
1264 for (unsigned int i = 0; i < NewEltNum; i++) {
1265 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1266 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1267 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1268 ? (unsigned)LoongArchISD::BITREV_8B
1269 : (unsigned)ISD::BITREVERSE;
1270 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1271 }
1272 SDValue Res =
1273 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1274
1275 switch (ResTy.getSimpleVT().SimpleTy) {
1276 default:
1277 return SDValue();
1278 case MVT::v16i8:
1279 case MVT::v32i8:
1280 return Res;
1281 case MVT::v8i16:
1282 case MVT::v16i16:
1283 case MVT::v4i32:
1284 case MVT::v8i32: {
1286 for (unsigned int i = 0; i < NewEltNum; i++)
1287 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1288 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1289 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1290 }
1291 }
1292}
1293
1294// Widen element type to get a new mask value (if possible).
1295// For example:
1296// shufflevector <4 x i32> %a, <4 x i32> %b,
1297// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1298// is equivalent to:
1299// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1300// can be lowered to:
1301// VPACKOD_D vr0, vr0, vr1
1303 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1304 unsigned EltBits = VT.getScalarSizeInBits();
1305
1306 if (EltBits > 32 || EltBits == 1)
1307 return SDValue();
1308
1309 SmallVector<int, 8> NewMask;
1310 if (widenShuffleMaskElts(Mask, NewMask)) {
1311 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1312 : MVT::getIntegerVT(EltBits * 2);
1313 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1314 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1315 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1316 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1317 return DAG.getBitcast(
1318 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1319 }
1320 }
1321
1322 return SDValue();
1323}
1324
1325/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1326/// instruction.
1327// The funciton matches elements from one of the input vector shuffled to the
1328// left or right with zeroable elements 'shifted in'. It handles both the
1329// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
1330// lane.
1331// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // Verify that all elements "shifted in" are zeroable: for a left shift the
  // first 'Shift' elements of each 'Scale'-sized group, for a right shift
  // the last 'Shift' elements.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // True if Mask[Pos, Pos+Size) is undef or matches Low, Low+Step, ...
  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // Try to match a shift by 'Shift' elements within groups of 'Scale'
  // elements. On success, set Opcode/ShiftVT through the reference
  // parameters and return the immediate shift amount; otherwise return -1.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    // Groups wider than 64 bits require the byte-granular whole-lane shifts
    // (VBSLL/VBSRL); otherwise use the per-element bit shifts (VSLLI/VSRLI).
    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    // Byte shifts take their amount in bytes, bit shifts in bits.
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // Try every shift amount for every power-of-two group size up to a full
  // 128-bit lane, in both directions, accepting the first candidate whose
  // shifted-in elements are all zeroable.
  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1394
1395/// Lower VECTOR_SHUFFLE as shift (if possible).
1396///
1397/// For example:
1398/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1399/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1400/// is lowered to:
1401/// (VBSLL_V $v0, $v0, 4)
1402///
1403/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1404/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1405/// is lowered to:
1406/// (VSLLI_D $v0, $v0, 32)
1408 MVT VT, SDValue V1, SDValue V2,
1409 SelectionDAG &DAG,
1410 const LoongArchSubtarget &Subtarget,
1411 const APInt &Zeroable) {
1412 int Size = Mask.size();
1413 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1414
1415 MVT ShiftVT;
1416 SDValue V = V1;
1417 unsigned Opcode;
1418
1419 // Try to match shuffle against V1 shift.
1420 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1421 Mask, 0, Zeroable);
1422
1423 // If V1 failed, try to match shuffle against V2 shift.
1424 if (ShiftAmt < 0) {
1425 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1426 Mask, Size, Zeroable);
1427 V = V2;
1428 }
1429
1430 if (ShiftAmt < 0)
1431 return SDValue();
1432
1433 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1434 "Illegal integer vector type");
1435 V = DAG.getBitcast(ShiftVT, V);
1436 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1437 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1438 return DAG.getBitcast(VT, V);
1439}
1440
1441/// Determine whether a range fits a regular pattern of values.
1442/// This function accounts for the possibility of jumping over the End iterator.
1443template <typename ValType>
1444static bool
1446 unsigned CheckStride,
1448 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1449 auto &I = Begin;
1450
1451 while (I != End) {
1452 if (*I != -1 && *I != ExpectedIndex)
1453 return false;
1454 ExpectedIndex += ExpectedIndexStride;
1455
1456 // Incrementing past End is undefined behaviour so we must increment one
1457 // step at a time and check for End at each step.
1458 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1459 ; // Empty loop body.
1460 }
1461 return true;
1462}
1463
1464/// Compute whether each element of a shuffle is zeroable.
1465///
1466/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1468 SDValue V2, APInt &KnownUndef,
1469 APInt &KnownZero) {
1470 int Size = Mask.size();
1471 KnownUndef = KnownZero = APInt::getZero(Size);
1472
1473 V1 = peekThroughBitcasts(V1);
1474 V2 = peekThroughBitcasts(V2);
1475
1476 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1477 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1478
1479 int VectorSizeInBits = V1.getValueSizeInBits();
1480 int ScalarSizeInBits = VectorSizeInBits / Size;
1481 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1482 (void)ScalarSizeInBits;
1483
1484 for (int i = 0; i < Size; ++i) {
1485 int M = Mask[i];
1486 if (M < 0) {
1487 KnownUndef.setBit(i);
1488 continue;
1489 }
1490 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1491 KnownZero.setBit(i);
1492 continue;
1493 }
1494 }
1495}
1496
1497/// Test whether a shuffle mask is equivalent within each sub-lane.
1498///
1499/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1500/// non-trivial to compute in the face of undef lanes. The representation is
1501/// suitable for use with existing 128-bit shuffles as entries from the second
1502/// vector have been remapped to [LaneSize, 2*LaneSize).
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                  ArrayRef<int> Mask,
                                  SmallVectorImpl<int> &RepeatedMask) {
  // Number of elements per lane; RepeatedMask holds one lane's worth.
  auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
  RepeatedMask.assign(LaneSize, -1);
  int Size = Mask.size();
  // Fold each mask entry into its slot within the lane; undef entries match
  // anything.
  for (int i = 0; i < Size; ++i) {
    assert(Mask[i] == -1 || Mask[i] >= 0);
    if (Mask[i] < 0)
      continue;
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      // This entry crosses lanes, so there is no way to model this shuffle.
      return false;

    // Ok, handle the in-lane shuffles by detecting if and when they repeat.
    // Adjust second vector indices to start at LaneSize instead of Size.
    int LocalM =
        Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
    if (RepeatedMask[i % LaneSize] < 0)
      // This is the first non-undef entry in this slot of a 128-bit lane.
      RepeatedMask[i % LaneSize] = LocalM;
    else if (RepeatedMask[i % LaneSize] != LocalM)
      // Found a mismatch with the repeated mask.
      return false;
  }
  return true;
}
1530
1531/// Attempts to match vector shuffle as byte rotation.
1533 ArrayRef<int> Mask) {
1534
1535 SDValue Lo, Hi;
1536 SmallVector<int, 16> RepeatedMask;
1537
1538 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1539 return -1;
1540
1541 int NumElts = RepeatedMask.size();
1542 int Rotation = 0;
1543 int Scale = 16 / NumElts;
1544
1545 for (int i = 0; i < NumElts; ++i) {
1546 int M = RepeatedMask[i];
1547 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1548 "Unexpected mask index.");
1549 if (M < 0)
1550 continue;
1551
1552 // Determine where a rotated vector would have started.
1553 int StartIdx = i - (M % NumElts);
1554 if (StartIdx == 0)
1555 return -1;
1556
1557 // If we found the tail of a vector the rotation must be the missing
1558 // front. If we found the head of a vector, it must be how much of the
1559 // head.
1560 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1561
1562 if (Rotation == 0)
1563 Rotation = CandidateRotation;
1564 else if (Rotation != CandidateRotation)
1565 return -1;
1566
1567 // Compute which value this mask is pointing at.
1568 SDValue MaskV = M < NumElts ? V1 : V2;
1569
1570 // Compute which of the two target values this index should be assigned
1571 // to. This reflects whether the high elements are remaining or the low
1572 // elements are remaining.
1573 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1574
1575 // Either set up this value if we've not encountered it before, or check
1576 // that it remains consistent.
1577 if (!TargetV)
1578 TargetV = MaskV;
1579 else if (TargetV != MaskV)
1580 return -1;
1581 }
1582
1583 // Check that we successfully analyzed the mask, and normalize the results.
1584 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1585 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1586 if (!Lo)
1587 Lo = Hi;
1588 else if (!Hi)
1589 Hi = Lo;
1590
1591 V1 = Lo;
1592 V2 = Hi;
1593
1594 return Rotation * Scale;
1595}
1596
1597/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1598///
1599/// For example:
1600/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1601/// <2 x i32> <i32 3, i32 0>
1602/// is lowered to:
1603/// (VBSRL_V $v1, $v1, 8)
1604/// (VBSLL_V $v0, $v0, 8)
1605/// (VOR_V $v0, $V0, $v1)
1606static SDValue
1608 SDValue V1, SDValue V2, SelectionDAG &DAG,
1609 const LoongArchSubtarget &Subtarget) {
1610
1611 SDValue Lo = V1, Hi = V2;
1612 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1613 if (ByteRotation <= 0)
1614 return SDValue();
1615
1616 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1617 Lo = DAG.getBitcast(ByteVT, Lo);
1618 Hi = DAG.getBitcast(ByteVT, Hi);
1619
1620 int LoByteShift = 16 - ByteRotation;
1621 int HiByteShift = ByteRotation;
1622 MVT GRLenVT = Subtarget.getGRLenVT();
1623
1624 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1625 DAG.getConstant(LoByteShift, DL, GRLenVT));
1626 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1627 DAG.getConstant(HiByteShift, DL, GRLenVT));
1628 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1629}
1630
1631/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1632///
1633/// For example:
1634/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1635/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1636/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1637/// is lowered to:
1638/// (VREPLI $v1, 0)
1639/// (VILVL $v0, $v1, $v0)
1641 ArrayRef<int> Mask, MVT VT,
1642 SDValue V1, SDValue V2,
1643 SelectionDAG &DAG,
1644 const APInt &Zeroable) {
1645 int Bits = VT.getSizeInBits();
1646 int EltBits = VT.getScalarSizeInBits();
1647 int NumElements = VT.getVectorNumElements();
1648
1649 if (Zeroable.isAllOnes())
1650 return DAG.getConstant(0, DL, VT);
1651
1652 // Define a helper function to check a particular ext-scale and lower to it if
1653 // valid.
1654 auto Lower = [&](int Scale) -> SDValue {
1655 SDValue InputV;
1656 bool AnyExt = true;
1657 int Offset = 0;
1658 for (int i = 0; i < NumElements; i++) {
1659 int M = Mask[i];
1660 if (M < 0)
1661 continue;
1662 if (i % Scale != 0) {
1663 // Each of the extended elements need to be zeroable.
1664 if (!Zeroable[i])
1665 return SDValue();
1666
1667 AnyExt = false;
1668 continue;
1669 }
1670
1671 // Each of the base elements needs to be consecutive indices into the
1672 // same input vector.
1673 SDValue V = M < NumElements ? V1 : V2;
1674 M = M % NumElements;
1675 if (!InputV) {
1676 InputV = V;
1677 Offset = M - (i / Scale);
1678
1679 // These offset can't be handled
1680 if (Offset % (NumElements / Scale))
1681 return SDValue();
1682 } else if (InputV != V)
1683 return SDValue();
1684
1685 if (M != (Offset + (i / Scale)))
1686 return SDValue(); // Non-consecutive strided elements.
1687 }
1688
1689 // If we fail to find an input, we have a zero-shuffle which should always
1690 // have already been handled.
1691 if (!InputV)
1692 return SDValue();
1693
1694 do {
1695 unsigned VilVLoHi = LoongArchISD::VILVL;
1696 if (Offset >= (NumElements / 2)) {
1697 VilVLoHi = LoongArchISD::VILVH;
1698 Offset -= (NumElements / 2);
1699 }
1700
1701 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1702 SDValue Ext =
1703 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1704 InputV = DAG.getBitcast(InputVT, InputV);
1705 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1706 Scale /= 2;
1707 EltBits *= 2;
1708 NumElements /= 2;
1709 } while (Scale > 1);
1710 return DAG.getBitcast(VT, InputV);
1711 };
1712
1713 // Each iteration, try extending the elements half as much, but into twice as
1714 // many elements.
1715 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1716 NumExtElements *= 2) {
1717 if (SDValue V = Lower(NumElements / NumExtElements))
1718 return V;
1719 }
1720 return SDValue();
1721}
1722
1723/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1724///
1725/// VREPLVEI performs vector broadcast based on an element specified by an
1726/// integer immediate, with its mask being similar to:
1727/// <x, x, x, ...>
1728/// where x is any valid index.
1729///
1730/// When undef's appear in the mask they are treated as if they were whatever
1731/// value is necessary in order to fit the above form.
1732static SDValue
1734 SDValue V1, SelectionDAG &DAG,
1735 const LoongArchSubtarget &Subtarget) {
1736 int SplatIndex = -1;
1737 for (const auto &M : Mask) {
1738 if (M != -1) {
1739 SplatIndex = M;
1740 break;
1741 }
1742 }
1743
1744 if (SplatIndex == -1)
1745 return DAG.getUNDEF(VT);
1746
1747 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1748 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1749 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1750 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1751 }
1752
1753 return SDValue();
1754}
1755
1756/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1757///
1758/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1759/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1760///
1761/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1762/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1763/// When undef's appear they are treated as if they were whatever value is
1764/// necessary in order to fit the above forms.
1765///
1766/// For example:
1767/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1768/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1769/// i32 7, i32 6, i32 5, i32 4>
1770/// is lowered to:
1771/// (VSHUF4I_H $v0, $v1, 27)
1772/// where the 27 comes from:
1773/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1774static SDValue
1776 SDValue V1, SDValue V2, SelectionDAG &DAG,
1777 const LoongArchSubtarget &Subtarget) {
1778
1779 unsigned SubVecSize = 4;
1780 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1781 SubVecSize = 2;
1782
1783 int SubMask[4] = {-1, -1, -1, -1};
1784 for (unsigned i = 0; i < SubVecSize; ++i) {
1785 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1786 int M = Mask[j];
1787
1788 // Convert from vector index to 4-element subvector index
1789 // If an index refers to an element outside of the subvector then give up
1790 if (M != -1) {
1791 M -= 4 * (j / SubVecSize);
1792 if (M < 0 || M >= 4)
1793 return SDValue();
1794 }
1795
1796 // If the mask has an undef, replace it with the current index.
1797 // Note that it might still be undef if the current index is also undef
1798 if (SubMask[i] == -1)
1799 SubMask[i] = M;
1800 // Check that non-undef values are the same as in the mask. If they
1801 // aren't then give up
1802 else if (M != -1 && M != SubMask[i])
1803 return SDValue();
1804 }
1805 }
1806
1807 // Calculate the immediate. Replace any remaining undefs with zero
1808 int Imm = 0;
1809 for (int i = SubVecSize - 1; i >= 0; --i) {
1810 int M = SubMask[i];
1811
1812 if (M == -1)
1813 M = 0;
1814
1815 Imm <<= 2;
1816 Imm |= M & 0x3;
1817 }
1818
1819 MVT GRLenVT = Subtarget.getGRLenVT();
1820
1821 // Return vshuf4i.d
1822 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1823 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1824 DAG.getConstant(Imm, DL, GRLenVT));
1825
1826 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1827 DAG.getConstant(Imm, DL, GRLenVT));
1828}
1829
1830/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1831///
1832/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1833/// reverse whose mask likes:
1834/// <7, 6, 5, 4, 3, 2, 1, 0>
1835///
1836/// When undef's appear in the mask they are treated as if they were whatever
1837/// value is necessary in order to fit the above forms.
1838static SDValue
1840 SDValue V1, SelectionDAG &DAG,
1841 const LoongArchSubtarget &Subtarget) {
1842 // Only vectors with i8/i16 elements which cannot match other patterns
1843 // directly needs to do this.
1844 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1845 VT != MVT::v16i16)
1846 return SDValue();
1847
1848 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1849 return SDValue();
1850
1851 int WidenNumElts = VT.getVectorNumElements() / 4;
1852 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1853 for (int i = 0; i < WidenNumElts; ++i)
1854 WidenMask[i] = WidenNumElts - 1 - i;
1855
1856 MVT WidenVT = MVT::getVectorVT(
1857 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1858 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1859 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1860 DAG.getUNDEF(WidenVT), WidenMask);
1861
1862 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1863 DAG.getBitcast(VT, WidenRev),
1864 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1865}
1866
1867/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1868///
1869/// VPACKEV interleaves the even elements from each vector.
1870///
1871/// It is possible to lower into VPACKEV when the mask consists of two of the
1872/// following forms interleaved:
1873/// <0, 2, 4, ...>
1874/// <n, n+2, n+4, ...>
1875/// where n is the number of elements in the vector.
1876/// For example:
1877/// <0, 0, 2, 2, 4, 4, ...>
1878/// <0, n, 2, n+2, 4, n+4, ...>
1879///
1880/// When undef's appear in the mask they are treated as if they were whatever
1881/// value is necessary in order to fit the above forms.
1883 MVT VT, SDValue V1, SDValue V2,
1884 SelectionDAG &DAG) {
1885
1886 const auto &Begin = Mask.begin();
1887 const auto &End = Mask.end();
1888 SDValue OriV1 = V1, OriV2 = V2;
1889
1890 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1891 V1 = OriV1;
1892 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1893 V1 = OriV2;
1894 else
1895 return SDValue();
1896
1897 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1898 V2 = OriV1;
1899 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1900 V2 = OriV2;
1901 else
1902 return SDValue();
1903
1904 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1905}
1906
1907/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1908///
1909/// VPACKOD interleaves the odd elements from each vector.
1910///
1911/// It is possible to lower into VPACKOD when the mask consists of two of the
1912/// following forms interleaved:
1913/// <1, 3, 5, ...>
1914/// <n+1, n+3, n+5, ...>
1915/// where n is the number of elements in the vector.
1916/// For example:
1917/// <1, 1, 3, 3, 5, 5, ...>
1918/// <1, n+1, 3, n+3, 5, n+5, ...>
1919///
1920/// When undef's appear in the mask they are treated as if they were whatever
1921/// value is necessary in order to fit the above forms.
1923 MVT VT, SDValue V1, SDValue V2,
1924 SelectionDAG &DAG) {
1925
1926 const auto &Begin = Mask.begin();
1927 const auto &End = Mask.end();
1928 SDValue OriV1 = V1, OriV2 = V2;
1929
1930 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1931 V1 = OriV1;
1932 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1933 V1 = OriV2;
1934 else
1935 return SDValue();
1936
1937 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1938 V2 = OriV1;
1939 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1940 V2 = OriV2;
1941 else
1942 return SDValue();
1943
1944 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1945}
1946
1947/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1948///
1949/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1950/// of each vector.
1951///
1952/// It is possible to lower into VILVH when the mask consists of two of the
1953/// following forms interleaved:
1954/// <x, x+1, x+2, ...>
1955/// <n+x, n+x+1, n+x+2, ...>
1956/// where n is the number of elements in the vector and x is half n.
1957/// For example:
1958/// <x, x, x+1, x+1, x+2, x+2, ...>
1959/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1960///
1961/// When undef's appear in the mask they are treated as if they were whatever
1962/// value is necessary in order to fit the above forms.
1964 MVT VT, SDValue V1, SDValue V2,
1965 SelectionDAG &DAG) {
1966
1967 const auto &Begin = Mask.begin();
1968 const auto &End = Mask.end();
1969 unsigned HalfSize = Mask.size() / 2;
1970 SDValue OriV1 = V1, OriV2 = V2;
1971
1972 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1973 V1 = OriV1;
1974 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1975 V1 = OriV2;
1976 else
1977 return SDValue();
1978
1979 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1980 V2 = OriV1;
1981 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1982 1))
1983 V2 = OriV2;
1984 else
1985 return SDValue();
1986
1987 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1988}
1989
1990/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1991///
1992/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1993/// of each vector.
1994///
1995/// It is possible to lower into VILVL when the mask consists of two of the
1996/// following forms interleaved:
1997/// <0, 1, 2, ...>
1998/// <n, n+1, n+2, ...>
1999/// where n is the number of elements in the vector.
2000/// For example:
2001/// <0, 0, 1, 1, 2, 2, ...>
2002/// <0, n, 1, n+1, 2, n+2, ...>
2003///
2004/// When undef's appear in the mask they are treated as if they were whatever
2005/// value is necessary in order to fit the above forms.
2007 MVT VT, SDValue V1, SDValue V2,
2008 SelectionDAG &DAG) {
2009
2010 const auto &Begin = Mask.begin();
2011 const auto &End = Mask.end();
2012 SDValue OriV1 = V1, OriV2 = V2;
2013
2014 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2015 V1 = OriV1;
2016 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2017 V1 = OriV2;
2018 else
2019 return SDValue();
2020
2021 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2022 V2 = OriV1;
2023 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2024 V2 = OriV2;
2025 else
2026 return SDValue();
2027
2028 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2029}
2030
2031/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2032///
2033/// VPICKEV copies the even elements of each vector into the result vector.
2034///
2035/// It is possible to lower into VPICKEV when the mask consists of two of the
2036/// following forms concatenated:
2037/// <0, 2, 4, ...>
2038/// <n, n+2, n+4, ...>
2039/// where n is the number of elements in the vector.
2040/// For example:
2041/// <0, 2, 4, ..., 0, 2, 4, ...>
2042/// <0, 2, 4, ..., n, n+2, n+4, ...>
2043///
2044/// When undef's appear in the mask they are treated as if they were whatever
2045/// value is necessary in order to fit the above forms.
2047 MVT VT, SDValue V1, SDValue V2,
2048 SelectionDAG &DAG) {
2049
2050 const auto &Begin = Mask.begin();
2051 const auto &Mid = Mask.begin() + Mask.size() / 2;
2052 const auto &End = Mask.end();
2053 SDValue OriV1 = V1, OriV2 = V2;
2054
2055 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2056 V1 = OriV1;
2057 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2058 V1 = OriV2;
2059 else
2060 return SDValue();
2061
2062 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2063 V2 = OriV1;
2064 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2065 V2 = OriV2;
2066
2067 else
2068 return SDValue();
2069
2070 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2071}
2072
2073/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2074///
2075/// VPICKOD copies the odd elements of each vector into the result vector.
2076///
2077/// It is possible to lower into VPICKOD when the mask consists of two of the
2078/// following forms concatenated:
2079/// <1, 3, 5, ...>
2080/// <n+1, n+3, n+5, ...>
2081/// where n is the number of elements in the vector.
2082/// For example:
2083/// <1, 3, 5, ..., 1, 3, 5, ...>
2084/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2085///
2086/// When undef's appear in the mask they are treated as if they were whatever
2087/// value is necessary in order to fit the above forms.
2089 MVT VT, SDValue V1, SDValue V2,
2090 SelectionDAG &DAG) {
2091
2092 const auto &Begin = Mask.begin();
2093 const auto &Mid = Mask.begin() + Mask.size() / 2;
2094 const auto &End = Mask.end();
2095 SDValue OriV1 = V1, OriV2 = V2;
2096
2097 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2098 V1 = OriV1;
2099 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2100 V1 = OriV2;
2101 else
2102 return SDValue();
2103
2104 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2105 V2 = OriV1;
2106 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2107 V2 = OriV2;
2108 else
2109 return SDValue();
2110
2111 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2112}
2113
2114/// Lower VECTOR_SHUFFLE into VSHUF.
2115///
2116/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2117/// adding it as an operand to the resulting VSHUF.
2119 MVT VT, SDValue V1, SDValue V2,
2120 SelectionDAG &DAG,
2121 const LoongArchSubtarget &Subtarget) {
2122
2124 for (auto M : Mask)
2125 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2126
2127 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2128 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2129
2130 // VECTOR_SHUFFLE concatenates the vectors in an vectorwise fashion.
2131 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2132 // VSHF concatenates the vectors in a bitwise fashion:
2133 // <0b00, 0b01> + <0b10, 0b11> ->
2134 // 0b0100 + 0b1110 -> 0b01001110
2135 // <0b10, 0b11, 0b00, 0b01>
2136 // We must therefore swap the operands to get the correct result.
2137 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2138}
2139
2140/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2141///
2142/// This routine breaks down the specific type of 128-bit shuffle and
2143/// dispatches to the lowering routines accordingly.
2145 SDValue V1, SDValue V2, SelectionDAG &DAG,
2146 const LoongArchSubtarget &Subtarget) {
2147 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2148 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2149 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2150 "Vector type is unsupported for lsx!");
2152 "Two operands have different types!");
2153 assert(VT.getVectorNumElements() == Mask.size() &&
2154 "Unexpected mask size for shuffle!");
2155 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2156
2157 APInt KnownUndef, KnownZero;
2158 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2159 APInt Zeroable = KnownUndef | KnownZero;
2160
2161 SDValue Result;
2162 // TODO: Add more comparison patterns.
2163 if (V2.isUndef()) {
2164 if ((Result =
2165 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2166 return Result;
2167 if ((Result =
2168 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2169 return Result;
2170 if ((Result =
2171 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2172 return Result;
2173
2174 // TODO: This comment may be enabled in the future to better match the
2175 // pattern for instruction selection.
2176 /* V2 = V1; */
2177 }
2178
2179 // It is recommended not to change the pattern comparison order for better
2180 // performance.
2181 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2182 return Result;
2183 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2184 return Result;
2185 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2186 return Result;
2187 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2188 return Result;
2189 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2190 return Result;
2191 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2192 return Result;
2193 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2194 (Result =
2195 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2196 return Result;
2197 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2198 Zeroable)))
2199 return Result;
2200 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2201 Zeroable)))
2202 return Result;
2203 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2204 Subtarget)))
2205 return Result;
2206 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2207 return NewShuffle;
2208 if ((Result =
2209 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2210 return Result;
2211 return SDValue();
2212}
2213
2214/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2215///
2216/// It is a XVREPLVEI when the mask is:
2217/// <x, x, x, ..., x+n, x+n, x+n, ...>
2218/// where the number of x is equal to n and n is half the length of vector.
2219///
2220/// When undef's appear in the mask they are treated as if they were whatever
2221/// value is necessary in order to fit the above form.
2222static SDValue
2224 SDValue V1, SelectionDAG &DAG,
2225 const LoongArchSubtarget &Subtarget) {
2226 int SplatIndex = -1;
2227 for (const auto &M : Mask) {
2228 if (M != -1) {
2229 SplatIndex = M;
2230 break;
2231 }
2232 }
2233
2234 if (SplatIndex == -1)
2235 return DAG.getUNDEF(VT);
2236
2237 const auto &Begin = Mask.begin();
2238 const auto &End = Mask.end();
2239 int HalfSize = Mask.size() / 2;
2240
2241 if (SplatIndex >= HalfSize)
2242 return SDValue();
2243
2244 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2245 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2246 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2247 0)) {
2248 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2249 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2250 }
2251
2252 return SDValue();
2253}
2254
2255/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2256static SDValue
2258 SDValue V1, SDValue V2, SelectionDAG &DAG,
2259 const LoongArchSubtarget &Subtarget) {
2260 // When the size is less than or equal to 4, lower cost instructions may be
2261 // used.
2262 if (Mask.size() <= 4)
2263 return SDValue();
2264 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2265}
2266
2267/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2268static SDValue
2270 SDValue V1, SelectionDAG &DAG,
2271 const LoongArchSubtarget &Subtarget) {
2272 // Only consider XVPERMI_D.
2273 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2274 return SDValue();
2275
2276 unsigned MaskImm = 0;
2277 for (unsigned i = 0; i < Mask.size(); ++i) {
2278 if (Mask[i] == -1)
2279 continue;
2280 MaskImm |= Mask[i] << (i * 2);
2281 }
2282
2283 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2284 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2285}
2286
2287/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2289 MVT VT, SDValue V1, SelectionDAG &DAG,
2290 const LoongArchSubtarget &Subtarget) {
2291 // LoongArch LASX only have XVPERM_W.
2292 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2293 return SDValue();
2294
2295 unsigned NumElts = VT.getVectorNumElements();
2296 unsigned HalfSize = NumElts / 2;
2297 bool FrontLo = true, FrontHi = true;
2298 bool BackLo = true, BackHi = true;
2299
2300 auto inRange = [](int val, int low, int high) {
2301 return (val == -1) || (val >= low && val < high);
2302 };
2303
2304 for (unsigned i = 0; i < HalfSize; ++i) {
2305 int Fronti = Mask[i];
2306 int Backi = Mask[i + HalfSize];
2307
2308 FrontLo &= inRange(Fronti, 0, HalfSize);
2309 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2310 BackLo &= inRange(Backi, 0, HalfSize);
2311 BackHi &= inRange(Backi, HalfSize, NumElts);
2312 }
2313
2314 // If both the lower and upper 128-bit parts access only one half of the
2315 // vector (either lower or upper), avoid using xvperm.w. The latency of
2316 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2317 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2318 return SDValue();
2319
2321 MVT GRLenVT = Subtarget.getGRLenVT();
2322 for (unsigned i = 0; i < NumElts; ++i)
2323 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2324 : DAG.getConstant(Mask[i], DL, GRLenVT));
2325 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2326
2327 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2328}
2329
2330/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2332 MVT VT, SDValue V1, SDValue V2,
2333 SelectionDAG &DAG) {
2334 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2335}
2336
2337/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2339 MVT VT, SDValue V1, SDValue V2,
2340 SelectionDAG &DAG) {
2341 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2342}
2343
2344/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2346 MVT VT, SDValue V1, SDValue V2,
2347 SelectionDAG &DAG) {
2348
2349 const auto &Begin = Mask.begin();
2350 const auto &End = Mask.end();
2351 unsigned HalfSize = Mask.size() / 2;
2352 unsigned LeftSize = HalfSize / 2;
2353 SDValue OriV1 = V1, OriV2 = V2;
2354
2355 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2356 1) &&
2357 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2358 V1 = OriV1;
2359 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2360 Mask.size() + HalfSize - LeftSize, 1) &&
2361 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2362 Mask.size() + HalfSize + LeftSize, 1))
2363 V1 = OriV2;
2364 else
2365 return SDValue();
2366
2367 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2368 1) &&
2369 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2370 1))
2371 V2 = OriV1;
2372 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2373 Mask.size() + HalfSize - LeftSize, 1) &&
2374 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2375 Mask.size() + HalfSize + LeftSize, 1))
2376 V2 = OriV2;
2377 else
2378 return SDValue();
2379
2380 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2381}
2382
2383/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2385 MVT VT, SDValue V1, SDValue V2,
2386 SelectionDAG &DAG) {
2387
2388 const auto &Begin = Mask.begin();
2389 const auto &End = Mask.end();
2390 unsigned HalfSize = Mask.size() / 2;
2391 SDValue OriV1 = V1, OriV2 = V2;
2392
2393 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2394 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2395 V1 = OriV1;
2396 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2397 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2398 Mask.size() + HalfSize, 1))
2399 V1 = OriV2;
2400 else
2401 return SDValue();
2402
2403 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2404 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2405 V2 = OriV1;
2406 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2407 1) &&
2408 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2409 Mask.size() + HalfSize, 1))
2410 V2 = OriV2;
2411 else
2412 return SDValue();
2413
2414 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2415}
2416
2417/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2419 MVT VT, SDValue V1, SDValue V2,
2420 SelectionDAG &DAG) {
2421
2422 const auto &Begin = Mask.begin();
2423 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2424 const auto &Mid = Mask.begin() + Mask.size() / 2;
2425 const auto &RightMid = Mask.end() - Mask.size() / 4;
2426 const auto &End = Mask.end();
2427 unsigned HalfSize = Mask.size() / 2;
2428 SDValue OriV1 = V1, OriV2 = V2;
2429
2430 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2431 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2432 V1 = OriV1;
2433 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2434 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2435 V1 = OriV2;
2436 else
2437 return SDValue();
2438
2439 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2440 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2441 V2 = OriV1;
2442 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2443 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2444 V2 = OriV2;
2445
2446 else
2447 return SDValue();
2448
2449 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2450}
2451
2452/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2454 MVT VT, SDValue V1, SDValue V2,
2455 SelectionDAG &DAG) {
2456
2457 const auto &Begin = Mask.begin();
2458 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2459 const auto &Mid = Mask.begin() + Mask.size() / 2;
2460 const auto &RightMid = Mask.end() - Mask.size() / 4;
2461 const auto &End = Mask.end();
2462 unsigned HalfSize = Mask.size() / 2;
2463 SDValue OriV1 = V1, OriV2 = V2;
2464
2465 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2466 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2467 V1 = OriV1;
2468 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2469 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2470 2))
2471 V1 = OriV2;
2472 else
2473 return SDValue();
2474
2475 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2476 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2477 V2 = OriV1;
2478 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2479 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2480 2))
2481 V2 = OriV2;
2482 else
2483 return SDValue();
2484
2485 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2486}
2487
2488/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2489static SDValue
2491 SDValue V1, SDValue V2, SelectionDAG &DAG,
2492 const LoongArchSubtarget &Subtarget) {
2493 // LoongArch LASX only supports xvinsve0.{w/d}.
2494 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2495 VT != MVT::v4f64)
2496 return SDValue();
2497
2498 MVT GRLenVT = Subtarget.getGRLenVT();
2499 int MaskSize = Mask.size();
2500 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2501
2502 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2503 // all other elements are either 'Base + i' or undef (-1). On success, return
2504 // the index of the replaced element. Otherwise, just return -1.
2505 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2506 int Idx = -1;
2507 for (int i = 0; i < MaskSize; ++i) {
2508 if (Mask[i] == Base + i || Mask[i] == -1)
2509 continue;
2510 if (Mask[i] != Replaced)
2511 return -1;
2512 if (Idx == -1)
2513 Idx = i;
2514 else
2515 return -1;
2516 }
2517 return Idx;
2518 };
2519
2520 // Case 1: the lowest element of V2 replaces one element in V1.
2521 int Idx = checkReplaceOne(0, MaskSize);
2522 if (Idx != -1)
2523 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2524 DAG.getConstant(Idx, DL, GRLenVT));
2525
2526 // Case 2: the lowest element of V1 replaces one element in V2.
2527 Idx = checkReplaceOne(MaskSize, 0);
2528 if (Idx != -1)
2529 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2530 DAG.getConstant(Idx, DL, GRLenVT));
2531
2532 return SDValue();
2533}
2534
2535/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2537 MVT VT, SDValue V1, SDValue V2,
2538 SelectionDAG &DAG) {
2539
2540 int MaskSize = Mask.size();
2541 int HalfSize = Mask.size() / 2;
2542 const auto &Begin = Mask.begin();
2543 const auto &Mid = Mask.begin() + HalfSize;
2544 const auto &End = Mask.end();
2545
2546 // VECTOR_SHUFFLE concatenates the vectors:
2547 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2548 // shuffling ->
2549 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2550 //
2551 // XVSHUF concatenates the vectors:
2552 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2553 // shuffling ->
2554 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2555 SmallVector<SDValue, 8> MaskAlloc;
2556 for (auto it = Begin; it < Mid; it++) {
2557 if (*it < 0) // UNDEF
2558 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2559 else if ((*it >= 0 && *it < HalfSize) ||
2560 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2561 int M = *it < HalfSize ? *it : *it - HalfSize;
2562 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2563 } else
2564 return SDValue();
2565 }
2566 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2567
2568 for (auto it = Mid; it < End; it++) {
2569 if (*it < 0) // UNDEF
2570 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2571 else if ((*it >= HalfSize && *it < MaskSize) ||
2572 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2573 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2574 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2575 } else
2576 return SDValue();
2577 }
2578 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2579
2580 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2581 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2582 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2583}
2584
2585/// Shuffle vectors by lane to generate more optimized instructions.
2586/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2587///
2588/// Therefore, except for the following four cases, other cases are regarded
2589/// as cross-lane shuffles, where optimization is relatively limited.
2590///
2591/// - Shuffle high, low lanes of two inputs vector
2592/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2593/// - Shuffle low, high lanes of two inputs vector
2594/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2595/// - Shuffle low, low lanes of two inputs vector
2596/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2597/// - Shuffle high, high lanes of two inputs vector
2598/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2599///
2600/// The first case is the closest to LoongArch instructions and the other
2601/// cases need to be converted to it for processing.
2602///
2603/// This function will return true for the last three cases above and will
2604/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2605/// cross-lane shuffle cases.
2607 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2608 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2609
2610 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2611
2612 int MaskSize = Mask.size();
2613 int HalfSize = Mask.size() / 2;
2614 MVT GRLenVT = Subtarget.getGRLenVT();
2615
2616 HalfMaskType preMask = None, postMask = None;
2617
2618 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2619 return M < 0 || (M >= 0 && M < HalfSize) ||
2620 (M >= MaskSize && M < MaskSize + HalfSize);
2621 }))
2622 preMask = HighLaneTy;
2623 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2624 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2625 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2626 }))
2627 preMask = LowLaneTy;
2628
2629 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2630 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2631 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2632 }))
2633 postMask = LowLaneTy;
2634 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2635 return M < 0 || (M >= 0 && M < HalfSize) ||
2636 (M >= MaskSize && M < MaskSize + HalfSize);
2637 }))
2638 postMask = HighLaneTy;
2639
2640 // The pre-half of mask is high lane type, and the post-half of mask
2641 // is low lane type, which is closest to the LoongArch instructions.
2642 //
2643 // Note: In the LoongArch architecture, the high lane of mask corresponds
2644 // to the lower 128-bit of vector register, and the low lane of mask
2645 // corresponds the higher 128-bit of vector register.
2646 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2647 return false;
2648 }
2649 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2650 V1 = DAG.getBitcast(MVT::v4i64, V1);
2651 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2652 DAG.getConstant(0b01001110, DL, GRLenVT));
2653 V1 = DAG.getBitcast(VT, V1);
2654
2655 if (!V2.isUndef()) {
2656 V2 = DAG.getBitcast(MVT::v4i64, V2);
2657 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2658 DAG.getConstant(0b01001110, DL, GRLenVT));
2659 V2 = DAG.getBitcast(VT, V2);
2660 }
2661
2662 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2663 *it = *it < 0 ? *it : *it - HalfSize;
2664 }
2665 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2666 *it = *it < 0 ? *it : *it + HalfSize;
2667 }
2668 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2669 V1 = DAG.getBitcast(MVT::v4i64, V1);
2670 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2671 DAG.getConstant(0b11101110, DL, GRLenVT));
2672 V1 = DAG.getBitcast(VT, V1);
2673
2674 if (!V2.isUndef()) {
2675 V2 = DAG.getBitcast(MVT::v4i64, V2);
2676 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2677 DAG.getConstant(0b11101110, DL, GRLenVT));
2678 V2 = DAG.getBitcast(VT, V2);
2679 }
2680
2681 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2682 *it = *it < 0 ? *it : *it - HalfSize;
2683 }
2684 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2685 V1 = DAG.getBitcast(MVT::v4i64, V1);
2686 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2687 DAG.getConstant(0b01000100, DL, GRLenVT));
2688 V1 = DAG.getBitcast(VT, V1);
2689
2690 if (!V2.isUndef()) {
2691 V2 = DAG.getBitcast(MVT::v4i64, V2);
2692 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2693 DAG.getConstant(0b01000100, DL, GRLenVT));
2694 V2 = DAG.getBitcast(VT, V2);
2695 }
2696
2697 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2698 *it = *it < 0 ? *it : *it + HalfSize;
2699 }
2700 } else { // cross-lane
2701 return false;
2702 }
2703
2704 return true;
2705}
2706
2707/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2708/// Only for 256-bit vector.
2709///
2710/// For example:
2711/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2712/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2713/// is lowerded to:
2714/// (XVPERMI $xr2, $xr0, 78)
2715/// (XVSHUF $xr1, $xr2, $xr0)
2716/// (XVORI $xr0, $xr1, 0)
2718 ArrayRef<int> Mask,
2719 MVT VT, SDValue V1,
2720 SDValue V2,
2721 SelectionDAG &DAG) {
2722 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2723 int Size = Mask.size();
2724 int LaneSize = Size / 2;
2725
2726 bool LaneCrossing[2] = {false, false};
2727 for (int i = 0; i < Size; ++i)
2728 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2729 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2730
2731 // Ensure that all lanes ared involved.
2732 if (!LaneCrossing[0] && !LaneCrossing[1])
2733 return SDValue();
2734
2735 SmallVector<int> InLaneMask;
2736 InLaneMask.assign(Mask.begin(), Mask.end());
2737 for (int i = 0; i < Size; ++i) {
2738 int &M = InLaneMask[i];
2739 if (M < 0)
2740 continue;
2741 if (((M % Size) / LaneSize) != (i / LaneSize))
2742 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2743 }
2744
2745 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2746 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2747 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2748 Flipped = DAG.getBitcast(VT, Flipped);
2749 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2750}
2751
2752/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2753///
2754/// This routine breaks down the specific type of 256-bit shuffle and
2755/// dispatches to the lowering routines accordingly.
2757 SDValue V1, SDValue V2, SelectionDAG &DAG,
2758 const LoongArchSubtarget &Subtarget) {
2759 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2760 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2761 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2762 "Vector type is unsupported for lasx!");
2764 "Two operands have different types!");
2765 assert(VT.getVectorNumElements() == Mask.size() &&
2766 "Unexpected mask size for shuffle!");
2767 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2768 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2769
2770 APInt KnownUndef, KnownZero;
2771 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2772 APInt Zeroable = KnownUndef | KnownZero;
2773
2774 SDValue Result;
2775 // TODO: Add more comparison patterns.
2776 if (V2.isUndef()) {
2777 if ((Result =
2778 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2779 return Result;
2780 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2781 Subtarget)))
2782 return Result;
2783 // Try to widen vectors to gain more optimization opportunities.
2784 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2785 return NewShuffle;
2786 if ((Result =
2787 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2788 return Result;
2789 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2790 return Result;
2791 if ((Result =
2792 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2793 return Result;
2794
2795 // TODO: This comment may be enabled in the future to better match the
2796 // pattern for instruction selection.
2797 /* V2 = V1; */
2798 }
2799
2800 // It is recommended not to change the pattern comparison order for better
2801 // performance.
2802 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2803 return Result;
2804 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2805 return Result;
2806 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2807 return Result;
2808 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2809 return Result;
2810 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2811 return Result;
2812 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2813 return Result;
2814 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2815 Zeroable)))
2816 return Result;
2817 if ((Result =
2818 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2819 return Result;
2820 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2821 Subtarget)))
2822 return Result;
2823
2824 // canonicalize non cross-lane shuffle vector
2825 SmallVector<int> NewMask(Mask);
2826 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2827 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2828
2829 // FIXME: Handling the remaining cases earlier can degrade performance
2830 // in some situations. Further analysis is required to enable more
2831 // effective optimizations.
2832 if (V2.isUndef()) {
2833 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2834 V1, V2, DAG)))
2835 return Result;
2836 }
2837
2838 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2839 return NewShuffle;
2840 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2841 return Result;
2842
2843 return SDValue();
2844}
2845
2846SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2847 SelectionDAG &DAG) const {
2848 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2849 ArrayRef<int> OrigMask = SVOp->getMask();
2850 SDValue V1 = Op.getOperand(0);
2851 SDValue V2 = Op.getOperand(1);
2852 MVT VT = Op.getSimpleValueType();
2853 int NumElements = VT.getVectorNumElements();
2854 SDLoc DL(Op);
2855
2856 bool V1IsUndef = V1.isUndef();
2857 bool V2IsUndef = V2.isUndef();
2858 if (V1IsUndef && V2IsUndef)
2859 return DAG.getUNDEF(VT);
2860
2861 // When we create a shuffle node we put the UNDEF node to second operand,
2862 // but in some cases the first operand may be transformed to UNDEF.
2863 // In this case we should just commute the node.
2864 if (V1IsUndef)
2865 return DAG.getCommutedVectorShuffle(*SVOp);
2866
2867 // Check for non-undef masks pointing at an undef vector and make the masks
2868 // undef as well. This makes it easier to match the shuffle based solely on
2869 // the mask.
2870 if (V2IsUndef &&
2871 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2872 SmallVector<int, 8> NewMask(OrigMask);
2873 for (int &M : NewMask)
2874 if (M >= NumElements)
2875 M = -1;
2876 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2877 }
2878
2879 // Check for illegal shuffle mask element index values.
2880 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2881 (void)MaskUpperLimit;
2882 assert(llvm::all_of(OrigMask,
2883 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2884 "Out of bounds shuffle index");
2885
2886 // For each vector width, delegate to a specialized lowering routine.
2887 if (VT.is128BitVector())
2888 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2889
2890 if (VT.is256BitVector())
2891 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2892
2893 return SDValue();
2894}
2895
2896SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2897 SelectionDAG &DAG) const {
2898 // Custom lower to ensure the libcall return is passed in an FPR on hard
2899 // float ABIs.
2900 SDLoc DL(Op);
2901 MakeLibCallOptions CallOptions;
2902 SDValue Op0 = Op.getOperand(0);
2903 SDValue Chain = SDValue();
2904 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2905 SDValue Res;
2906 std::tie(Res, Chain) =
2907 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2908 if (Subtarget.is64Bit())
2909 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2910 return DAG.getBitcast(MVT::i32, Res);
2911}
2912
2913SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2914 SelectionDAG &DAG) const {
2915 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2916 // float ABIs.
2917 SDLoc DL(Op);
2918 MakeLibCallOptions CallOptions;
2919 SDValue Op0 = Op.getOperand(0);
2920 SDValue Chain = SDValue();
2921 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2922 DL, MVT::f32, Op0)
2923 : DAG.getBitcast(MVT::f32, Op0);
2924 SDValue Res;
2925 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2926 CallOptions, DL, Chain);
2927 return Res;
2928}
2929
2930SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2931 SelectionDAG &DAG) const {
2932 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2933 SDLoc DL(Op);
2934 MakeLibCallOptions CallOptions;
2935 RTLIB::Libcall LC =
2936 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2937 SDValue Res =
2938 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2939 if (Subtarget.is64Bit())
2940 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2941 return DAG.getBitcast(MVT::i32, Res);
2942}
2943
2944SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2947 MVT VT = Op.getSimpleValueType();
2948 SDLoc DL(Op);
2949 Op = DAG.getNode(
2950 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2951 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2952 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2953 DL, MVT::f32, Op)
2954 : DAG.getBitcast(MVT::f32, Op);
2955 if (VT != MVT::f32)
2956 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2957 return Res;
2958}
2959
2960// Lower BUILD_VECTOR as broadcast load (if possible).
2961// For example:
2962// %a = load i8, ptr %ptr
2963// %b = build_vector %a, %a, %a, %a
2964// is lowered to :
2965// (VLDREPL_B $a0, 0)
2967 const SDLoc &DL,
2968 SelectionDAG &DAG) {
2969 MVT VT = BVOp->getSimpleValueType(0);
2970 int NumOps = BVOp->getNumOperands();
2971
2972 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2973 "Unsupported vector type for broadcast.");
2974
2975 SDValue IdentitySrc;
2976 bool IsIdeneity = true;
2977
2978 for (int i = 0; i != NumOps; i++) {
2979 SDValue Op = BVOp->getOperand(i);
2980 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2981 IsIdeneity = false;
2982 break;
2983 }
2984 IdentitySrc = BVOp->getOperand(0);
2985 }
2986
2987 // make sure that this load is valid and only has one user.
2988 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2989 return SDValue();
2990
2991 auto *LN = cast<LoadSDNode>(IdentitySrc);
2992 auto ExtType = LN->getExtensionType();
2993
2994 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2995 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2996 // Indexed loads and stores are not supported on LoongArch.
2997 assert(LN->isUnindexed() && "Unexpected indexed load.");
2998
2999 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3000 // The offset operand of unindexed load is always undefined, so there is
3001 // no need to pass it to VLDREPL.
3002 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3003 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3004 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3005 return BCast;
3006 }
3007 return SDValue();
3008}
3009
3010// Sequentially insert elements from Ops into Vector, from low to high indices.
3011// Note: Ops can have fewer elements than Vector.
3013 const LoongArchSubtarget &Subtarget, SDValue &Vector,
3014 EVT ResTy) {
3015 assert(Ops.size() <= ResTy.getVectorNumElements());
3016
3017 SDValue Op0 = Ops[0];
3018 if (!Op0.isUndef())
3019 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3020 for (unsigned i = 1; i < Ops.size(); ++i) {
3021 SDValue Opi = Ops[i];
3022 if (Opi.isUndef())
3023 continue;
3024 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3025 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3026 }
3027}
3028
3029// Build a ResTy subvector from Node, taking NumElts elements starting at index
3030// 'first'.
3032 SelectionDAG &DAG, SDLoc DL,
3033 const LoongArchSubtarget &Subtarget,
3034 EVT ResTy, unsigned first) {
3035 unsigned NumElts = ResTy.getVectorNumElements();
3036
3037 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3038
3039 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3040 Node->op_begin() + first + NumElts);
3041 SDValue Vector = DAG.getUNDEF(ResTy);
3042 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3043 return Vector;
3044}
3045
// Custom-lower BUILD_VECTOR for LSX (128-bit) and LASX (256-bit) vectors.
// Tries, in order: broadcast load, constant splat via a same-width integer
// vector, splat of a non-constant value, common-constant splat plus element
// inserts, repeated-sequence broadcast, and finally element-by-element
// INSERT_VECTOR_ELT (split in halves for 256-bit vectors).
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  MVT VT = Node->getSimpleValueType(0);
  EVT ResTy = Op->getValueType(0);
  unsigned NumElts = ResTy.getVectorNumElements();
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsConstant = false;
  bool UseSameConstant = true;
  SDValue ConstantValue;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  // Only handle vectors the enabled extension supports: 128-bit needs LSX,
  // 256-bit needs LASX.
  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  // Prefer a broadcast load when the whole vector can come from memory.
  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
    return Result;

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
      // We can only handle 64-bit elements that are within
      // the signed 10-bit range or match vldi patterns on 32-bit targets.
      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
      if (!SplatValue.isSignedIntN(10) &&
          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
        return SDValue();
      if ((Is128Vec && ResTy == MVT::v4i32) ||
          (Is256Vec && ResTy == MVT::v8i32))
        return Op;
    }

    // Build the splat in an integer vector of the splat element width, then
    // bitcast to the requested type.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  // Non-constant splats (no undefs) are matched by dedicated patterns.
  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  // Scan the elements: record whether any is an int/FP constant and whether
  // all constant elements are the same value.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Opi = Node->getOperand(i);
    if (isIntOrFPConstant(Opi)) {
      IsConstant = true;
      if (!ConstantValue.getNode())
        ConstantValue = Opi;
      else if (ConstantValue != Opi)
        UseSameConstant = false;
    }
  }

  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the shared constant, then insert the non-constant elements.
    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Opi = Node->getOperand(i);
      if (!isIntOrFPConstant(Opi))
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Result;
  }

  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
    //
    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
    // generates worse code in some cases. This could be further optimized
    // with more consideration.
    BitVector UndefElements;
    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
        UndefElements.count() == 0) {
      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
      SDValue Vector = DAG.getUNDEF(ResTy);
      EVT FillTy = Is256Vec
                       : ResTy;
      SDValue FillVec =
          Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;

      fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);

      // The whole vector is SplatLen copies of the sequence; view it as a
      // vector of wider elements (one per sequence) and broadcast lane 0.
      unsigned SeqLen = Sequence.size();
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);

      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

      SDValue SplatVec;
      SDValue SrcVec = DAG.getBitcast(
          SplatTy,
          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
      if (Is256Vec) {
        SplatVec =
            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                  : LoongArchISD::XVREPLVE0,
                        DL, SplatTy, SrcVec);
      } else {
        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(ResTy, SplatVec);
    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
    // using memory operations is much lower.
    //
    // For 256-bit vectors, normally split into two halves and concatenate.
    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
    // one non-undef element, skip spliting to avoid a worse result.
    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
        ResTy == MVT::v4f64) {
      unsigned NonUndefCount = 0;
      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
        if (!Node->getOperand(i).isUndef()) {
          ++NonUndefCount;
          if (NonUndefCount > 1)
            break;
        }
      }
      if (NonUndefCount == 1)
        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
    }

    // Fill the low half (or the whole 128-bit vector) element by element.
    EVT VecTy =
        Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
    SDValue Vector =
        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);

    if (Is128Vec)
      return Vector;

    // For 256-bit vectors, fill the high half too and concatenate.
    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
                                                    VecTy, NumElts / 2);

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
  }

  return SDValue();
}
3230
3231SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3232 SelectionDAG &DAG) const {
3233 SDLoc DL(Op);
3234 MVT ResVT = Op.getSimpleValueType();
3235 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3236
3237 unsigned NumOperands = Op.getNumOperands();
3238 unsigned NumFreezeUndef = 0;
3239 unsigned NumZero = 0;
3240 unsigned NumNonZero = 0;
3241 unsigned NonZeros = 0;
3242 SmallSet<SDValue, 4> Undefs;
3243 for (unsigned i = 0; i != NumOperands; ++i) {
3244 SDValue SubVec = Op.getOperand(i);
3245 if (SubVec.isUndef())
3246 continue;
3247 if (ISD::isFreezeUndef(SubVec.getNode())) {
3248 // If the freeze(undef) has multiple uses then we must fold to zero.
3249 if (SubVec.hasOneUse()) {
3250 ++NumFreezeUndef;
3251 } else {
3252 ++NumZero;
3253 Undefs.insert(SubVec);
3254 }
3255 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3256 ++NumZero;
3257 else {
3258 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3259 NonZeros |= 1 << i;
3260 ++NumNonZero;
3261 }
3262 }
3263
3264 // If we have more than 2 non-zeros, build each half separately.
3265 if (NumNonZero > 2) {
3266 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3267 ArrayRef<SDUse> Ops = Op->ops();
3268 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3269 Ops.slice(0, NumOperands / 2));
3270 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3271 Ops.slice(NumOperands / 2));
3272 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3273 }
3274
3275 // Otherwise, build it up through insert_subvectors.
3276 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3277 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3278 : DAG.getUNDEF(ResVT));
3279
3280 // Replace Undef operands with ZeroVector.
3281 for (SDValue U : Undefs)
3282 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3283
3284 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3285 unsigned NumSubElems = SubVT.getVectorNumElements();
3286 for (unsigned i = 0; i != NumOperands; ++i) {
3287 if ((NonZeros & (1 << i)) == 0)
3288 continue;
3289
3290 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3291 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3292 }
3293
3294 return Vec;
3295}
3296
// Custom lowering for EXTRACT_VECTOR_ELT on 256-bit vectors with a
// non-constant index. Constant indices are left for instruction patterns.
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MVT EltVT = Op.getSimpleValueType();
  SDValue Vec = Op->getOperand(0);
  EVT VecTy = Vec->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

  // Constant indices are handled directly by the matching patterns.
  if (isa<ConstantSDNode>(Idx))
    return Op;

  switch (VecTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected type");
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v4i64:
  case MVT::v4f64: {
    // Extract the high half subvector and place it to the low half of a new
    // vector. It doesn't matter what the high half of the new vector is.
    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
    SDValue VecHi =
        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
    SDValue TmpVec =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
                    VecHi, DAG.getConstant(0, DL, GRLenVT));

    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
    // desired element. The index is moved into lane 0 of an f32 vector to
    // build the mask.
    SDValue IdxCp =
        Subtarget.is64Bit()
            ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
            : DAG.getBitcast(MVT::f32, Idx);
    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
    SDValue MaskVec =
        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
    SDValue ResVec =
        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);

    // Read back lane 0, which now holds the requested element.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    // For 32-bit elements, XVPERM can select an arbitrary lane: splat the
    // index, permute, then read lane 0.
    SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
    SDValue SplatValue =
        DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  }
}
3355
3356SDValue
3357LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3358 SelectionDAG &DAG) const {
3359 MVT VT = Op.getSimpleValueType();
3360 MVT EltVT = VT.getVectorElementType();
3361 unsigned NumElts = VT.getVectorNumElements();
3362 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3363 SDLoc DL(Op);
3364 SDValue Op0 = Op.getOperand(0);
3365 SDValue Op1 = Op.getOperand(1);
3366 SDValue Op2 = Op.getOperand(2);
3367
3368 if (isa<ConstantSDNode>(Op2))
3369 return Op;
3370
3371 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3372 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3373
3374 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3375 return SDValue();
3376
3377 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3378 SmallVector<SDValue, 32> RawIndices;
3379 SDValue SplatIdx;
3380 SDValue Indices;
3381
3382 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3383 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3384 for (unsigned i = 0; i < NumElts; ++i) {
3385 RawIndices.push_back(Op2);
3386 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3387 }
3388 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3389 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3390
3391 RawIndices.clear();
3392 for (unsigned i = 0; i < NumElts; ++i) {
3393 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3394 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3395 }
3396 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3397 Indices = DAG.getBitcast(IdxVTy, Indices);
3398 } else {
3399 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3400
3401 for (unsigned i = 0; i < NumElts; ++i)
3402 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3403 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3404 }
3405
3406 // insert vec, elt, idx
3407 // =>
3408 // select (splatidx == {0,1,2...}) ? splatelt : vec
3409 SDValue SelectCC =
3410 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3411 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3412}
3413
3414SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3415 SelectionDAG &DAG) const {
3416 SDLoc DL(Op);
3417 SyncScope::ID FenceSSID =
3418 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3419
3420 // singlethread fences only synchronize with signal handlers on the same
3421 // thread and thus only need to preserve instruction order, not actually
3422 // enforce memory ordering.
3423 if (FenceSSID == SyncScope::SingleThread)
3424 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3425 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3426
3427 return Op;
3428}
3429
3430SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3431 SelectionDAG &DAG) const {
3432
3433 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3434 DAG.getContext()->emitError(
3435 "On LA64, only 64-bit registers can be written.");
3436 return Op.getOperand(0);
3437 }
3438
3439 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3440 DAG.getContext()->emitError(
3441 "On LA32, only 32-bit registers can be written.");
3442 return Op.getOperand(0);
3443 }
3444
3445 return Op;
3446}
3447
// Lower FRAMEADDR: read the frame register, then, for non-zero depths, chase
// the saved frame pointers up the stack.
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The depth argument must be a compile-time constant.
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  // Walk up Depth frames: each step loads the word stored at offset
  // -2 * GRLen bytes from the current frame address.
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
3474
// Lower RETURNADDR by reading the return-address register for the current
// frame; deeper frames are not supported.
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}
3494
3495SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3496 SelectionDAG &DAG) const {
3497 MachineFunction &MF = DAG.getMachineFunction();
3498 auto Size = Subtarget.getGRLen() / 8;
3499 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3500 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3501}
3502
// Lower VASTART: store the address of the varargs save area into the
// va_list object passed as the memory operand.
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
3518
// Lower UINT_TO_FP on LA64 with basic F but without basic D. Operands whose
// high bits are provably zero can reuse the default (signed) conversion;
// everything else goes through a libcall.
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  // Masked with a constant below 0xFFFFFFFF: value fits the signed path.
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  // BSTRPICK starting at bit 0 with msb below 0x1F: zero-extended sub-32-bit.
  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  // AssertZext of a type narrower than i32: high bits known zero.
  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  // Fall back to the runtime library, recording the pre-softening types.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3552
// Lower SINT_TO_FP on LA64 with basic F but without basic D. Operands known
// to fit in i32 keep the default lowering; everything else uses a libcall.
SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  // Asserted extension from i32 or narrower: the value fits in 32 bits.
  if ((Op0.getOpcode() == ISD::AssertSext ||
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  // Fall back to the runtime library, recording the pre-softening types.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3577
3578SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3579 SelectionDAG &DAG) const {
3580
3581 SDLoc DL(Op);
3582 EVT VT = Op.getValueType();
3583 SDValue Op0 = Op.getOperand(0);
3584 EVT Op0VT = Op0.getValueType();
3585
3586 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3587 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3588 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3589 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3590 }
3591 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3592 SDValue Lo, Hi;
3593 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3594 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3595 }
3596 return Op;
3597}
3598
3599SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3600 SelectionDAG &DAG) const {
3601
3602 SDLoc DL(Op);
3603 SDValue Op0 = Op.getOperand(0);
3604
3605 if (Op0.getValueType() == MVT::f16)
3606 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3607
3608 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3609 !Subtarget.hasBasicD()) {
3610 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3611 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3612 }
3613
3614 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3615 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3616 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3617}
3618
// getTargetNode overloads: emit the target-flavored form (TargetGlobalAddress
// / TargetBlockAddress / TargetConstantPool / TargetJumpTable) of the given
// node, carrying the requested relocation Flags.
                            SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

                            SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

                            SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

                            SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
3640
// Materialize the address of a symbol node (global, block address, constant
// pool entry, or jump table) according to the code model. Non-local symbols
// are loaded through the GOT.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which
      //
      // for la32r expands to:
      // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which
      //
      // for la32r expands to:
      // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
      //
      // for la32s and la64 expands to:
      // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}
3714
3715SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3716 SelectionDAG &DAG) const {
3717 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3718 DAG.getTarget().getCodeModel());
3719}
3720
3721SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3722 SelectionDAG &DAG) const {
3723 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3724 DAG.getTarget().getCodeModel());
3725}
3726
3727SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3728 SelectionDAG &DAG) const {
3729 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3730 DAG.getTarget().getCodeModel());
3731}
3732
3733SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3734 SelectionDAG &DAG) const {
3735 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3736 assert(N->getOffset() == 0 && "unexpected offset in global node");
3737 auto CM = DAG.getTarget().getCodeModel();
3738 const GlobalValue *GV = N->getGlobal();
3739
3740 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3741 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3742 CM = *GCM;
3743 }
3744
3745 return getAddr(N, DAG, CM, GV->isDSOLocal());
3746}
3747
// Lower a TLS address in the static (IE/LE) models: materialize the offset
// with the given pseudo and, unless the LE pseudo expansion already does it,
// add the thread pointer ($r2/$tp).
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
3786
// Lower a TLS address in the dynamic (GD/LD) models: materialize the GOT
// entry address with the given pseudo and call __tls_get_addr on it.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  Args.emplace_back(Load, CallTy);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
3818
3819SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3820 SelectionDAG &DAG, unsigned Opc,
3821 bool Large) const {
3822 SDLoc DL(N);
3823 EVT Ty = getPointerTy(DAG.getDataLayout());
3824 const GlobalValue *GV = N->getGlobal();
3825
3826 // This is not actually used, but is necessary for successfully matching the
3827 // PseudoLA_*_LARGE nodes.
3828 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3829
3830 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3831 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3832 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3833 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3834 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3835}
3836
// Lower a TLS global address, dispatching on the TLS model (dynamic GD/LD via
// __tls_get_addr or TLSDESC, static IE via the GOT, static LE directly).
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    reportFatalUsageError("the emulated TLS is prohibited");

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false, Large);
  }

  // Dynamic models with TLSDESC enabled fall through to here.
  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
3895
// Validate that intrinsic operand ImmOp fits in an N-bit (signed or unsigned)
// immediate. On failure, emit a diagnostic and return UNDEF of the result
// type; on success, return a null SDValue so the caller keeps the node.
template <unsigned N>
                          SelectionDAG &DAG, bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Op->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
  }
  return SDValue();
}
3909
3910SDValue
3911LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3912 SelectionDAG &DAG) const {
3913 switch (Op.getConstantOperandVal(0)) {
3914 default:
3915 return SDValue(); // Don't custom lower most intrinsics.
3916 case Intrinsic::thread_pointer: {
3917 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3918 return DAG.getRegister(LoongArch::R2, PtrVT);
3919 }
3920 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3921 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3922 case Intrinsic::loongarch_lsx_vreplvei_d:
3923 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3924 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3925 case Intrinsic::loongarch_lsx_vreplvei_w:
3926 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3927 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3928 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3929 case Intrinsic::loongarch_lasx_xvpickve_d:
3930 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3931 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3932 case Intrinsic::loongarch_lasx_xvinsve0_d:
3933 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3934 case Intrinsic::loongarch_lsx_vsat_b:
3935 case Intrinsic::loongarch_lsx_vsat_bu:
3936 case Intrinsic::loongarch_lsx_vrotri_b:
3937 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3938 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3939 case Intrinsic::loongarch_lsx_vsrlri_b:
3940 case Intrinsic::loongarch_lsx_vsrari_b:
3941 case Intrinsic::loongarch_lsx_vreplvei_h:
3942 case Intrinsic::loongarch_lasx_xvsat_b:
3943 case Intrinsic::loongarch_lasx_xvsat_bu:
3944 case Intrinsic::loongarch_lasx_xvrotri_b:
3945 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3946 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3947 case Intrinsic::loongarch_lasx_xvsrlri_b:
3948 case Intrinsic::loongarch_lasx_xvsrari_b:
3949 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3950 case Intrinsic::loongarch_lasx_xvpickve_w:
3951 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3952 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3953 case Intrinsic::loongarch_lasx_xvinsve0_w:
3954 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3955 case Intrinsic::loongarch_lsx_vsat_h:
3956 case Intrinsic::loongarch_lsx_vsat_hu:
3957 case Intrinsic::loongarch_lsx_vrotri_h:
3958 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3959 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3960 case Intrinsic::loongarch_lsx_vsrlri_h:
3961 case Intrinsic::loongarch_lsx_vsrari_h:
3962 case Intrinsic::loongarch_lsx_vreplvei_b:
3963 case Intrinsic::loongarch_lasx_xvsat_h:
3964 case Intrinsic::loongarch_lasx_xvsat_hu:
3965 case Intrinsic::loongarch_lasx_xvrotri_h:
3966 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3967 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3968 case Intrinsic::loongarch_lasx_xvsrlri_h:
3969 case Intrinsic::loongarch_lasx_xvsrari_h:
3970 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3971 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3972 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3973 case Intrinsic::loongarch_lsx_vsrani_b_h:
3974 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3975 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3976 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3977 case Intrinsic::loongarch_lsx_vssrani_b_h:
3978 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3979 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3980 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3981 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3982 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3983 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3984 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3985 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3986 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3987 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3988 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3989 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3990 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3991 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3992 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3993 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3994 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3995 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3996 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3997 case Intrinsic::loongarch_lsx_vsat_w:
3998 case Intrinsic::loongarch_lsx_vsat_wu:
3999 case Intrinsic::loongarch_lsx_vrotri_w:
4000 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4001 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4002 case Intrinsic::loongarch_lsx_vsrlri_w:
4003 case Intrinsic::loongarch_lsx_vsrari_w:
4004 case Intrinsic::loongarch_lsx_vslei_bu:
4005 case Intrinsic::loongarch_lsx_vslei_hu:
4006 case Intrinsic::loongarch_lsx_vslei_wu:
4007 case Intrinsic::loongarch_lsx_vslei_du:
4008 case Intrinsic::loongarch_lsx_vslti_bu:
4009 case Intrinsic::loongarch_lsx_vslti_hu:
4010 case Intrinsic::loongarch_lsx_vslti_wu:
4011 case Intrinsic::loongarch_lsx_vslti_du:
4012 case Intrinsic::loongarch_lsx_vbsll_v:
4013 case Intrinsic::loongarch_lsx_vbsrl_v:
4014 case Intrinsic::loongarch_lasx_xvsat_w:
4015 case Intrinsic::loongarch_lasx_xvsat_wu:
4016 case Intrinsic::loongarch_lasx_xvrotri_w:
4017 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4018 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4019 case Intrinsic::loongarch_lasx_xvsrlri_w:
4020 case Intrinsic::loongarch_lasx_xvsrari_w:
4021 case Intrinsic::loongarch_lasx_xvslei_bu:
4022 case Intrinsic::loongarch_lasx_xvslei_hu:
4023 case Intrinsic::loongarch_lasx_xvslei_wu:
4024 case Intrinsic::loongarch_lasx_xvslei_du:
4025 case Intrinsic::loongarch_lasx_xvslti_bu:
4026 case Intrinsic::loongarch_lasx_xvslti_hu:
4027 case Intrinsic::loongarch_lasx_xvslti_wu:
4028 case Intrinsic::loongarch_lasx_xvslti_du:
4029 case Intrinsic::loongarch_lasx_xvbsll_v:
4030 case Intrinsic::loongarch_lasx_xvbsrl_v:
4031 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4032 case Intrinsic::loongarch_lsx_vseqi_b:
4033 case Intrinsic::loongarch_lsx_vseqi_h:
4034 case Intrinsic::loongarch_lsx_vseqi_w:
4035 case Intrinsic::loongarch_lsx_vseqi_d:
4036 case Intrinsic::loongarch_lsx_vslei_b:
4037 case Intrinsic::loongarch_lsx_vslei_h:
4038 case Intrinsic::loongarch_lsx_vslei_w:
4039 case Intrinsic::loongarch_lsx_vslei_d:
4040 case Intrinsic::loongarch_lsx_vslti_b:
4041 case Intrinsic::loongarch_lsx_vslti_h:
4042 case Intrinsic::loongarch_lsx_vslti_w:
4043 case Intrinsic::loongarch_lsx_vslti_d:
4044 case Intrinsic::loongarch_lasx_xvseqi_b:
4045 case Intrinsic::loongarch_lasx_xvseqi_h:
4046 case Intrinsic::loongarch_lasx_xvseqi_w:
4047 case Intrinsic::loongarch_lasx_xvseqi_d:
4048 case Intrinsic::loongarch_lasx_xvslei_b:
4049 case Intrinsic::loongarch_lasx_xvslei_h:
4050 case Intrinsic::loongarch_lasx_xvslei_w:
4051 case Intrinsic::loongarch_lasx_xvslei_d:
4052 case Intrinsic::loongarch_lasx_xvslti_b:
4053 case Intrinsic::loongarch_lasx_xvslti_h:
4054 case Intrinsic::loongarch_lasx_xvslti_w:
4055 case Intrinsic::loongarch_lasx_xvslti_d:
4056 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4057 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4058 case Intrinsic::loongarch_lsx_vsrani_h_w:
4059 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4060 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4061 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4062 case Intrinsic::loongarch_lsx_vssrani_h_w:
4063 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4064 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4065 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4066 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4067 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4068 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4069 case Intrinsic::loongarch_lsx_vfrstpi_b:
4070 case Intrinsic::loongarch_lsx_vfrstpi_h:
4071 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4072 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4073 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4074 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4075 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4076 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4077 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4078 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4079 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4080 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4081 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4082 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4083 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4084 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4085 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4086 case Intrinsic::loongarch_lsx_vsat_d:
4087 case Intrinsic::loongarch_lsx_vsat_du:
4088 case Intrinsic::loongarch_lsx_vrotri_d:
4089 case Intrinsic::loongarch_lsx_vsrlri_d:
4090 case Intrinsic::loongarch_lsx_vsrari_d:
4091 case Intrinsic::loongarch_lasx_xvsat_d:
4092 case Intrinsic::loongarch_lasx_xvsat_du:
4093 case Intrinsic::loongarch_lasx_xvrotri_d:
4094 case Intrinsic::loongarch_lasx_xvsrlri_d:
4095 case Intrinsic::loongarch_lasx_xvsrari_d:
4096 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4097 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4098 case Intrinsic::loongarch_lsx_vsrani_w_d:
4099 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4100 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4101 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4102 case Intrinsic::loongarch_lsx_vssrani_w_d:
4103 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4104 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4105 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4106 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4107 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4108 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4109 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4110 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4111 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4112 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4113 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4114 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4115 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4116 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4117 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4118 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4119 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4120 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4121 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4122 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4123 case Intrinsic::loongarch_lsx_vsrani_d_q:
4124 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4125 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4126 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4127 case Intrinsic::loongarch_lsx_vssrani_d_q:
4128 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4129 case Intrinsic::loongarch_lsx_vssrani_du_q:
4130 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4131 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4132 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4133 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4134 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4135 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4136 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4137 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4138 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4139 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4140 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4141 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4142 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4143 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4144 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4145 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4146 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4147 case Intrinsic::loongarch_lsx_vnori_b:
4148 case Intrinsic::loongarch_lsx_vshuf4i_b:
4149 case Intrinsic::loongarch_lsx_vshuf4i_h:
4150 case Intrinsic::loongarch_lsx_vshuf4i_w:
4151 case Intrinsic::loongarch_lasx_xvnori_b:
4152 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4153 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4154 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4155 case Intrinsic::loongarch_lasx_xvpermi_d:
4156 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4157 case Intrinsic::loongarch_lsx_vshuf4i_d:
4158 case Intrinsic::loongarch_lsx_vpermi_w:
4159 case Intrinsic::loongarch_lsx_vbitseli_b:
4160 case Intrinsic::loongarch_lsx_vextrins_b:
4161 case Intrinsic::loongarch_lsx_vextrins_h:
4162 case Intrinsic::loongarch_lsx_vextrins_w:
4163 case Intrinsic::loongarch_lsx_vextrins_d:
4164 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4165 case Intrinsic::loongarch_lasx_xvpermi_w:
4166 case Intrinsic::loongarch_lasx_xvpermi_q:
4167 case Intrinsic::loongarch_lasx_xvbitseli_b:
4168 case Intrinsic::loongarch_lasx_xvextrins_b:
4169 case Intrinsic::loongarch_lasx_xvextrins_h:
4170 case Intrinsic::loongarch_lasx_xvextrins_w:
4171 case Intrinsic::loongarch_lasx_xvextrins_d:
4172 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4173 case Intrinsic::loongarch_lsx_vrepli_b:
4174 case Intrinsic::loongarch_lsx_vrepli_h:
4175 case Intrinsic::loongarch_lsx_vrepli_w:
4176 case Intrinsic::loongarch_lsx_vrepli_d:
4177 case Intrinsic::loongarch_lasx_xvrepli_b:
4178 case Intrinsic::loongarch_lasx_xvrepli_h:
4179 case Intrinsic::loongarch_lasx_xvrepli_w:
4180 case Intrinsic::loongarch_lasx_xvrepli_d:
4181 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4182 case Intrinsic::loongarch_lsx_vldi:
4183 case Intrinsic::loongarch_lasx_xvldi:
4184 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4185 }
4186}
4187
4188// Helper function that emits error message for intrinsics with chain and return
4189// merge values of a UNDEF and the chain.
4191 StringRef ErrorMsg,
4192 SelectionDAG &DAG) {
4193 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4194 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4195 SDLoc(Op));
4196}
4197
4198SDValue
4199LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4200 SelectionDAG &DAG) const {
4201 SDLoc DL(Op);
4202 MVT GRLenVT = Subtarget.getGRLenVT();
4203 EVT VT = Op.getValueType();
4204 SDValue Chain = Op.getOperand(0);
4205 const StringRef ErrorMsgOOR = "argument out of range";
4206 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4207 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4208
4209 switch (Op.getConstantOperandVal(1)) {
4210 default:
4211 return Op;
4212 case Intrinsic::loongarch_crc_w_b_w:
4213 case Intrinsic::loongarch_crc_w_h_w:
4214 case Intrinsic::loongarch_crc_w_w_w:
4215 case Intrinsic::loongarch_crc_w_d_w:
4216 case Intrinsic::loongarch_crcc_w_b_w:
4217 case Intrinsic::loongarch_crcc_w_h_w:
4218 case Intrinsic::loongarch_crcc_w_w_w:
4219 case Intrinsic::loongarch_crcc_w_d_w:
4220 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4221 case Intrinsic::loongarch_csrrd_w:
4222 case Intrinsic::loongarch_csrrd_d: {
4223 unsigned Imm = Op.getConstantOperandVal(2);
4224 return !isUInt<14>(Imm)
4225 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4226 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4227 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4228 }
4229 case Intrinsic::loongarch_csrwr_w:
4230 case Intrinsic::loongarch_csrwr_d: {
4231 unsigned Imm = Op.getConstantOperandVal(3);
4232 return !isUInt<14>(Imm)
4233 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4234 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4235 {Chain, Op.getOperand(2),
4236 DAG.getConstant(Imm, DL, GRLenVT)});
4237 }
4238 case Intrinsic::loongarch_csrxchg_w:
4239 case Intrinsic::loongarch_csrxchg_d: {
4240 unsigned Imm = Op.getConstantOperandVal(4);
4241 return !isUInt<14>(Imm)
4242 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4243 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4244 {Chain, Op.getOperand(2), Op.getOperand(3),
4245 DAG.getConstant(Imm, DL, GRLenVT)});
4246 }
4247 case Intrinsic::loongarch_iocsrrd_d: {
4248 return DAG.getNode(
4249 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4250 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4251 }
4252#define IOCSRRD_CASE(NAME, NODE) \
4253 case Intrinsic::loongarch_##NAME: { \
4254 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4255 {Chain, Op.getOperand(2)}); \
4256 }
4257 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4258 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4259 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4260#undef IOCSRRD_CASE
4261 case Intrinsic::loongarch_cpucfg: {
4262 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4263 {Chain, Op.getOperand(2)});
4264 }
4265 case Intrinsic::loongarch_lddir_d: {
4266 unsigned Imm = Op.getConstantOperandVal(3);
4267 return !isUInt<8>(Imm)
4268 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4269 : Op;
4270 }
4271 case Intrinsic::loongarch_movfcsr2gr: {
4272 if (!Subtarget.hasBasicF())
4273 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4274 unsigned Imm = Op.getConstantOperandVal(2);
4275 return !isUInt<2>(Imm)
4276 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4277 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4278 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4279 }
4280 case Intrinsic::loongarch_lsx_vld:
4281 case Intrinsic::loongarch_lsx_vldrepl_b:
4282 case Intrinsic::loongarch_lasx_xvld:
4283 case Intrinsic::loongarch_lasx_xvldrepl_b:
4284 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4285 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4286 : SDValue();
4287 case Intrinsic::loongarch_lsx_vldrepl_h:
4288 case Intrinsic::loongarch_lasx_xvldrepl_h:
4289 return !isShiftedInt<11, 1>(
4290 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4292 Op, "argument out of range or not a multiple of 2", DAG)
4293 : SDValue();
4294 case Intrinsic::loongarch_lsx_vldrepl_w:
4295 case Intrinsic::loongarch_lasx_xvldrepl_w:
4296 return !isShiftedInt<10, 2>(
4297 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4299 Op, "argument out of range or not a multiple of 4", DAG)
4300 : SDValue();
4301 case Intrinsic::loongarch_lsx_vldrepl_d:
4302 case Intrinsic::loongarch_lasx_xvldrepl_d:
4303 return !isShiftedInt<9, 3>(
4304 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4306 Op, "argument out of range or not a multiple of 8", DAG)
4307 : SDValue();
4308 }
4309}
4310
4311// Helper function that emits error message for intrinsics with void return
4312// value and return the chain.
4314 SelectionDAG &DAG) {
4315
4316 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4317 return Op.getOperand(0);
4318}
4319
4320SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4321 SelectionDAG &DAG) const {
4322 SDLoc DL(Op);
4323 MVT GRLenVT = Subtarget.getGRLenVT();
4324 SDValue Chain = Op.getOperand(0);
4325 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4326 SDValue Op2 = Op.getOperand(2);
4327 const StringRef ErrorMsgOOR = "argument out of range";
4328 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4329 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4330 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4331
4332 switch (IntrinsicEnum) {
4333 default:
4334 // TODO: Add more Intrinsics.
4335 return SDValue();
4336 case Intrinsic::loongarch_cacop_d:
4337 case Intrinsic::loongarch_cacop_w: {
4338 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4339 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4340 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4341 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4342 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4343 unsigned Imm1 = Op2->getAsZExtVal();
4344 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4345 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4346 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4347 return Op;
4348 }
4349 case Intrinsic::loongarch_dbar: {
4350 unsigned Imm = Op2->getAsZExtVal();
4351 return !isUInt<15>(Imm)
4352 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4353 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4354 DAG.getConstant(Imm, DL, GRLenVT));
4355 }
4356 case Intrinsic::loongarch_ibar: {
4357 unsigned Imm = Op2->getAsZExtVal();
4358 return !isUInt<15>(Imm)
4359 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4360 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4361 DAG.getConstant(Imm, DL, GRLenVT));
4362 }
4363 case Intrinsic::loongarch_break: {
4364 unsigned Imm = Op2->getAsZExtVal();
4365 return !isUInt<15>(Imm)
4366 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4367 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4368 DAG.getConstant(Imm, DL, GRLenVT));
4369 }
4370 case Intrinsic::loongarch_movgr2fcsr: {
4371 if (!Subtarget.hasBasicF())
4372 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4373 unsigned Imm = Op2->getAsZExtVal();
4374 return !isUInt<2>(Imm)
4375 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4376 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4377 DAG.getConstant(Imm, DL, GRLenVT),
4378 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4379 Op.getOperand(3)));
4380 }
4381 case Intrinsic::loongarch_syscall: {
4382 unsigned Imm = Op2->getAsZExtVal();
4383 return !isUInt<15>(Imm)
4384 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4385 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4386 DAG.getConstant(Imm, DL, GRLenVT));
4387 }
4388#define IOCSRWR_CASE(NAME, NODE) \
4389 case Intrinsic::loongarch_##NAME: { \
4390 SDValue Op3 = Op.getOperand(3); \
4391 return Subtarget.is64Bit() \
4392 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4393 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4394 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4395 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4396 Op3); \
4397 }
4398 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4399 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4400 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4401#undef IOCSRWR_CASE
4402 case Intrinsic::loongarch_iocsrwr_d: {
4403 return !Subtarget.is64Bit()
4404 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4405 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4406 Op2,
4407 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4408 Op.getOperand(3)));
4409 }
4410#define ASRT_LE_GT_CASE(NAME) \
4411 case Intrinsic::loongarch_##NAME: { \
4412 return !Subtarget.is64Bit() \
4413 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4414 : Op; \
4415 }
4416 ASRT_LE_GT_CASE(asrtle_d)
4417 ASRT_LE_GT_CASE(asrtgt_d)
4418#undef ASRT_LE_GT_CASE
4419 case Intrinsic::loongarch_ldpte_d: {
4420 unsigned Imm = Op.getConstantOperandVal(3);
4421 return !Subtarget.is64Bit()
4422 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4423 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4424 : Op;
4425 }
4426 case Intrinsic::loongarch_lsx_vst:
4427 case Intrinsic::loongarch_lasx_xvst:
4428 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4429 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4430 : SDValue();
4431 case Intrinsic::loongarch_lasx_xvstelm_b:
4432 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4433 !isUInt<5>(Op.getConstantOperandVal(5)))
4434 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4435 : SDValue();
4436 case Intrinsic::loongarch_lsx_vstelm_b:
4437 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4438 !isUInt<4>(Op.getConstantOperandVal(5)))
4439 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4440 : SDValue();
4441 case Intrinsic::loongarch_lasx_xvstelm_h:
4442 return (!isShiftedInt<8, 1>(
4443 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4444 !isUInt<4>(Op.getConstantOperandVal(5)))
4446 Op, "argument out of range or not a multiple of 2", DAG)
4447 : SDValue();
4448 case Intrinsic::loongarch_lsx_vstelm_h:
4449 return (!isShiftedInt<8, 1>(
4450 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4451 !isUInt<3>(Op.getConstantOperandVal(5)))
4453 Op, "argument out of range or not a multiple of 2", DAG)
4454 : SDValue();
4455 case Intrinsic::loongarch_lasx_xvstelm_w:
4456 return (!isShiftedInt<8, 2>(
4457 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4458 !isUInt<3>(Op.getConstantOperandVal(5)))
4460 Op, "argument out of range or not a multiple of 4", DAG)
4461 : SDValue();
4462 case Intrinsic::loongarch_lsx_vstelm_w:
4463 return (!isShiftedInt<8, 2>(
4464 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4465 !isUInt<2>(Op.getConstantOperandVal(5)))
4467 Op, "argument out of range or not a multiple of 4", DAG)
4468 : SDValue();
4469 case Intrinsic::loongarch_lasx_xvstelm_d:
4470 return (!isShiftedInt<8, 3>(
4471 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4472 !isUInt<2>(Op.getConstantOperandVal(5)))
4474 Op, "argument out of range or not a multiple of 8", DAG)
4475 : SDValue();
4476 case Intrinsic::loongarch_lsx_vstelm_d:
4477 return (!isShiftedInt<8, 3>(
4478 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4479 !isUInt<1>(Op.getConstantOperandVal(5)))
4481 Op, "argument out of range or not a multiple of 8", DAG)
4482 : SDValue();
4483 }
4484}
4485
4486SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4487 SelectionDAG &DAG) const {
4488 SDLoc DL(Op);
4489 SDValue Lo = Op.getOperand(0);
4490 SDValue Hi = Op.getOperand(1);
4491 SDValue Shamt = Op.getOperand(2);
4492 EVT VT = Lo.getValueType();
4493
4494 // if Shamt-GRLen < 0: // Shamt < GRLen
4495 // Lo = Lo << Shamt
4496 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4497 // else:
4498 // Lo = 0
4499 // Hi = Lo << (Shamt-GRLen)
4500
4501 SDValue Zero = DAG.getConstant(0, DL, VT);
4502 SDValue One = DAG.getConstant(1, DL, VT);
4503 SDValue MinusGRLen =
4504 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4505 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4506 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4507 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4508
4509 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4510 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4511 SDValue ShiftRightLo =
4512 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4513 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4514 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4515 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4516
4517 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4518
4519 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4520 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4521
4522 SDValue Parts[2] = {Lo, Hi};
4523 return DAG.getMergeValues(Parts, DL);
4524}
4525
4526SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4527 SelectionDAG &DAG,
4528 bool IsSRA) const {
4529 SDLoc DL(Op);
4530 SDValue Lo = Op.getOperand(0);
4531 SDValue Hi = Op.getOperand(1);
4532 SDValue Shamt = Op.getOperand(2);
4533 EVT VT = Lo.getValueType();
4534
4535 // SRA expansion:
4536 // if Shamt-GRLen < 0: // Shamt < GRLen
4537 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4538 // Hi = Hi >>s Shamt
4539 // else:
4540 // Lo = Hi >>s (Shamt-GRLen);
4541 // Hi = Hi >>s (GRLen-1)
4542 //
4543 // SRL expansion:
4544 // if Shamt-GRLen < 0: // Shamt < GRLen
4545 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4546 // Hi = Hi >>u Shamt
4547 // else:
4548 // Lo = Hi >>u (Shamt-GRLen);
4549 // Hi = 0;
4550
4551 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4552
4553 SDValue Zero = DAG.getConstant(0, DL, VT);
4554 SDValue One = DAG.getConstant(1, DL, VT);
4555 SDValue MinusGRLen =
4556 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4557 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4558 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4559 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4560
4561 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4562 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4563 SDValue ShiftLeftHi =
4564 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4565 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4566 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4567 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4568 SDValue HiFalse =
4569 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4570
4571 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4572
4573 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4574 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4575
4576 SDValue Parts[2] = {Lo, Hi};
4577 return DAG.getMergeValues(Parts, DL);
4578}
4579
4580// Returns the opcode of the target-specific SDNode that implements the 32-bit
4581// form of the given Opcode.
4582static unsigned getLoongArchWOpcode(unsigned Opcode) {
4583 switch (Opcode) {
4584 default:
4585 llvm_unreachable("Unexpected opcode");
4586 case ISD::SDIV:
4587 return LoongArchISD::DIV_W;
4588 case ISD::UDIV:
4589 return LoongArchISD::DIV_WU;
4590 case ISD::SREM:
4591 return LoongArchISD::MOD_W;
4592 case ISD::UREM:
4593 return LoongArchISD::MOD_WU;
4594 case ISD::SHL:
4595 return LoongArchISD::SLL_W;
4596 case ISD::SRA:
4597 return LoongArchISD::SRA_W;
4598 case ISD::SRL:
4599 return LoongArchISD::SRL_W;
4600 case ISD::ROTL:
4601 case ISD::ROTR:
4602 return LoongArchISD::ROTR_W;
4603 case ISD::CTTZ:
4604 return LoongArchISD::CTZ_W;
4605 case ISD::CTLZ:
4606 return LoongArchISD::CLZ_W;
4607 }
4608}
4609
4610// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4611// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4612// otherwise be promoted to i64, making it difficult to select the
4613// SLL_W/.../*W later one because the fact the operation was originally of
4614// type i8/i16/i32 is lost.
4616 unsigned ExtOpc = ISD::ANY_EXTEND) {
4617 SDLoc DL(N);
4618 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4619 SDValue NewOp0, NewRes;
4620
4621 switch (NumOp) {
4622 default:
4623 llvm_unreachable("Unexpected NumOp");
4624 case 1: {
4625 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4626 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4627 break;
4628 }
4629 case 2: {
4630 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4631 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4632 if (N->getOpcode() == ISD::ROTL) {
4633 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4634 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4635 }
4636 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4637 break;
4638 }
4639 // TODO:Handle more NumOp.
4640 }
4641
4642 // ReplaceNodeResults requires we maintain the same type for the return
4643 // value.
4644 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4645}
4646
4647// Converts the given 32-bit operation to a i64 operation with signed extension
4648// semantic to reduce the signed extension instructions.
4650 SDLoc DL(N);
4651 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4652 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4653 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4654 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4655 DAG.getValueType(MVT::i32));
4656 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4657}
4658
4659// Helper function that emits error message for intrinsics with/without chain
4660// and return a UNDEF or and the chain as the results.
4663 StringRef ErrorMsg, bool WithChain = true) {
4664 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4665 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4666 if (!WithChain)
4667 return;
4668 Results.push_back(N->getOperand(0));
4669}
4670
4671template <unsigned N>
4672static void
4674 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4675 unsigned ResOp) {
4676 const StringRef ErrorMsgOOR = "argument out of range";
4677 unsigned Imm = Node->getConstantOperandVal(2);
4678 if (!isUInt<N>(Imm)) {
4680 /*WithChain=*/false);
4681 return;
4682 }
4683 SDLoc DL(Node);
4684 SDValue Vec = Node->getOperand(1);
4685
4686 SDValue PickElt =
4687 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4688 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4690 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4691 PickElt.getValue(0)));
4692}
4693
4696 SelectionDAG &DAG,
4697 const LoongArchSubtarget &Subtarget,
4698 unsigned ResOp) {
4699 SDLoc DL(N);
4700 SDValue Vec = N->getOperand(1);
4701
4702 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4703 Results.push_back(
4704 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4705}
4706
// Replace the results of an ISD::INTRINSIC_WO_CHAIN node whose native result
// is GRLen-wide but whose declared result type is narrower: dispatch on the
// intrinsic ID (operand 0) to a helper that emits the GRLen-typed LoongArch
// node and TRUNCATEs it back to N's result type.
// NOTE(review): this listing comes from a doxygen dump that drops lines with
// hyperlinked identifiers -- the signature line (presumably
// "replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue>
// &Results,") is missing here; restore from upstream before compiling.
4707 static void
4709 SelectionDAG &DAG,
4710 const LoongArchSubtarget &Subtarget) {
4711 switch (N->getConstantOperandVal(0)) {
4712 default:
4713 llvm_unreachable("Unexpected Intrinsic.");
// Element-pick intrinsics: the template argument is presumably the bit-width
// of the lane-index immediate (b: 16 lanes -> 4 bits, h/xv_w: 3, w: 2) --
// confirm against replaceVPICKVE2GRResults' declaration upstream.
4714 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4715 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4716 LoongArchISD::VPICK_SEXT_ELT);
4717 break;
4718 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4719 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4720 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4721 LoongArchISD::VPICK_SEXT_ELT);
4722 break;
4723 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4724 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4725 LoongArchISD::VPICK_SEXT_ELT);
4726 break;
// Unsigned variants use the zero-extending pick node.
4727 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4728 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4729 LoongArchISD::VPICK_ZEXT_ELT);
4730 break;
4731 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4732 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4733 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4734 LoongArchISD::VPICK_ZEXT_ELT);
4735 break;
4736 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4737 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4738 LoongArchISD::VPICK_ZEXT_ELT);
4739 break;
// Vector branch-condition intrinsics: per-element bz.* tests map onto
// VALL_ZERO / VALL_NONZERO, whole-vector *.v variants onto VANY_*.
4740 case Intrinsic::loongarch_lsx_bz_b:
4741 case Intrinsic::loongarch_lsx_bz_h:
4742 case Intrinsic::loongarch_lsx_bz_w:
4743 case Intrinsic::loongarch_lsx_bz_d:
4744 case Intrinsic::loongarch_lasx_xbz_b:
4745 case Intrinsic::loongarch_lasx_xbz_h:
4746 case Intrinsic::loongarch_lasx_xbz_w:
4747 case Intrinsic::loongarch_lasx_xbz_d:
4748 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4749 LoongArchISD::VALL_ZERO);
4750 break;
4751 case Intrinsic::loongarch_lsx_bz_v:
4752 case Intrinsic::loongarch_lasx_xbz_v:
4753 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4754 LoongArchISD::VANY_ZERO);
4755 break;
4756 case Intrinsic::loongarch_lsx_bnz_b:
4757 case Intrinsic::loongarch_lsx_bnz_h:
4758 case Intrinsic::loongarch_lsx_bnz_w:
4759 case Intrinsic::loongarch_lsx_bnz_d:
4760 case Intrinsic::loongarch_lasx_xbnz_b:
4761 case Intrinsic::loongarch_lasx_xbnz_h:
4762 case Intrinsic::loongarch_lasx_xbnz_w:
4763 case Intrinsic::loongarch_lasx_xbnz_d:
4764 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4765 LoongArchISD::VALL_NONZERO);
4766 break;
4767 case Intrinsic::loongarch_lsx_bnz_v:
4768 case Intrinsic::loongarch_lasx_xbnz_v:
4769 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4770 LoongArchISD::VANY_NONZERO);
4771 break;
4772 }
4773}
4774
// Lower a 128-bit ISD::ATOMIC_CMP_SWAP to a PseudoCmpXchg128* machine node:
// the i128 expected (operand 2) and new (operand 3) values are split into
// i64 lo/hi halves, and the pseudo's first two i64 results are recombined
// into an i128 with BUILD_PAIR. The memory operand is carried over onto the
// machine node so later passes keep the atomic memory info.
// NOTE(review): the doxygen extraction dropped the signature line and the
// AtomicOrdering case labels of the switch below; restore from upstream.
4777 SelectionDAG &DAG) {
4778 assert(N->getValueType(0) == MVT::i128 &&
4779 "AtomicCmpSwap on types less than 128 should be legal");
4780 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4781
4782 unsigned Opcode;
4783 switch (MemOp->getMergedOrdering()) {
// NOTE(review): case labels elided by the dump -- presumably the acquire-ish
// orderings select the Acquire pseudo and monotonic selects the plain one.
4787 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4788 break;
4791 Opcode = LoongArch::PseudoCmpXchg128;
4792 break;
4793 default:
4794 llvm_unreachable("Unexpected ordering!");
4795 }
4796
4797 SDLoc DL(N);
// Split the i128 compare/new values into (lo, hi) i64 pairs; the chain
// (operand 0) goes last in the machine-node operand list.
4798 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4799 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4800 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4801 NewVal.first, NewVal.second, N->getOperand(0)};
4802
4803 SDNode *CmpSwap = DAG.getMachineNode(
4804 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4805 Ops);
4806 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
// Results 0/1 are the loaded lo/hi halves; result 2 is unused here
// (presumably a scratch register) and result 3 is the chain.
4807 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4808 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4809 Results.push_back(SDValue(CmpSwap, 3));
4810}
4811
// Custom result-type legalization for nodes whose result type is illegal for
// the target (e.g. i32 operations on LA64). Each case pushes the legalized
// replacement value(s) onto Results; returning without pushing tells the
// legalizer to fall back to its default expansion.
// NOTE(review): this doxygen-extracted listing drops lines containing
// hyperlinked identifiers -- the function signature (presumably
// "void LoongArchTargetLowering::ReplaceNodeResults(SDNode *N,
// SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {") and a few
// interior lines (marked below) are missing; restore from upstream.
4814 SDLoc DL(N);
4815 EVT VT = N->getValueType(0);
4816 switch (N->getOpcode()) {
4817 default:
4818 llvm_unreachable("Don't know how to legalize this operation");
4819 case ISD::ADD:
4820 case ISD::SUB:
4821 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4822 "Unexpected custom legalisation");
4823 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4824 break;
4825 case ISD::SDIV:
4826 case ISD::UDIV:
4827 case ISD::SREM:
4828 case ISD::UREM:
4829 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4830 "Unexpected custom legalisation");
4831 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4832 Subtarget.hasDiv32() && VT == MVT::i32
// NOTE(review): the true-branch line of this conditional (presumably
// "? ISD::ANY_EXTEND") was elided by the extraction.
4834 : ISD::SIGN_EXTEND));
4835 break;
4836 case ISD::SHL:
4837 case ISD::SRA:
4838 case ISD::SRL:
4839 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4840 "Unexpected custom legalisation");
// Only non-constant shift amounts are widened here; constant-amount shifts
// fall through and are handled elsewhere.
4841 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4842 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4843 break;
4844 }
4845 break;
4846 case ISD::ROTL:
4847 case ISD::ROTR:
4848 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4849 "Unexpected custom legalisation");
4850 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4851 break;
4852 case ISD::FP_TO_SINT: {
4853 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4854 "Unexpected custom legalisation");
4855 SDValue Src = N->getOperand(0);
4856 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4857 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
// NOTE(review): the comparison's right-hand side and opening brace
// (presumably "TargetLowering::TypeSoftenFloat) {") were elided.
4859 if (!isTypeLegal(Src.getValueType()))
4860 return;
4861 if (Src.getValueType() == MVT::f16)
4862 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4863 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4864 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4865 return;
4866 }
4867 // If the FP type needs to be softened, emit a library call using the 'si'
4868 // version. If we left it to default legalization we'd end up with 'di'.
4869 RTLIB::Libcall LC;
4870 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4871 MakeLibCallOptions CallOptions;
4872 EVT OpVT = Src.getValueType();
4873 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4874 SDValue Chain = SDValue();
4875 SDValue Result;
4876 std::tie(Result, Chain) =
4877 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4878 Results.push_back(Result);
4879 break;
4880 }
4881 case ISD::BITCAST: {
4882 SDValue Src = N->getOperand(0);
4883 EVT SrcVT = Src.getValueType();
// f32->i32 on LA64 goes through the FPR->GPR move node; f64->i64 on LA32 is
// split into two i32 halves and repacked with BUILD_PAIR.
4884 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4885 Subtarget.hasBasicF()) {
4886 SDValue Dst =
4887 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4888 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4889 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4890 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4891 DAG.getVTList(MVT::i32, MVT::i32), Src);
4892 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4893 NewReg.getValue(0), NewReg.getValue(1));
4894 Results.push_back(RetReg);
4895 }
4896 break;
4897 }
4898 case ISD::FP_TO_UINT: {
4899 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4900 "Unexpected custom legalisation");
4901 auto &TLI = DAG.getTargetLoweringInfo();
4902 SDValue Tmp1, Tmp2;
4903 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4904 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4905 break;
4906 }
4907 case ISD::BSWAP: {
4908 SDValue Src = N->getOperand(0);
4909 assert((VT == MVT::i16 || VT == MVT::i32) &&
4910 "Unexpected custom legalization");
4911 MVT GRLenVT = Subtarget.getGRLenVT();
4912 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4913 SDValue Tmp;
4914 switch (VT.getSizeInBits()) {
4915 default:
4916 llvm_unreachable("Unexpected operand width");
4917 case 16:
4918 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4919 break;
4920 case 32:
4921 // Only LA64 will get to here due to the size mismatch between VT and
4922 // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
4923 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4924 break;
4925 }
4926 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4927 break;
4928 }
4929 case ISD::BITREVERSE: {
4930 SDValue Src = N->getOperand(0);
4931 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4932 "Unexpected custom legalization");
4933 MVT GRLenVT = Subtarget.getGRLenVT();
4934 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4935 SDValue Tmp;
4936 switch (VT.getSizeInBits()) {
4937 default:
4938 llvm_unreachable("Unexpected operand width");
4939 case 8:
4940 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4941 break;
4942 case 32:
4943 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4944 break;
4945 }
4946 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4947 break;
4948 }
4949 case ISD::CTLZ:
4950 case ISD::CTTZ: {
4951 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4952 "Unexpected custom legalisation");
4953 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4954 break;
4955 }
// NOTE(review): the case label (presumably "case ISD::INTRINSIC_W_CHAIN: {")
// was elided by the extraction.
4957 SDValue Chain = N->getOperand(0);
4958 SDValue Op2 = N->getOperand(2);
4959 MVT GRLenVT = Subtarget.getGRLenVT();
4960 const StringRef ErrorMsgOOR = "argument out of range";
4961 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4962 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4963
4964 switch (N->getConstantOperandVal(1)) {
4965 default:
4966 llvm_unreachable("Unexpected Intrinsic.");
4967 case Intrinsic::loongarch_movfcsr2gr: {
4968 if (!Subtarget.hasBasicF()) {
4969 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4970 return;
4971 }
4972 unsigned Imm = Op2->getAsZExtVal();
4973 if (!isUInt<2>(Imm)) {
4974 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4975 return;
4976 }
4977 SDValue MOVFCSR2GRResults = DAG.getNode(
4978 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4979 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4980 Results.push_back(
4981 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4982 Results.push_back(MOVFCSR2GRResults.getValue(1));
4983 break;
4984 }
// CRC intrinsics: both operands are any-extended to i64 and the i64 result
// is truncated back to the node's result type; the chain is forwarded.
4985#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4986 case Intrinsic::loongarch_##NAME: { \
4987 SDValue NODE = DAG.getNode( \
4988 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4989 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4990 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4991 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4992 Results.push_back(NODE.getValue(1)); \
4993 break; \
4994 }
4995 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4996 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4997 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4998 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4999 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5000 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5001#undef CRC_CASE_EXT_BINARYOP
5002
5003#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5004 case Intrinsic::loongarch_##NAME: { \
5005 SDValue NODE = DAG.getNode( \
5006 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5007 {Chain, Op2, \
5008 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5009 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5010 Results.push_back(NODE.getValue(1)); \
5011 break; \
5012 }
5013 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5014 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5015#undef CRC_CASE_EXT_UNARYOP
// 64-bit CSR/IOCSR intrinsics are diagnosed (not lowered) on LA32.
5016#define CSR_CASE(ID) \
5017 case Intrinsic::loongarch_##ID: { \
5018 if (!Subtarget.is64Bit()) \
5019 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5020 break; \
5021 }
5022 CSR_CASE(csrrd_d);
5023 CSR_CASE(csrwr_d);
5024 CSR_CASE(csrxchg_d);
5025 CSR_CASE(iocsrrd_d);
5026#undef CSR_CASE
5027 case Intrinsic::loongarch_csrrd_w: {
// CSR addresses are 14-bit unsigned immediates.
5028 unsigned Imm = Op2->getAsZExtVal();
5029 if (!isUInt<14>(Imm)) {
5030 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5031 return;
5032 }
5033 SDValue CSRRDResults =
5034 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5035 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5036 Results.push_back(
5037 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5038 Results.push_back(CSRRDResults.getValue(1));
5039 break;
5040 }
5041 case Intrinsic::loongarch_csrwr_w: {
5042 unsigned Imm = N->getConstantOperandVal(3);
5043 if (!isUInt<14>(Imm)) {
5044 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5045 return;
5046 }
5047 SDValue CSRWRResults =
5048 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5049 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5050 DAG.getConstant(Imm, DL, GRLenVT)});
5051 Results.push_back(
5052 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5053 Results.push_back(CSRWRResults.getValue(1));
5054 break;
5055 }
5056 case Intrinsic::loongarch_csrxchg_w: {
5057 unsigned Imm = N->getConstantOperandVal(4);
5058 if (!isUInt<14>(Imm)) {
5059 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5060 return;
5061 }
5062 SDValue CSRXCHGResults = DAG.getNode(
5063 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5064 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5065 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5066 DAG.getConstant(Imm, DL, GRLenVT)});
5067 Results.push_back(
5068 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5069 Results.push_back(CSRXCHGResults.getValue(1));
5070 break;
5071 }
5072#define IOCSRRD_CASE(NAME, NODE) \
5073 case Intrinsic::loongarch_##NAME: { \
5074 SDValue IOCSRRDResults = \
5075 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5076 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5077 Results.push_back( \
5078 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5079 Results.push_back(IOCSRRDResults.getValue(1)); \
5080 break; \
5081 }
5082 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5083 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5084 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5085#undef IOCSRRD_CASE
5086 case Intrinsic::loongarch_cpucfg: {
5087 SDValue CPUCFGResults =
5088 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5089 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5090 Results.push_back(
5091 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5092 Results.push_back(CPUCFGResults.getValue(1));
5093 break;
5094 }
5095 case Intrinsic::loongarch_lddir_d: {
5096 if (!Subtarget.is64Bit()) {
5097 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5098 return;
5099 }
5100 break;
5101 }
5102 }
5103 break;
5104 }
5105 case ISD::READ_REGISTER: {
// Reading a register narrower than GRLen is always an error; emit a
// diagnostic and keep legalization going with UNDEF + the original chain.
5106 if (Subtarget.is64Bit())
5107 DAG.getContext()->emitError(
5108 "On LA64, only 64-bit registers can be read.");
5109 else
5110 DAG.getContext()->emitError(
5111 "On LA32, only 32-bit registers can be read.");
5112 Results.push_back(DAG.getUNDEF(VT));
5113 Results.push_back(N->getOperand(0));
5114 break;
5115 }
// NOTE(review): the case label (presumably "case ISD::INTRINSIC_WO_CHAIN: {")
// was elided by the extraction.
5117 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5118 break;
5119 }
5120 case ISD::LROUND: {
5121 SDValue Op0 = N->getOperand(0);
5122 EVT OpVT = Op0.getValueType();
5123 RTLIB::Libcall LC =
5124 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5125 MakeLibCallOptions CallOptions;
5126 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5127 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5128 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5129 Results.push_back(Result);
5130 break;
5131 }
5132 case ISD::ATOMIC_CMP_SWAP: {
// NOTE(review): the call line (presumably
// "replaceCMP_XCHG_128Results(N, Results, DAG);") was elided.
5134 break;
5135 }
5136 case ISD::TRUNCATE: {
5137 MVT VT = N->getSimpleValueType(0);
5138 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5139 return;
5140
5141 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5142 SDValue In = N->getOperand(0);
5143 EVT InVT = In.getValueType();
5144 EVT InEltVT = InVT.getVectorElementType();
5145 EVT EltVT = VT.getVectorElementType();
5146 unsigned MinElts = VT.getVectorNumElements();
5147 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5148 unsigned InBits = InVT.getSizeInBits();
5149
5150 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5151 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
// Build a shuffle mask that selects the low EltVT part of each input
// element; unused widened lanes stay undef (-1).
5152 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5153 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5154 for (unsigned I = 0; I < MinElts; ++I)
5155 TruncMask[I] = Scale * I;
5156
5157 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5158 MVT SVT = In.getSimpleValueType().getScalarType();
5159 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5160 SDValue WidenIn =
5161 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5162 DAG.getVectorIdxConstant(0, DL));
5163 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5164 "Illegal vector type in truncation");
5165 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5166 Results.push_back(
5167 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5168 return;
5169 }
5170 }
5171
5172 break;
5173 }
5174 }
5175}
5176
5177 /// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
///
/// Only fires for 128/256-bit vector types. Either operand of the AND may be
/// the inverted one; isNOT peels the NOT and returns the underlying value.
/// Returns SDValue() when the pattern does not match.
// NOTE(review): the signature line (presumably "static SDValue
// combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,") was elided by the
// doxygen extraction; restore from upstream.
5179 SelectionDAG &DAG) {
5180 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5181
5182 MVT VT = N->getSimpleValueType(0);
5183 if (!VT.is128BitVector() && !VT.is256BitVector())
5184 return SDValue();
5185
5186 SDValue X, Y;
5187 SDValue N0 = N->getOperand(0);
5188 SDValue N1 = N->getOperand(1);
5189
// AND is commutative: accept the NOT on either side.
5190 if (SDValue Not = isNOT(N0, DAG)) {
5191 X = Not;
5192 Y = N1;
5193 } else if (SDValue Not = isNOT(N1, DAG)) {
5194 X = Not;
5195 Y = N0;
5196 } else
5197 return SDValue();
5198
// isNOT may return a value of a different (bitcast-compatible) vector type;
// normalize both operands to VT before building the target node.
5199 X = DAG.getBitcast(VT, X);
5200 Y = DAG.getBitcast(VT, Y);
5201 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5202}
5203
// DAG combine for ISD::AND: first try the vector VANDN fold, then try to
// match scalar AND-with-shifted-mask patterns onto BSTRPICK (optionally
// followed by SLLI when the mask does not start at bit 0).
// NOTE(review): the signature lines (presumably "static SDValue
// performANDCombine(SDNode *N, SelectionDAG &DAG,
// TargetLowering::DAGCombinerInfo &DCI,") were elided by the doxygen
// extraction; restore from upstream.
5206 const LoongArchSubtarget &Subtarget) {
5207 if (DCI.isBeforeLegalizeOps())
5208 return SDValue();
5209
5210 SDValue FirstOperand = N->getOperand(0);
5211 SDValue SecondOperand = N->getOperand(1);
5212 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5213 EVT ValTy = N->getValueType(0);
5214 SDLoc DL(N);
5215 uint64_t lsb, msb;
5216 unsigned SMIdx, SMLen;
5217 ConstantSDNode *CN;
5218 SDValue NewOperand;
5219 MVT GRLenVT = Subtarget.getGRLenVT();
5220
5221 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5222 return R;
5223
5224 // BSTRPICK requires the 32S feature.
5225 if (!Subtarget.has32S())
5226 return SDValue();
5227
5228 // Op's second operand must be a shifted mask.
5229 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5230 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5231 return SDValue();
5232
5233 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5234 // Pattern match BSTRPICK.
5235 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5236 // => BSTRPICK $dst, $src, msb, lsb
5237 // where msb = lsb + len - 1
5238
5239 // The second operand of the shift must be an immediate.
5240 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5241 return SDValue();
5242
5243 lsb = CN->getZExtValue();
5244
5245 // Return if the shifted mask does not start at bit 0 or the sum of its
5246 // length and lsb exceeds the word's size.
5247 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5248 return SDValue();
5249
5250 NewOperand = FirstOperand.getOperand(0);
5251 } else {
5252 // Pattern match BSTRPICK.
5253 // $dst = and $src, (2**len - 1) , if len > 12
5254 // => BSTRPICK $dst, $src, msb, lsb
5255 // where lsb = 0 and msb = len - 1
5256
5257 // If the mask is <= 0xfff, andi can be used instead.
5258 if (CN->getZExtValue() <= 0xfff)
5259 return SDValue();
5260
5261 // Return if the MSB would exceed the word's size.
5262 if (SMIdx + SMLen > ValTy.getSizeInBits())
5263 return SDValue();
5264
5265 if (SMIdx > 0) {
5266 // Omit if the constant has more than 2 uses. This is a conservative
5267 // decision. Whether it is a win depends on the HW microarchitecture.
5268 // However it should always be better for 1 and 2 uses.
5269 if (CN->use_size() > 2)
5270 return SDValue();
5271 // Return if the constant can be composed by a single LU12I.W.
5272 if ((CN->getZExtValue() & 0xfff) == 0)
5273 return SDValue();
5274 // Return if the constant can be composed by a single ADDI with
5275 // the zero register.
5276 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5277 return SDValue();
5278 }
5279
5280 lsb = SMIdx;
5281 NewOperand = FirstOperand;
5282 }
5283
5284 msb = lsb + SMLen - 1;
5285 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5286 DAG.getConstant(msb, DL, GRLenVT),
5287 DAG.getConstant(lsb, DL, GRLenVT));
5288 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5289 return NR0;
5290 // Try to optimize to
5291 // bstrpick $Rd, $Rs, msb, lsb
5292 // slli $Rd, $Rd, lsb
5293 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5294 DAG.getConstant(lsb, DL, GRLenVT));
5295}
5296
// DAG combine for ISD::SRL: fold (srl (and X, shifted-mask), shamt) into
// BSTRPICK when the shift amount lands inside the mask's bit range.
// NOTE(review): the signature lines (presumably "static SDValue
// performSRLCombine(SDNode *N, SelectionDAG &DAG,
// TargetLowering::DAGCombinerInfo &DCI,") were elided by the doxygen
// extraction; restore from upstream.
5299 const LoongArchSubtarget &Subtarget) {
5300 // BSTRPICK requires the 32S feature.
5301 if (!Subtarget.has32S())
5302 return SDValue();
5303
5304 if (DCI.isBeforeLegalizeOps())
5305 return SDValue();
5306
5307 // $dst = srl (and $src, Mask), Shamt
5308 // =>
5309 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5310 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5311 //
5312
5313 SDValue FirstOperand = N->getOperand(0);
5314 ConstantSDNode *CN;
5315 EVT ValTy = N->getValueType(0);
5316 SDLoc DL(N);
5317 MVT GRLenVT = Subtarget.getGRLenVT();
5318 unsigned MaskIdx, MaskLen;
5319 uint64_t Shamt;
5320
5321 // The first operand must be an AND and the second operand of the AND must be
5322 // a shifted mask.
5323 if (FirstOperand.getOpcode() != ISD::AND ||
5324 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5325 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5326 return SDValue();
5327
5328 // The second operand (shift amount) must be an immediate.
5329 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5330 return SDValue();
5331
// The extracted field runs from bit Shamt up to the mask's top bit; bits
// below Shamt were shifted out and bits above the mask are already zero.
5332 Shamt = CN->getZExtValue();
5333 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5334 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5335 FirstOperand->getOperand(0),
5336 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5337 DAG.getConstant(Shamt, DL, GRLenVT));
5338
5339 return SDValue();
5340}
5341
5342 // Helper to peek through bitops/trunc/setcc to determine size of source vector.
5343 // Allows BITCASTCombine to determine what size vector generated a <X x i1>.
// Recurses through FREEZE, bitwise ops and selects; only SETCC/TRUNCATE
// leaves actually compare their operand's bit size against Size, and
// all-zeros/all-ones BUILD_VECTORs are accepted as size-agnostic.
5344 static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5345 unsigned Depth) {
5346 // Limit recursion.
// NOTE(review): the recursion-limit condition line (presumably
// "if (Depth >= SelectionDAG::MaxRecursionDepth)") was elided by the
// doxygen extraction; without it the bare "return false;" below would make
// the function trivially false -- restore from upstream.
5348 return false;
5349 switch (Src.getOpcode()) {
5350 case ISD::SETCC:
5351 case ISD::TRUNCATE:
5352 return Src.getOperand(0).getValueSizeInBits() == Size;
5353 case ISD::FREEZE:
5354 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5355 case ISD::AND:
5356 case ISD::XOR:
5357 case ISD::OR:
5358 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5359 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5360 case ISD::SELECT:
5361 case ISD::VSELECT:
5362 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5363 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5364 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5365 case ISD::BUILD_VECTOR:
5366 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5367 ISD::isBuildVectorAllOnes(Src.getNode());
5368 }
5369 return false;
5370}
5371
5372 // Helper to push sign extension of vXi1 SETCC result through bitops.
// Leaves (SETCC/FREEZE/TRUNCATE/BUILD_VECTOR) are sign-extended directly to
// SExtVT; bitwise ops and selects are rebuilt at SExtVT with their operands
// recursively extended, so the extension ends up on the SETCC leaves.
// NOTE(review): the signature line (presumably "static SDValue
// signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,") was elided by
// the doxygen extraction; restore from upstream.
5374 SDValue Src, const SDLoc &DL) {
5375 switch (Src.getOpcode()) {
5376 case ISD::SETCC:
5377 case ISD::FREEZE:
5378 case ISD::TRUNCATE:
5379 case ISD::BUILD_VECTOR:
5380 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5381 case ISD::AND:
5382 case ISD::XOR:
5383 case ISD::OR:
5384 return DAG.getNode(
5385 Src.getOpcode(), DL, SExtVT,
5386 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5387 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5388 case ISD::SELECT:
5389 case ISD::VSELECT:
// The select condition (operand 0) keeps its type; only the two value
// operands are extended.
5390 return DAG.getSelect(
5391 DL, SExtVT, Src.getOperand(0),
5392 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5393 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5394 }
5395 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5396}
5397
// Combine (bitcast (setcc X, 0/-1, cc)) into a single [X]VMSK{EQZ,GEZ,LTZ,
// NEZ} mask-extraction node: the SETCC's comparison against an all-zeros or
// all-ones splat selects the matching vmsk variant, whose GRLen result is
// then zext/truncated and bitcast to the original result type.
// NOTE(review): the doxygen extraction elided the function name/parameter
// lines (presumably "performSETCC_BITCASTCombine(SDNode *N, SelectionDAG
// &DAG, TargetLowering::DAGCombinerInfo &DCI,") and the definition of `T`
// near the bottom; restore from upstream.
5398 static SDValue
5401 const LoongArchSubtarget &Subtarget) {
5402 SDLoc DL(N);
5403 EVT VT = N->getValueType(0);
5404 SDValue Src = N->getOperand(0);
5405 EVT SrcVT = Src.getValueType();
5406
5407 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5408 return SDValue();
5409
5410 bool UseLASX;
// ISD::DELETED_NODE doubles as a "no opcode chosen yet" sentinel.
5411 unsigned Opc = ISD::DELETED_NODE;
5412 EVT CmpVT = Src.getOperand(0).getValueType();
5413 EVT EltVT = CmpVT.getVectorElementType();
5414
// 128-bit compares need LSX; 256-bit compares need LASX (plus 32S).
5415 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5416 UseLASX = false;
5417 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5418 CmpVT.getSizeInBits() == 256)
5419 UseLASX = true;
5420 else
5421 return SDValue();
5422
5423 SDValue SrcN1 = Src.getOperand(1);
5424 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5425 default:
5426 break;
5427 case ISD::SETEQ:
5428 // x == 0 => not (vmsknez.b x)
5429 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5430 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5431 break;
5432 case ISD::SETGT:
5433 // x > -1 => vmskgez.b x
5434 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5435 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5436 break;
5437 case ISD::SETGE:
5438 // x >= 0 => vmskgez.b x
5439 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5440 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5441 break;
5442 case ISD::SETLT:
5443 // x < 0 => vmskltz.{b,h,w,d} x
5444 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5445 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5446 EltVT == MVT::i64))
5447 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5448 break;
5449 case ISD::SETLE:
5450 // x <= -1 => vmskltz.{b,h,w,d} x
5451 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5452 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5453 EltVT == MVT::i64))
5454 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5455 break;
5456 case ISD::SETNE:
5457 // x != 0 => vmsknez.b x
5458 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5459 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5460 break;
5461 }
5462
5463 if (Opc == ISD::DELETED_NODE)
5464 return SDValue();
5465
5466 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
// NOTE(review): the line defining `T` (presumably an integer EVT with one
// bit per compared vector element) was elided by the extraction.
5468 V = DAG.getZExtOrTrunc(V, DL, T);
5469 return DAG.getBitcast(VT, V);
5470}
5471
// DAG combine for ISD::BITCAST of a vXi1 vector: first try the direct
// SETCC+BITCAST fold above, otherwise materialize the i1 vector via a sign
// extension to a legal vector type followed by [X]VMSKLTZ (sign-bit mask
// extraction). On LSX-only subtargets, a v32i8 source is split into two
// 128-bit halves whose 16-bit masks are OR-ed together.
// NOTE(review): the doxygen extraction elided the signature lines
// (presumably "static SDValue performBITCASTCombine(SDNode *N, SelectionDAG
// &DAG, TargetLowering::DAGCombinerInfo &DCI,") and the definition of `T`
// near the bottom; restore from upstream.
5474 const LoongArchSubtarget &Subtarget) {
5475 SDLoc DL(N);
5476 EVT VT = N->getValueType(0);
5477 SDValue Src = N->getOperand(0);
5478 EVT SrcVT = Src.getValueType();
5479 MVT GRLenVT = Subtarget.getGRLenVT();
5480
5481 if (!DCI.isBeforeLegalizeOps())
5482 return SDValue();
5483
5484 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5485 return SDValue();
5486
5487 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5488 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5489 if (Res)
5490 return Res;
5491
5492 // Generate vXi1 using [X]VMSKLTZ
5493 MVT SExtVT;
5494 unsigned Opc;
5495 bool UseLASX = false;
5496 bool PropagateSExt = false;
5497
// Give up on compares wider than 256 bits; no vmsk instruction covers them.
5498 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5499 EVT CmpVT = Src.getOperand(0).getValueType();
5500 if (CmpVT.getSizeInBits() > 256)
5501 return SDValue();
5502 }
5503
// Pick the narrowest legal sign-extension type for each i1 element count;
// when the i1 vector was produced from a 256-bit source, extend to the
// LASX-sized type instead and push the extension through the bitops.
5504 switch (SrcVT.getSimpleVT().SimpleTy) {
5505 default:
5506 return SDValue();
5507 case MVT::v2i1:
5508 SExtVT = MVT::v2i64;
5509 break;
5510 case MVT::v4i1:
5511 SExtVT = MVT::v4i32;
5512 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5513 SExtVT = MVT::v4i64;
5514 UseLASX = true;
5515 PropagateSExt = true;
5516 }
5517 break;
5518 case MVT::v8i1:
5519 SExtVT = MVT::v8i16;
5520 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5521 SExtVT = MVT::v8i32;
5522 UseLASX = true;
5523 PropagateSExt = true;
5524 }
5525 break;
5526 case MVT::v16i1:
5527 SExtVT = MVT::v16i8;
5528 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5529 SExtVT = MVT::v16i16;
5530 UseLASX = true;
5531 PropagateSExt = true;
5532 }
5533 break;
5534 case MVT::v32i1:
5535 SExtVT = MVT::v32i8;
5536 UseLASX = true;
5537 break;
5538 };
5539 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5540 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5541
5542 SDValue V;
// Without LASX, handle v32i8 by splitting into two 128-bit VMSKLTZ results
// (low half in bits 0-15, high half shifted into bits 16-31); any other
// LASX-sized request cannot be lowered here.
5543 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5544 if (Src.getSimpleValueType() == MVT::v32i8) {
5545 SDValue Lo, Hi;
5546 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5547 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5548 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5549 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5550 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5551 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5552 } else if (UseLASX) {
5553 return SDValue();
5554 }
5555 }
5556
5557 if (!V) {
5558 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5559 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5560 }
5561
// NOTE(review): the line defining `T` (presumably an integer EVT with one
// bit per source vector element) was elided by the extraction.
5563 V = DAG.getZExtOrTrunc(V, DL, T);
5564 return DAG.getBitcast(VT, V);
5565}
5566
5569 const LoongArchSubtarget &Subtarget) {
5570 MVT GRLenVT = Subtarget.getGRLenVT();
5571 EVT ValTy = N->getValueType(0);
5572 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5573 ConstantSDNode *CN0, *CN1;
5574 SDLoc DL(N);
5575 unsigned ValBits = ValTy.getSizeInBits();
5576 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5577 unsigned Shamt;
5578 bool SwapAndRetried = false;
5579
5580 // BSTRPICK requires the 32S feature.
5581 if (!Subtarget.has32S())
5582 return SDValue();
5583
5584 if (DCI.isBeforeLegalizeOps())
5585 return SDValue();
5586
5587 if (ValBits != 32 && ValBits != 64)
5588 return SDValue();
5589
5590Retry:
5591 // 1st pattern to match BSTRINS:
5592 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5593 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5594 // =>
5595 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5596 if (N0.getOpcode() == ISD::AND &&
5597 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5598 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5599 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5600 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5601 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5602 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5603 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5604 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5605 (MaskIdx0 + MaskLen0 <= ValBits)) {
5606 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5607 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5608 N1.getOperand(0).getOperand(0),
5609 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5610 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5611 }
5612
5613 // 2nd pattern to match BSTRINS:
5614 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5615 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5616 // =>
5617 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5618 if (N0.getOpcode() == ISD::AND &&
5619 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5620 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5621 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5622 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5623 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5624 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5625 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5626 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5627 (MaskIdx0 + MaskLen0 <= ValBits)) {
5628 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5629 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5630 N1.getOperand(0).getOperand(0),
5631 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5632 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5633 }
5634
5635 // 3rd pattern to match BSTRINS:
5636 // R = or (and X, mask0), (and Y, mask1)
5637 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5638 // =>
5639 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5640 // where msb = lsb + size - 1
5641 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5642 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5643 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5644 (MaskIdx0 + MaskLen0 <= 64) &&
5645 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5646 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5647 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5648 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5649 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5650 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5651 DAG.getConstant(ValBits == 32
5652 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5653 : (MaskIdx0 + MaskLen0 - 1),
5654 DL, GRLenVT),
5655 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5656 }
5657
5658 // 4th pattern to match BSTRINS:
5659 // R = or (and X, mask), (shl Y, shamt)
5660 // where mask = (2**shamt - 1)
5661 // =>
5662 // R = BSTRINS X, Y, ValBits - 1, shamt
5663 // where ValBits = 32 or 64
5664 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5665 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5666 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5667 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5668 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5669 (MaskIdx0 + MaskLen0 <= ValBits)) {
5670 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5671 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5672 N1.getOperand(0),
5673 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5674 DAG.getConstant(Shamt, DL, GRLenVT));
5675 }
5676
5677 // 5th pattern to match BSTRINS:
5678 // R = or (and X, mask), const
5679 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5680 // =>
5681 // R = BSTRINS X, (const >> lsb), msb, lsb
5682 // where msb = lsb + size - 1
5683 if (N0.getOpcode() == ISD::AND &&
5684 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5685 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5686 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5687 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5688 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5689 return DAG.getNode(
5690 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5691 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5692 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5693 : (MaskIdx0 + MaskLen0 - 1),
5694 DL, GRLenVT),
5695 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5696 }
5697
5698 // 6th pattern.
5699 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5700 // by the incoming bits are known to be zero.
5701 // =>
5702 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5703 //
5704 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5705 // pattern is more common than the 1st. So we put the 1st before the 6th in
5706 // order to match as many nodes as possible.
5707 ConstantSDNode *CNMask, *CNShamt;
5708 unsigned MaskIdx, MaskLen;
5709 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5710 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5711 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5712 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5713 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5714 Shamt = CNShamt->getZExtValue();
5715 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5716 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5717 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5718 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5719 N1.getOperand(0).getOperand(0),
5720 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5721 DAG.getConstant(Shamt, DL, GRLenVT));
5722 }
5723 }
5724
5725 // 7th pattern.
5726 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5727 // overwritten by the incoming bits are known to be zero.
5728 // =>
5729 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5730 //
5731 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5732 // before the 7th in order to match as many nodes as possible.
5733 if (N1.getOpcode() == ISD::AND &&
5734 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5735 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5736 N1.getOperand(0).getOpcode() == ISD::SHL &&
5737 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5738 CNShamt->getZExtValue() == MaskIdx) {
5739 APInt ShMask(ValBits, CNMask->getZExtValue());
5740 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5741 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5742 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5743 N1.getOperand(0).getOperand(0),
5744 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5745 DAG.getConstant(MaskIdx, DL, GRLenVT));
5746 }
5747 }
5748
5749 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5750 if (!SwapAndRetried) {
5751 std::swap(N0, N1);
5752 SwapAndRetried = true;
5753 goto Retry;
5754 }
5755
5756 SwapAndRetried = false;
5757Retry2:
5758 // 8th pattern.
5759 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5760 // the incoming bits are known to be zero.
5761 // =>
5762 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5763 //
5764 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5765 // we put it here in order to match as many nodes as possible or generate less
5766 // instructions.
5767 if (N1.getOpcode() == ISD::AND &&
5768 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5769 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5770 APInt ShMask(ValBits, CNMask->getZExtValue());
5771 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5772 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5773 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5774 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5775 N1->getOperand(0),
5776 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5777 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5778 DAG.getConstant(MaskIdx, DL, GRLenVT));
5779 }
5780 }
5781 // Swap N0/N1 and retry.
5782 if (!SwapAndRetried) {
5783 std::swap(N0, N1);
5784 SwapAndRetried = true;
5785 goto Retry2;
5786 }
5787
5788 return SDValue();
5789}
5790
5791static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5792 ExtType = ISD::NON_EXTLOAD;
5793
5794 switch (V.getNode()->getOpcode()) {
5795 case ISD::LOAD: {
5796 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5797 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5798 (LoadNode->getMemoryVT() == MVT::i16)) {
5799 ExtType = LoadNode->getExtensionType();
5800 return true;
5801 }
5802 return false;
5803 }
5804 case ISD::AssertSext: {
5805 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5806 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5807 ExtType = ISD::SEXTLOAD;
5808 return true;
5809 }
5810 return false;
5811 }
5812 case ISD::AssertZext: {
5813 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5814 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5815 ExtType = ISD::ZEXTLOAD;
5816 return true;
5817 }
5818 return false;
5819 }
5820 default:
5821 return false;
5822 }
5823
5824 return false;
5825}
5826
5827// Eliminate redundant truncation and zero-extension nodes.
5828// * Case 1:
5829// +------------+ +------------+ +------------+
5830// | Input1 | | Input2 | | CC |
5831// +------------+ +------------+ +------------+
5832// | | |
5833// V V +----+
5834// +------------+ +------------+ |
5835// | TRUNCATE | | TRUNCATE | |
5836// +------------+ +------------+ |
5837// | | |
5838// V V |
5839// +------------+ +------------+ |
5840// | ZERO_EXT | | ZERO_EXT | |
5841// +------------+ +------------+ |
5842// | | |
5843// | +-------------+ |
5844// V V | |
5845// +----------------+ | |
5846// | AND | | |
5847// +----------------+ | |
5848// | | |
5849// +---------------+ | |
5850// | | |
5851// V V V
5852// +-------------+
5853// | CMP |
5854// +-------------+
5855// * Case 2:
5856// +------------+ +------------+ +-------------+ +------------+ +------------+
5857// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5858// +------------+ +------------+ +-------------+ +------------+ +------------+
5859// | | | | |
5860// V | | | |
5861// +------------+ | | | |
5862// | XOR |<---------------------+ | |
5863// +------------+ | | |
5864// | | | |
5865// V V +---------------+ |
5866// +------------+ +------------+ | |
5867// | TRUNCATE | | TRUNCATE | | +-------------------------+
5868// +------------+ +------------+ | |
5869// | | | |
5870// V V | |
5871// +------------+ +------------+ | |
5872// | ZERO_EXT | | ZERO_EXT | | |
5873// +------------+ +------------+ | |
5874// | | | |
5875// V V | |
5876// +----------------+ | |
5877// | AND | | |
5878// +----------------+ | |
5879// | | |
5880// +---------------+ | |
5881// | | |
5882// V V V
5883// +-------------+
5884// | CMP |
5885// +-------------+
                                   const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();

  // The compared value must be an AND whose operands are zero-extended
  // truncations (see the diagrams above); otherwise nothing to simplify.
  SDNode *AndNode = N->getOperand(0).getNode();
  if (AndNode->getOpcode() != ISD::AND)
    return SDValue();

  SDValue AndInputValue2 = AndNode->getOperand(1);
  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  SDValue CmpInputValue = N->getOperand(1);
  SDValue AndInputValue1 = AndNode->getOperand(0);
  if (AndInputValue1.getOpcode() == ISD::XOR) {
    // Case 2: first AND operand is (xor X, -1) and the comparison is against
    // constant 0; this form is only meaningful for eq/ne.
    if (CC != ISD::SETEQ && CC != ISD::SETNE)
      return SDValue();
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
    if (!CN || !CN->isAllOnes())
      return SDValue();
    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
    if (!CN || !CN->isZero())
      return SDValue();
    // Look through the XOR to the zero-extension underneath.
    AndInputValue1 = AndInputValue1.getOperand(0);
    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
    // Case 1: the AND's second operand must also be the compared value.
    if (AndInputValue2 != CmpInputValue)
      return SDValue();
  } else {
    return SDValue();
  }

  // Both zero-extensions must wrap truncations of the original inputs.
  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
  ISD::LoadExtType ExtType1;
  ISD::LoadExtType ExtType2;

  // The truncated inputs must be i8/i16 loads or carry AssertSext/AssertZext
  // guarantees, so their upper bits are already known (see checkValueWidth).
  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
      !checkValueWidth(TruncInputValue2, ExtType2))
    return SDValue();

  // The inputs and the AND must all agree on the value type.
  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
    return SDValue();

  // Require extension kinds that make dropping trunc/zext safe: either the
  // second input is zero-extended, or both inputs are sign-extended.
  if ((ExtType2 != ISD::ZEXTLOAD) &&
      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
    return SDValue();

  // These truncation and zero-extension nodes are not necessary, remove them.
  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
                               TruncInputValue1, TruncInputValue2);
  SDValue NewSetCC =
      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
  return SDValue(N, 0);
}
5953
// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
                                      const LoongArchSubtarget &Subtarget) {
  // Only run after operation legalization, when target nodes are stable.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue Src = N->getOperand(0);
  if (Src.getOpcode() != LoongArchISD::REVB_2W)
    return SDValue();

  // Fold the byte swap + full bit reversal into a single per-byte bit
  // reversal of the original operand.
  return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
                     Src.getOperand(0));
}
5968
// Perform common combines for BR_CC and SELECT_CC conditions.
// LHS, RHS and CC are updated in place; returns true when a fold was applied
// so the caller can rebuild its node from the simplified operands.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
                       SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();

  // As far as arithmetic right shift always saves the sign,
  // shift can be omitted.
  // Fold setlt (sra X, N), 0 -> setlt X, 0 and
  // setge (sra X, N), 0 -> setge X, 0
  if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
      LHS.getOpcode() == ISD::SRA) {
    LHS = LHS.getOperand(0);
    return true;
  }

  // The remaining folds only apply to integer equality comparisons.
  if (!ISD::isIntEqualitySetCC(CCVal))
    return false;

  // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
  // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
  if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
      LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
    // If we're looking for eq 0 instead of ne 0, we need to invert the
    // condition.
    bool Invert = CCVal == ISD::SETEQ;
    CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
    if (Invert)
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

    // Replace the outer compare with the inner setcc's operands/condition.
    RHS = LHS.getOperand(1);
    LHS = LHS.getOperand(0);
    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    CC = DAG.getCondCode(CCVal);
    return true;
  }

  // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
  if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue LHS0 = LHS.getOperand(0);
    if (LHS0.getOpcode() == ISD::AND &&
        LHS0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t Mask = LHS0.getConstantOperandVal(1);
      uint64_t ShAmt = LHS.getConstantOperandVal(1);
      // The AND must isolate exactly the bit that the SRL moves to bit 0.
      if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
        CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
        CC = DAG.getCondCode(CCVal);

        // Shift the tested bit into the sign position so ge/lt can test it.
        ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
        LHS = LHS0.getOperand(0);
        if (ShAmt != 0)
          LHS =
              DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
                          DAG.getConstant(ShAmt, DL, LHS.getValueType()));
        return true;
      }
    }
  }

  // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
  // This can occur when legalizing some floating point comparisons.
  APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
  if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
    CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
    CC = DAG.getCondCode(CCVal);
    RHS = DAG.getConstant(0, DL, LHS.getValueType());
    return true;
  }

  return false;
}
6041
                                   const LoongArchSubtarget &Subtarget) {
  // BR_CC operands: 0 = chain, 1 = lhs, 2 = rhs, 3 = cond code, 4 = target.
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  SDValue CC = N->getOperand(3);
  SDLoc DL(N);

  // If the shared condition-code folds simplified the compare, rebuild the
  // branch from the updated operands.
  if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
    return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
                       N->getOperand(0), LHS, RHS, CC, N->getOperand(4));

  return SDValue();
}
6056
                                       const LoongArchSubtarget &Subtarget) {
  // Transform
  // SELECT_CC operands: 0 = lhs, 1 = rhs, 2 = cond code, 3/4 = true/false.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue CC = N->getOperand(2);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
  SDValue TrueV = N->getOperand(3);
  SDValue FalseV = N->getOperand(4);
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  // If the True and False values are the same, we don't need a select_cc.
  if (TrueV == FalseV)
    return TrueV;

  // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
  // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
    // Canonicalize SETGE to the SETLT form by swapping the select arms.
    if (CCVal == ISD::CondCode::SETGE)
      std::swap(TrueV, FalseV);

    int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
    int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
    // Only handle simm12, if it is not in this range, it can be considered as
    // register.
    if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
        isInt<12>(TrueSImm - FalseSImm)) {
      // x >> (GRLen-1) is all-ones when x < 0 and zero otherwise, so the AND
      // yields either (TrueImm - FalseImm) or 0 before the final ADD.
      SDValue SRA =
          DAG.getNode(ISD::SRA, DL, VT, LHS,
                      DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
      SDValue AND =
          DAG.getNode(ISD::AND, DL, VT, SRA,
                      DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
    }

    // Restore the original arm order for the fall-through combine below.
    if (CCVal == ISD::CondCode::SETGE)
      std::swap(TrueV, FalseV);
  }

  // Retry with the condition simplified by the shared BR_CC/SELECT_CC folds.
  if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
    return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
                       {LHS, RHS, CC, TrueV, FalseV});

  return SDValue();
}
6107
// Validate the intrinsic immediate at operand index ImmOp: it must fit in N
// bits (signed when IsSigned, unsigned otherwise). On failure an error is
// emitted and UNDEF is returned; on success the immediate is returned as a
// GRLen-typed constant.
template <unsigned N>
                                     SelectionDAG &DAG,
                                     const LoongArchSubtarget &Subtarget,
                                     bool IsSigned = false) {
  SDLoc DL(Node);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
  }
  return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
}
6124
// Splat the intrinsic immediate at operand index ImmOp across a vector of the
// node's result type. Emits an error and returns UNDEF when the immediate
// does not fit in N bits (signed when IsSigned, unsigned otherwise).
template <unsigned N>
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
                                   SelectionDAG &DAG, bool IsSigned = false) {
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));

  // Check the ImmArg.
  if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
      (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }
  // Build the splat from the (sign- or zero-interpreted) immediate value.
  return DAG.getConstant(
          IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      DL, ResTy);
}
6144
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue Vec = Node->getOperand(2);
  // AND each element with (EltBits - 1); callers use the result as a vector
  // shift amount, so this keeps each amount within the element width.
  SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
  return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
}
6152
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  // Per-element single-bit mask: 1 << (amount masked to the element width).
  SDValue Bit =
      DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));

  // Clear that bit in operand 1: x & ~(1 << amt).
  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}
6163
// Clear the bit selected by the unsigned immediate (operand 2) in every
// element of operand 1. Emits an error and returns UNDEF if the immediate
// does not fit in N bits.
template <unsigned N>
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  // Mask = ~(1 << imm); ANDing clears the selected bit in each element.
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
}
6181
// Set the bit selected by the unsigned immediate (operand 2) in every element
// of operand 1. Emits an error and returns UNDEF if the immediate does not
// fit in N bits.
template <unsigned N>
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  // OR with (1 << imm) sets the selected bit in each element.
  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
}
6198
// Toggle the bit selected by the unsigned immediate (operand 2) in every
// element of operand 1. Emits an error and returns UNDEF if the immediate
// does not fit in N bits.
template <unsigned N>
  SDLoc DL(Node);
  EVT ResTy = Node->getValueType(0);
  auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
  // Check the unsigned ImmArg.
  if (!isUInt<N>(CImm->getZExtValue())) {
    DAG.getContext()->emitError(Node->getOperationName(0) +
                                ": argument out of range.");
    return DAG.getNode(ISD::UNDEF, DL, ResTy);
  }

  // XOR with (1 << imm) flips the selected bit in each element.
  APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
  SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
  return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
}
6215
// Validate the W-bit unsigned lane-index immediate (operand 2) and emit the
// target node ResOp taking the vector (operand 1), the index, and the element
// type. Emits an error and returns UNDEF for out-of-range indices.
template <unsigned W>
                                    unsigned ResOp) {
  unsigned Imm = N->getConstantOperandVal(2);
  if (!isUInt<W>(Imm)) {
    const StringRef ErrorMsg = "argument out of range";
    DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
    return DAG.getUNDEF(N->getValueType(0));
  }
  SDLoc DL(N);
  SDValue Vec = N->getOperand(1);
  SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
  return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
}
6231
6232static SDValue
6235 const LoongArchSubtarget &Subtarget) {
6236 SDLoc DL(N);
6237 switch (N->getConstantOperandVal(0)) {
6238 default:
6239 break;
6240 case Intrinsic::loongarch_lsx_vadd_b:
6241 case Intrinsic::loongarch_lsx_vadd_h:
6242 case Intrinsic::loongarch_lsx_vadd_w:
6243 case Intrinsic::loongarch_lsx_vadd_d:
6244 case Intrinsic::loongarch_lasx_xvadd_b:
6245 case Intrinsic::loongarch_lasx_xvadd_h:
6246 case Intrinsic::loongarch_lasx_xvadd_w:
6247 case Intrinsic::loongarch_lasx_xvadd_d:
6248 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6249 N->getOperand(2));
6250 case Intrinsic::loongarch_lsx_vaddi_bu:
6251 case Intrinsic::loongarch_lsx_vaddi_hu:
6252 case Intrinsic::loongarch_lsx_vaddi_wu:
6253 case Intrinsic::loongarch_lsx_vaddi_du:
6254 case Intrinsic::loongarch_lasx_xvaddi_bu:
6255 case Intrinsic::loongarch_lasx_xvaddi_hu:
6256 case Intrinsic::loongarch_lasx_xvaddi_wu:
6257 case Intrinsic::loongarch_lasx_xvaddi_du:
6258 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6259 lowerVectorSplatImm<5>(N, 2, DAG));
6260 case Intrinsic::loongarch_lsx_vsub_b:
6261 case Intrinsic::loongarch_lsx_vsub_h:
6262 case Intrinsic::loongarch_lsx_vsub_w:
6263 case Intrinsic::loongarch_lsx_vsub_d:
6264 case Intrinsic::loongarch_lasx_xvsub_b:
6265 case Intrinsic::loongarch_lasx_xvsub_h:
6266 case Intrinsic::loongarch_lasx_xvsub_w:
6267 case Intrinsic::loongarch_lasx_xvsub_d:
6268 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6269 N->getOperand(2));
6270 case Intrinsic::loongarch_lsx_vsubi_bu:
6271 case Intrinsic::loongarch_lsx_vsubi_hu:
6272 case Intrinsic::loongarch_lsx_vsubi_wu:
6273 case Intrinsic::loongarch_lsx_vsubi_du:
6274 case Intrinsic::loongarch_lasx_xvsubi_bu:
6275 case Intrinsic::loongarch_lasx_xvsubi_hu:
6276 case Intrinsic::loongarch_lasx_xvsubi_wu:
6277 case Intrinsic::loongarch_lasx_xvsubi_du:
6278 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6279 lowerVectorSplatImm<5>(N, 2, DAG));
6280 case Intrinsic::loongarch_lsx_vneg_b:
6281 case Intrinsic::loongarch_lsx_vneg_h:
6282 case Intrinsic::loongarch_lsx_vneg_w:
6283 case Intrinsic::loongarch_lsx_vneg_d:
6284 case Intrinsic::loongarch_lasx_xvneg_b:
6285 case Intrinsic::loongarch_lasx_xvneg_h:
6286 case Intrinsic::loongarch_lasx_xvneg_w:
6287 case Intrinsic::loongarch_lasx_xvneg_d:
6288 return DAG.getNode(
6289 ISD::SUB, DL, N->getValueType(0),
6290 DAG.getConstant(
6291 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6292 /*isSigned=*/true),
6293 SDLoc(N), N->getValueType(0)),
6294 N->getOperand(1));
6295 case Intrinsic::loongarch_lsx_vmax_b:
6296 case Intrinsic::loongarch_lsx_vmax_h:
6297 case Intrinsic::loongarch_lsx_vmax_w:
6298 case Intrinsic::loongarch_lsx_vmax_d:
6299 case Intrinsic::loongarch_lasx_xvmax_b:
6300 case Intrinsic::loongarch_lasx_xvmax_h:
6301 case Intrinsic::loongarch_lasx_xvmax_w:
6302 case Intrinsic::loongarch_lasx_xvmax_d:
6303 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6304 N->getOperand(2));
6305 case Intrinsic::loongarch_lsx_vmax_bu:
6306 case Intrinsic::loongarch_lsx_vmax_hu:
6307 case Intrinsic::loongarch_lsx_vmax_wu:
6308 case Intrinsic::loongarch_lsx_vmax_du:
6309 case Intrinsic::loongarch_lasx_xvmax_bu:
6310 case Intrinsic::loongarch_lasx_xvmax_hu:
6311 case Intrinsic::loongarch_lasx_xvmax_wu:
6312 case Intrinsic::loongarch_lasx_xvmax_du:
6313 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6314 N->getOperand(2));
6315 case Intrinsic::loongarch_lsx_vmaxi_b:
6316 case Intrinsic::loongarch_lsx_vmaxi_h:
6317 case Intrinsic::loongarch_lsx_vmaxi_w:
6318 case Intrinsic::loongarch_lsx_vmaxi_d:
6319 case Intrinsic::loongarch_lasx_xvmaxi_b:
6320 case Intrinsic::loongarch_lasx_xvmaxi_h:
6321 case Intrinsic::loongarch_lasx_xvmaxi_w:
6322 case Intrinsic::loongarch_lasx_xvmaxi_d:
6323 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6324 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6325 case Intrinsic::loongarch_lsx_vmaxi_bu:
6326 case Intrinsic::loongarch_lsx_vmaxi_hu:
6327 case Intrinsic::loongarch_lsx_vmaxi_wu:
6328 case Intrinsic::loongarch_lsx_vmaxi_du:
6329 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6330 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6331 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6332 case Intrinsic::loongarch_lasx_xvmaxi_du:
6333 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6334 lowerVectorSplatImm<5>(N, 2, DAG));
6335 case Intrinsic::loongarch_lsx_vmin_b:
6336 case Intrinsic::loongarch_lsx_vmin_h:
6337 case Intrinsic::loongarch_lsx_vmin_w:
6338 case Intrinsic::loongarch_lsx_vmin_d:
6339 case Intrinsic::loongarch_lasx_xvmin_b:
6340 case Intrinsic::loongarch_lasx_xvmin_h:
6341 case Intrinsic::loongarch_lasx_xvmin_w:
6342 case Intrinsic::loongarch_lasx_xvmin_d:
6343 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6344 N->getOperand(2));
6345 case Intrinsic::loongarch_lsx_vmin_bu:
6346 case Intrinsic::loongarch_lsx_vmin_hu:
6347 case Intrinsic::loongarch_lsx_vmin_wu:
6348 case Intrinsic::loongarch_lsx_vmin_du:
6349 case Intrinsic::loongarch_lasx_xvmin_bu:
6350 case Intrinsic::loongarch_lasx_xvmin_hu:
6351 case Intrinsic::loongarch_lasx_xvmin_wu:
6352 case Intrinsic::loongarch_lasx_xvmin_du:
6353 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6354 N->getOperand(2));
6355 case Intrinsic::loongarch_lsx_vmini_b:
6356 case Intrinsic::loongarch_lsx_vmini_h:
6357 case Intrinsic::loongarch_lsx_vmini_w:
6358 case Intrinsic::loongarch_lsx_vmini_d:
6359 case Intrinsic::loongarch_lasx_xvmini_b:
6360 case Intrinsic::loongarch_lasx_xvmini_h:
6361 case Intrinsic::loongarch_lasx_xvmini_w:
6362 case Intrinsic::loongarch_lasx_xvmini_d:
6363 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6364 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6365 case Intrinsic::loongarch_lsx_vmini_bu:
6366 case Intrinsic::loongarch_lsx_vmini_hu:
6367 case Intrinsic::loongarch_lsx_vmini_wu:
6368 case Intrinsic::loongarch_lsx_vmini_du:
6369 case Intrinsic::loongarch_lasx_xvmini_bu:
6370 case Intrinsic::loongarch_lasx_xvmini_hu:
6371 case Intrinsic::loongarch_lasx_xvmini_wu:
6372 case Intrinsic::loongarch_lasx_xvmini_du:
6373 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6374 lowerVectorSplatImm<5>(N, 2, DAG));
6375 case Intrinsic::loongarch_lsx_vmul_b:
6376 case Intrinsic::loongarch_lsx_vmul_h:
6377 case Intrinsic::loongarch_lsx_vmul_w:
6378 case Intrinsic::loongarch_lsx_vmul_d:
6379 case Intrinsic::loongarch_lasx_xvmul_b:
6380 case Intrinsic::loongarch_lasx_xvmul_h:
6381 case Intrinsic::loongarch_lasx_xvmul_w:
6382 case Intrinsic::loongarch_lasx_xvmul_d:
6383 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6384 N->getOperand(2));
6385 case Intrinsic::loongarch_lsx_vmadd_b:
6386 case Intrinsic::loongarch_lsx_vmadd_h:
6387 case Intrinsic::loongarch_lsx_vmadd_w:
6388 case Intrinsic::loongarch_lsx_vmadd_d:
6389 case Intrinsic::loongarch_lasx_xvmadd_b:
6390 case Intrinsic::loongarch_lasx_xvmadd_h:
6391 case Intrinsic::loongarch_lasx_xvmadd_w:
6392 case Intrinsic::loongarch_lasx_xvmadd_d: {
6393 EVT ResTy = N->getValueType(0);
6394 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6395 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6396 N->getOperand(3)));
6397 }
6398 case Intrinsic::loongarch_lsx_vmsub_b:
6399 case Intrinsic::loongarch_lsx_vmsub_h:
6400 case Intrinsic::loongarch_lsx_vmsub_w:
6401 case Intrinsic::loongarch_lsx_vmsub_d:
6402 case Intrinsic::loongarch_lasx_xvmsub_b:
6403 case Intrinsic::loongarch_lasx_xvmsub_h:
6404 case Intrinsic::loongarch_lasx_xvmsub_w:
6405 case Intrinsic::loongarch_lasx_xvmsub_d: {
6406 EVT ResTy = N->getValueType(0);
6407 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6408 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6409 N->getOperand(3)));
6410 }
6411 case Intrinsic::loongarch_lsx_vdiv_b:
6412 case Intrinsic::loongarch_lsx_vdiv_h:
6413 case Intrinsic::loongarch_lsx_vdiv_w:
6414 case Intrinsic::loongarch_lsx_vdiv_d:
6415 case Intrinsic::loongarch_lasx_xvdiv_b:
6416 case Intrinsic::loongarch_lasx_xvdiv_h:
6417 case Intrinsic::loongarch_lasx_xvdiv_w:
6418 case Intrinsic::loongarch_lasx_xvdiv_d:
6419 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6420 N->getOperand(2));
6421 case Intrinsic::loongarch_lsx_vdiv_bu:
6422 case Intrinsic::loongarch_lsx_vdiv_hu:
6423 case Intrinsic::loongarch_lsx_vdiv_wu:
6424 case Intrinsic::loongarch_lsx_vdiv_du:
6425 case Intrinsic::loongarch_lasx_xvdiv_bu:
6426 case Intrinsic::loongarch_lasx_xvdiv_hu:
6427 case Intrinsic::loongarch_lasx_xvdiv_wu:
6428 case Intrinsic::loongarch_lasx_xvdiv_du:
6429 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6430 N->getOperand(2));
6431 case Intrinsic::loongarch_lsx_vmod_b:
6432 case Intrinsic::loongarch_lsx_vmod_h:
6433 case Intrinsic::loongarch_lsx_vmod_w:
6434 case Intrinsic::loongarch_lsx_vmod_d:
6435 case Intrinsic::loongarch_lasx_xvmod_b:
6436 case Intrinsic::loongarch_lasx_xvmod_h:
6437 case Intrinsic::loongarch_lasx_xvmod_w:
6438 case Intrinsic::loongarch_lasx_xvmod_d:
6439 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6440 N->getOperand(2));
6441 case Intrinsic::loongarch_lsx_vmod_bu:
6442 case Intrinsic::loongarch_lsx_vmod_hu:
6443 case Intrinsic::loongarch_lsx_vmod_wu:
6444 case Intrinsic::loongarch_lsx_vmod_du:
6445 case Intrinsic::loongarch_lasx_xvmod_bu:
6446 case Intrinsic::loongarch_lasx_xvmod_hu:
6447 case Intrinsic::loongarch_lasx_xvmod_wu:
6448 case Intrinsic::loongarch_lasx_xvmod_du:
6449 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6450 N->getOperand(2));
6451 case Intrinsic::loongarch_lsx_vand_v:
6452 case Intrinsic::loongarch_lasx_xvand_v:
6453 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6454 N->getOperand(2));
6455 case Intrinsic::loongarch_lsx_vor_v:
6456 case Intrinsic::loongarch_lasx_xvor_v:
6457 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6458 N->getOperand(2));
6459 case Intrinsic::loongarch_lsx_vxor_v:
6460 case Intrinsic::loongarch_lasx_xvxor_v:
6461 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6462 N->getOperand(2));
6463 case Intrinsic::loongarch_lsx_vnor_v:
6464 case Intrinsic::loongarch_lasx_xvnor_v: {
6465 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6466 N->getOperand(2));
6467 return DAG.getNOT(DL, Res, Res->getValueType(0));
6468 }
6469 case Intrinsic::loongarch_lsx_vandi_b:
6470 case Intrinsic::loongarch_lasx_xvandi_b:
6471 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6472 lowerVectorSplatImm<8>(N, 2, DAG));
6473 case Intrinsic::loongarch_lsx_vori_b:
6474 case Intrinsic::loongarch_lasx_xvori_b:
6475 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6476 lowerVectorSplatImm<8>(N, 2, DAG));
6477 case Intrinsic::loongarch_lsx_vxori_b:
6478 case Intrinsic::loongarch_lasx_xvxori_b:
6479 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6480 lowerVectorSplatImm<8>(N, 2, DAG));
6481 case Intrinsic::loongarch_lsx_vsll_b:
6482 case Intrinsic::loongarch_lsx_vsll_h:
6483 case Intrinsic::loongarch_lsx_vsll_w:
6484 case Intrinsic::loongarch_lsx_vsll_d:
6485 case Intrinsic::loongarch_lasx_xvsll_b:
6486 case Intrinsic::loongarch_lasx_xvsll_h:
6487 case Intrinsic::loongarch_lasx_xvsll_w:
6488 case Intrinsic::loongarch_lasx_xvsll_d:
6489 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6490 truncateVecElts(N, DAG));
6491 case Intrinsic::loongarch_lsx_vslli_b:
6492 case Intrinsic::loongarch_lasx_xvslli_b:
6493 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6494 lowerVectorSplatImm<3>(N, 2, DAG));
6495 case Intrinsic::loongarch_lsx_vslli_h:
6496 case Intrinsic::loongarch_lasx_xvslli_h:
6497 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6498 lowerVectorSplatImm<4>(N, 2, DAG));
6499 case Intrinsic::loongarch_lsx_vslli_w:
6500 case Intrinsic::loongarch_lasx_xvslli_w:
6501 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6502 lowerVectorSplatImm<5>(N, 2, DAG));
6503 case Intrinsic::loongarch_lsx_vslli_d:
6504 case Intrinsic::loongarch_lasx_xvslli_d:
6505 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6506 lowerVectorSplatImm<6>(N, 2, DAG));
6507 case Intrinsic::loongarch_lsx_vsrl_b:
6508 case Intrinsic::loongarch_lsx_vsrl_h:
6509 case Intrinsic::loongarch_lsx_vsrl_w:
6510 case Intrinsic::loongarch_lsx_vsrl_d:
6511 case Intrinsic::loongarch_lasx_xvsrl_b:
6512 case Intrinsic::loongarch_lasx_xvsrl_h:
6513 case Intrinsic::loongarch_lasx_xvsrl_w:
6514 case Intrinsic::loongarch_lasx_xvsrl_d:
6515 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6516 truncateVecElts(N, DAG));
6517 case Intrinsic::loongarch_lsx_vsrli_b:
6518 case Intrinsic::loongarch_lasx_xvsrli_b:
6519 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6520 lowerVectorSplatImm<3>(N, 2, DAG));
6521 case Intrinsic::loongarch_lsx_vsrli_h:
6522 case Intrinsic::loongarch_lasx_xvsrli_h:
6523 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6524 lowerVectorSplatImm<4>(N, 2, DAG));
6525 case Intrinsic::loongarch_lsx_vsrli_w:
6526 case Intrinsic::loongarch_lasx_xvsrli_w:
6527 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6528 lowerVectorSplatImm<5>(N, 2, DAG));
6529 case Intrinsic::loongarch_lsx_vsrli_d:
6530 case Intrinsic::loongarch_lasx_xvsrli_d:
6531 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6532 lowerVectorSplatImm<6>(N, 2, DAG));
6533 case Intrinsic::loongarch_lsx_vsra_b:
6534 case Intrinsic::loongarch_lsx_vsra_h:
6535 case Intrinsic::loongarch_lsx_vsra_w:
6536 case Intrinsic::loongarch_lsx_vsra_d:
6537 case Intrinsic::loongarch_lasx_xvsra_b:
6538 case Intrinsic::loongarch_lasx_xvsra_h:
6539 case Intrinsic::loongarch_lasx_xvsra_w:
6540 case Intrinsic::loongarch_lasx_xvsra_d:
6541 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6542 truncateVecElts(N, DAG));
6543 case Intrinsic::loongarch_lsx_vsrai_b:
6544 case Intrinsic::loongarch_lasx_xvsrai_b:
6545 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6546 lowerVectorSplatImm<3>(N, 2, DAG));
6547 case Intrinsic::loongarch_lsx_vsrai_h:
6548 case Intrinsic::loongarch_lasx_xvsrai_h:
6549 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6550 lowerVectorSplatImm<4>(N, 2, DAG));
6551 case Intrinsic::loongarch_lsx_vsrai_w:
6552 case Intrinsic::loongarch_lasx_xvsrai_w:
6553 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6554 lowerVectorSplatImm<5>(N, 2, DAG));
6555 case Intrinsic::loongarch_lsx_vsrai_d:
6556 case Intrinsic::loongarch_lasx_xvsrai_d:
6557 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6558 lowerVectorSplatImm<6>(N, 2, DAG));
6559 case Intrinsic::loongarch_lsx_vclz_b:
6560 case Intrinsic::loongarch_lsx_vclz_h:
6561 case Intrinsic::loongarch_lsx_vclz_w:
6562 case Intrinsic::loongarch_lsx_vclz_d:
6563 case Intrinsic::loongarch_lasx_xvclz_b:
6564 case Intrinsic::loongarch_lasx_xvclz_h:
6565 case Intrinsic::loongarch_lasx_xvclz_w:
6566 case Intrinsic::loongarch_lasx_xvclz_d:
6567 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6568 case Intrinsic::loongarch_lsx_vpcnt_b:
6569 case Intrinsic::loongarch_lsx_vpcnt_h:
6570 case Intrinsic::loongarch_lsx_vpcnt_w:
6571 case Intrinsic::loongarch_lsx_vpcnt_d:
6572 case Intrinsic::loongarch_lasx_xvpcnt_b:
6573 case Intrinsic::loongarch_lasx_xvpcnt_h:
6574 case Intrinsic::loongarch_lasx_xvpcnt_w:
6575 case Intrinsic::loongarch_lasx_xvpcnt_d:
6576 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6577 case Intrinsic::loongarch_lsx_vbitclr_b:
6578 case Intrinsic::loongarch_lsx_vbitclr_h:
6579 case Intrinsic::loongarch_lsx_vbitclr_w:
6580 case Intrinsic::loongarch_lsx_vbitclr_d:
6581 case Intrinsic::loongarch_lasx_xvbitclr_b:
6582 case Intrinsic::loongarch_lasx_xvbitclr_h:
6583 case Intrinsic::loongarch_lasx_xvbitclr_w:
6584 case Intrinsic::loongarch_lasx_xvbitclr_d:
6585 return lowerVectorBitClear(N, DAG);
6586 case Intrinsic::loongarch_lsx_vbitclri_b:
6587 case Intrinsic::loongarch_lasx_xvbitclri_b:
6588 return lowerVectorBitClearImm<3>(N, DAG);
6589 case Intrinsic::loongarch_lsx_vbitclri_h:
6590 case Intrinsic::loongarch_lasx_xvbitclri_h:
6591 return lowerVectorBitClearImm<4>(N, DAG);
6592 case Intrinsic::loongarch_lsx_vbitclri_w:
6593 case Intrinsic::loongarch_lasx_xvbitclri_w:
6594 return lowerVectorBitClearImm<5>(N, DAG);
6595 case Intrinsic::loongarch_lsx_vbitclri_d:
6596 case Intrinsic::loongarch_lasx_xvbitclri_d:
6597 return lowerVectorBitClearImm<6>(N, DAG);
6598 case Intrinsic::loongarch_lsx_vbitset_b:
6599 case Intrinsic::loongarch_lsx_vbitset_h:
6600 case Intrinsic::loongarch_lsx_vbitset_w:
6601 case Intrinsic::loongarch_lsx_vbitset_d:
6602 case Intrinsic::loongarch_lasx_xvbitset_b:
6603 case Intrinsic::loongarch_lasx_xvbitset_h:
6604 case Intrinsic::loongarch_lasx_xvbitset_w:
6605 case Intrinsic::loongarch_lasx_xvbitset_d: {
6606 EVT VecTy = N->getValueType(0);
6607 SDValue One = DAG.getConstant(1, DL, VecTy);
6608 return DAG.getNode(
6609 ISD::OR, DL, VecTy, N->getOperand(1),
6610 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6611 }
6612 case Intrinsic::loongarch_lsx_vbitseti_b:
6613 case Intrinsic::loongarch_lasx_xvbitseti_b:
6614 return lowerVectorBitSetImm<3>(N, DAG);
6615 case Intrinsic::loongarch_lsx_vbitseti_h:
6616 case Intrinsic::loongarch_lasx_xvbitseti_h:
6617 return lowerVectorBitSetImm<4>(N, DAG);
6618 case Intrinsic::loongarch_lsx_vbitseti_w:
6619 case Intrinsic::loongarch_lasx_xvbitseti_w:
6620 return lowerVectorBitSetImm<5>(N, DAG);
6621 case Intrinsic::loongarch_lsx_vbitseti_d:
6622 case Intrinsic::loongarch_lasx_xvbitseti_d:
6623 return lowerVectorBitSetImm<6>(N, DAG);
6624 case Intrinsic::loongarch_lsx_vbitrev_b:
6625 case Intrinsic::loongarch_lsx_vbitrev_h:
6626 case Intrinsic::loongarch_lsx_vbitrev_w:
6627 case Intrinsic::loongarch_lsx_vbitrev_d:
6628 case Intrinsic::loongarch_lasx_xvbitrev_b:
6629 case Intrinsic::loongarch_lasx_xvbitrev_h:
6630 case Intrinsic::loongarch_lasx_xvbitrev_w:
6631 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6632 EVT VecTy = N->getValueType(0);
6633 SDValue One = DAG.getConstant(1, DL, VecTy);
6634 return DAG.getNode(
6635 ISD::XOR, DL, VecTy, N->getOperand(1),
6636 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6637 }
6638 case Intrinsic::loongarch_lsx_vbitrevi_b:
6639 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6640 return lowerVectorBitRevImm<3>(N, DAG);
6641 case Intrinsic::loongarch_lsx_vbitrevi_h:
6642 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6643 return lowerVectorBitRevImm<4>(N, DAG);
6644 case Intrinsic::loongarch_lsx_vbitrevi_w:
6645 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6646 return lowerVectorBitRevImm<5>(N, DAG);
6647 case Intrinsic::loongarch_lsx_vbitrevi_d:
6648 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6649 return lowerVectorBitRevImm<6>(N, DAG);
6650 case Intrinsic::loongarch_lsx_vfadd_s:
6651 case Intrinsic::loongarch_lsx_vfadd_d:
6652 case Intrinsic::loongarch_lasx_xvfadd_s:
6653 case Intrinsic::loongarch_lasx_xvfadd_d:
6654 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6655 N->getOperand(2));
6656 case Intrinsic::loongarch_lsx_vfsub_s:
6657 case Intrinsic::loongarch_lsx_vfsub_d:
6658 case Intrinsic::loongarch_lasx_xvfsub_s:
6659 case Intrinsic::loongarch_lasx_xvfsub_d:
6660 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6661 N->getOperand(2));
6662 case Intrinsic::loongarch_lsx_vfmul_s:
6663 case Intrinsic::loongarch_lsx_vfmul_d:
6664 case Intrinsic::loongarch_lasx_xvfmul_s:
6665 case Intrinsic::loongarch_lasx_xvfmul_d:
6666 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6667 N->getOperand(2));
6668 case Intrinsic::loongarch_lsx_vfdiv_s:
6669 case Intrinsic::loongarch_lsx_vfdiv_d:
6670 case Intrinsic::loongarch_lasx_xvfdiv_s:
6671 case Intrinsic::loongarch_lasx_xvfdiv_d:
6672 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6673 N->getOperand(2));
6674 case Intrinsic::loongarch_lsx_vfmadd_s:
6675 case Intrinsic::loongarch_lsx_vfmadd_d:
6676 case Intrinsic::loongarch_lasx_xvfmadd_s:
6677 case Intrinsic::loongarch_lasx_xvfmadd_d:
6678 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6679 N->getOperand(2), N->getOperand(3));
6680 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6681 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6682 N->getOperand(1), N->getOperand(2),
6683 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6684 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6685 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6686 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6687 N->getOperand(1), N->getOperand(2),
6688 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6689 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6690 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6691 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6692 N->getOperand(1), N->getOperand(2),
6693 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6694 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6695 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6696 N->getOperand(1), N->getOperand(2),
6697 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6698 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6699 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6700 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6701 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6702 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6703 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6704 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6705 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6706 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6707 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6708 N->getOperand(1)));
6709 case Intrinsic::loongarch_lsx_vreplve_b:
6710 case Intrinsic::loongarch_lsx_vreplve_h:
6711 case Intrinsic::loongarch_lsx_vreplve_w:
6712 case Intrinsic::loongarch_lsx_vreplve_d:
6713 case Intrinsic::loongarch_lasx_xvreplve_b:
6714 case Intrinsic::loongarch_lasx_xvreplve_h:
6715 case Intrinsic::loongarch_lasx_xvreplve_w:
6716 case Intrinsic::loongarch_lasx_xvreplve_d:
6717 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6718 N->getOperand(1),
6719 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6720 N->getOperand(2)));
6721 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6722 if (!Subtarget.is64Bit())
6723 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6724 break;
6725 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6726 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6727 if (!Subtarget.is64Bit())
6728 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6729 break;
6730 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6731 if (!Subtarget.is64Bit())
6732 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6733 break;
6734 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6735 if (!Subtarget.is64Bit())
6736 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6737 break;
6738 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6739 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6740 if (!Subtarget.is64Bit())
6741 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6742 break;
6743 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6744 if (!Subtarget.is64Bit())
6745 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6746 break;
6747 case Intrinsic::loongarch_lsx_bz_b:
6748 case Intrinsic::loongarch_lsx_bz_h:
6749 case Intrinsic::loongarch_lsx_bz_w:
6750 case Intrinsic::loongarch_lsx_bz_d:
6751 case Intrinsic::loongarch_lasx_xbz_b:
6752 case Intrinsic::loongarch_lasx_xbz_h:
6753 case Intrinsic::loongarch_lasx_xbz_w:
6754 case Intrinsic::loongarch_lasx_xbz_d:
6755 if (!Subtarget.is64Bit())
6756 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6757 N->getOperand(1));
6758 break;
6759 case Intrinsic::loongarch_lsx_bz_v:
6760 case Intrinsic::loongarch_lasx_xbz_v:
6761 if (!Subtarget.is64Bit())
6762 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6763 N->getOperand(1));
6764 break;
6765 case Intrinsic::loongarch_lsx_bnz_b:
6766 case Intrinsic::loongarch_lsx_bnz_h:
6767 case Intrinsic::loongarch_lsx_bnz_w:
6768 case Intrinsic::loongarch_lsx_bnz_d:
6769 case Intrinsic::loongarch_lasx_xbnz_b:
6770 case Intrinsic::loongarch_lasx_xbnz_h:
6771 case Intrinsic::loongarch_lasx_xbnz_w:
6772 case Intrinsic::loongarch_lasx_xbnz_d:
6773 if (!Subtarget.is64Bit())
6774 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6775 N->getOperand(1));
6776 break;
6777 case Intrinsic::loongarch_lsx_bnz_v:
6778 case Intrinsic::loongarch_lasx_xbnz_v:
6779 if (!Subtarget.is64Bit())
6780 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6781 N->getOperand(1));
6782 break;
6783 case Intrinsic::loongarch_lasx_concat_128_s:
6784 case Intrinsic::loongarch_lasx_concat_128_d:
6785 case Intrinsic::loongarch_lasx_concat_128:
6786 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6787 N->getOperand(1), N->getOperand(2));
6788 }
6789 return SDValue();
6790}
6791
// DAG combine for LoongArchISD::MOVGR2FR_W_LA64 (GPR -> 32-bit FPR move on
// LA64). Cancels a pointless round trip through the integer register file.
6794                                       const LoongArchSubtarget &Subtarget) {
6795  // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6796  // conversion is unnecessary and can be replaced with the
6797  // MOVFR2GR_S_LA64 operand.
6798  SDValue Op0 = N->getOperand(0);
6799  if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6800    return Op0.getOperand(0);
  // No fold applies; an empty SDValue tells the combiner to keep the node.
6801  return SDValue();
6802}
6803
// DAG combine for LoongArchISD::MOVFR2GR_S_LA64 (32-bit FPR -> GPR move on
// LA64). Mirror image of the MOVGR2FR_W_LA64 fold: forward the original GPR
// value when the node just undoes a preceding GPR -> FPR move.
6806                                       const LoongArchSubtarget &Subtarget) {
6807  // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6808  // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6809  // operand.
6810  SDValue Op0 = N->getOperand(0);
6811  if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
    // The value being moved must already have the result type of this node.
6812    assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6813           "Unexpected value type!");
6814    return Op0.getOperand(0);
6815  }
6816  return SDValue();
6817}
6818
// DAG combine for LoongArchISD::VMSKLTZ / XVMSKLTZ: give
// SimplifyDemandedBits a chance to shrink/simplify the inputs feeding the
// mask operation.
6821                                      const LoongArchSubtarget &Subtarget) {
6822  MVT VT = N->getSimpleValueType(0);
6823  unsigned NumBits = VT.getScalarSizeInBits();
6824
6825  // Simplify the inputs.
6826  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6827  APInt DemandedMask(APInt::getAllOnes(NumBits));
  // Returning SDValue(N, 0) signals the combiner that N was updated in place.
6828  if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6829    return SDValue(N, 0);
6830
6831  return SDValue();
6832}
6833
// DAG combine for LoongArchISD::SPLIT_PAIR_F64 (f64 -> two i32 halves, used
// on 32-bit targets with 64-bit FP registers). Folds away redundant
// build/split pairs and materializes constant doubles directly as integers.
6834static SDValue
6837                             const LoongArchSubtarget &Subtarget) {
6838  SDValue Op0 = N->getOperand(0);
6839  SDLoc DL(N);
6840
6841  // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6842  // redundant. Instead, use BuildPairF64's operands directly.
6843  if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6844    return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6845
  // Splitting an undef double yields two undef i32 halves.
6846  if (Op0->isUndef()) {
6847    SDValue Lo = DAG.getUNDEF(MVT::i32);
6848    SDValue Hi = DAG.getUNDEF(MVT::i32);
6849    return DCI.CombineTo(N, Lo, Hi);
6850  }
6851
6852  // It's cheaper to materialise two 32-bit integers than to load a double
6853  // from the constant pool and transfer it to integer registers through the
6854  // stack.
  // Constant f64 input: split its IEEE-754 bit pattern into low/high 32-bit
  // immediates.
6856    APInt V = C->getValueAPF().bitcastToAPInt();
6857    SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6858    SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6859    return DCI.CombineTo(N, Lo, Hi);
6860  }
6861
6862  return SDValue();
6863}
6864
6865/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6868                                  const LoongArchSubtarget &Subtarget) {
  // VANDN computes AND(NOT(N0), N1); the folds below exploit that identity.
6869  SDValue N0 = N->getOperand(0);
6870  SDValue N1 = N->getOperand(1);
6871  MVT VT = N->getSimpleValueType(0);
6872  SDLoc DL(N);
6873
6874  // VANDN(undef, x) -> 0
6875  // VANDN(x, undef) -> 0
6876  if (N0.isUndef() || N1.isUndef())
6877    return DAG.getConstant(0, DL, VT);
6878
6879  // VANDN(0, x) -> x
6881    return N1;
6882
6883  // VANDN(x, 0) -> 0
6885    return DAG.getConstant(0, DL, VT);
6886
6887  // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6889    return DAG.getNOT(DL, N0, VT);
6890
6891  // Turn VANDN back to AND if input is inverted.
6892  if (SDValue Not = isNOT(N0, DAG))
6893    return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6894
6895  // Folds for better commutativity:
6896  if (N1->hasOneUse()) {
6897    // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6898    if (SDValue Not = isNOT(N1, DAG))
6899      return DAG.getNOT(
6900          DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6901
6902    // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6903    // -> NOT(OR(x, SplatVector(~Imm)))
6904    // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6905    // gain benefits.
6906    if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6907        N1.getOpcode() == ISD::BUILD_VECTOR) {
6908      if (SDValue SplatValue =
6909              cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
        // Bail out if the splat scalar has other users; rewriting it could
        // pessimize them.
6910        if (!N1->isOnlyUserOf(SplatValue.getNode()))
6911          return SDValue();
6912
6913        if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
          // Splat the complemented byte so the final NOT(OR(...)) maps to
          // a single vnori.b/xvnori.b.
6914          uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6915          SDValue Not =
6916              DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6917          return DAG.getNOT(
6918              DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6919              VT);
6920        }
6921      }
6922    }
6923  }
6924
6925  return SDValue();
6926}
6927
// DAG combine for ISD::SINT_TO_FP: when the integer source is a plain load
// of the same width, load straight into an FP register instead, avoiding a
// GPR -> FPR transfer.
6930                                        const LoongArchSubtarget &Subtarget) {
6931  SDLoc DL(N);
6932  EVT VT = N->getValueType(0);
6933
  // Only f32/f64 results are handled, and only when the matching FP
  // subtarget feature is available.
6934  if (VT != MVT::f32 && VT != MVT::f64)
6935    return SDValue();
6936  if (VT == MVT::f32 && !Subtarget.hasBasicF())
6937    return SDValue();
6938  if (VT == MVT::f64 && !Subtarget.hasBasicD())
6939    return SDValue();
6940
6941  // Only optimize when the source and destination types have the same width.
6942  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
6943    return SDValue();
6944
6945  SDValue Src = N->getOperand(0);
6946  // If the result of an integer load is only used by an integer-to-float
6947  // conversion, use a fp load instead. This eliminates an integer-to-float-move
6948  // (movgr2fr) instruction.
6949  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
6950      // Do not change the width of a volatile load. This condition check is
6951      // inspired by AArch64.
6952      !cast<LoadSDNode>(Src)->isVolatile()) {
6953    LoadSDNode *LN0 = cast<LoadSDNode>(Src);
6954    SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
6955                               LN0->getPointerInfo(), LN0->getAlign(),
6956                               LN0->getMemOperand()->getFlags());
6957
6958    // Make sure successors of the original load stay after it by updating them
6959    // to use the new Chain.
6960    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
    // SITOF converts the (bit-identical) loaded integer held in an FPR.
6961    return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
6962  }
6963
6964  return SDValue();
6965}
6966
// Central target-specific DAG-combine dispatcher: routes each node opcode to
// its dedicated perform*Combine helper. Returning an empty SDValue means "no
// combine performed".
6968                                                   DAGCombinerInfo &DCI) const {
6969  SelectionDAG &DAG = DCI.DAG;
6970  switch (N->getOpcode()) {
6971  default:
6972    break;
6973  case ISD::AND:
6974    return performANDCombine(N, DAG, DCI, Subtarget);
6975  case ISD::OR:
6976    return performORCombine(N, DAG, DCI, Subtarget);
6977  case ISD::SETCC:
6978    return performSETCCCombine(N, DAG, DCI, Subtarget);
6979  case ISD::SRL:
6980    return performSRLCombine(N, DAG, DCI, Subtarget);
6981  case ISD::BITCAST:
6982    return performBITCASTCombine(N, DAG, DCI, Subtarget);
6983  case ISD::SINT_TO_FP:
6984    return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
6985  case LoongArchISD::BITREV_W:
6986    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6987  case LoongArchISD::BR_CC:
6988    return performBR_CCCombine(N, DAG, DCI, Subtarget);
6989  case LoongArchISD::SELECT_CC:
6990    return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6992    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6993  case LoongArchISD::MOVGR2FR_W_LA64:
6994    return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6995  case LoongArchISD::MOVFR2GR_S_LA64:
6996    return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6997  case LoongArchISD::VMSKLTZ:
6998  case LoongArchISD::XVMSKLTZ:
6999    return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7000  case LoongArchISD::SPLIT_PAIR_F64:
7001    return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7002  case LoongArchISD::VANDN:
7003    return performVANDNCombine(N, DAG, DCI, Subtarget);
7004  }
7005  return SDValue();
7006}
7007
  // Expand a division/modulo machine instruction with an optional
  // divide-by-zero trap, gated on the -loongarch-check-zero-division flag.
  // When disabled, the division is left alone and the original block is
  // returned unchanged.
7010  if (!ZeroDivCheck)
7011    return MBB;
7012
7013  // Build instructions:
7014  // MBB:
7015  //          div(or mod)   $dst, $dividend, $divisor
7016  //          bne           $divisor, $zero, SinkMBB
7017  // BreakMBB:
7018  //          break         7 // BRK_DIVZERO
7019  // SinkMBB:
7020  //          fallthrough
7021  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7022  MachineFunction::iterator It = ++MBB->getIterator();
7023  MachineFunction *MF = MBB->getParent();
7024  auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7025  auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  // Insert both new blocks immediately after MBB, preserving layout order.
7026  MF->insert(It, BreakMBB);
7027  MF->insert(It, SinkMBB);
7028
7029  // Transfer the remainder of MBB and its successor edges to SinkMBB.
7030  SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7031  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7032
7033  const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7034  DebugLoc DL = MI.getDebugLoc();
7035  MachineOperand &Divisor = MI.getOperand(2);
7036  Register DivisorReg = Divisor.getReg();
7037
7038  // MBB:
  // Branch past the trap when the divisor is non-zero.
7039  BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7040      .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7041      .addReg(LoongArch::R0)
7042      .addMBB(SinkMBB);
7043  MBB->addSuccessor(BreakMBB);
7044  MBB->addSuccessor(SinkMBB);
7045
7046  // BreakMBB:
7047  // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7048  // definition of BRK_DIVZERO.
7049  BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7050  BreakMBB->addSuccessor(SinkMBB);
7051
7052  // Clear Divisor's kill flag.
  // The divisor is now also read by the BNE above, so the original kill
  // marking on the division would be wrong.
7053  Divisor.setIsKill(false);
7054
7055  return SinkMBB;
7056}
7057
// Expand the vector branch-on-condition pseudos (PseudoVBZ*/PseudoVBNZ* and
// their LASX XV variants) into a vset* condition-flag instruction plus a
// diamond of blocks that materializes 0 (condition false) or 1 (condition
// true) into the result GPR via a PHI.
7058static MachineBasicBlock *
7060                       const LoongArchSubtarget &Subtarget) {
  // Map each pseudo to the concrete LSX/LASX set-condition instruction.
7061  unsigned CondOpc;
7062  switch (MI.getOpcode()) {
7063  default:
7064    llvm_unreachable("Unexpected opcode");
7065  case LoongArch::PseudoVBZ:
7066    CondOpc = LoongArch::VSETEQZ_V;
7067    break;
7068  case LoongArch::PseudoVBZ_B:
7069    CondOpc = LoongArch::VSETANYEQZ_B;
7070    break;
7071  case LoongArch::PseudoVBZ_H:
7072    CondOpc = LoongArch::VSETANYEQZ_H;
7073    break;
7074  case LoongArch::PseudoVBZ_W:
7075    CondOpc = LoongArch::VSETANYEQZ_W;
7076    break;
7077  case LoongArch::PseudoVBZ_D:
7078    CondOpc = LoongArch::VSETANYEQZ_D;
7079    break;
7080  case LoongArch::PseudoVBNZ:
7081    CondOpc = LoongArch::VSETNEZ_V;
7082    break;
7083  case LoongArch::PseudoVBNZ_B:
7084    CondOpc = LoongArch::VSETALLNEZ_B;
7085    break;
7086  case LoongArch::PseudoVBNZ_H:
7087    CondOpc = LoongArch::VSETALLNEZ_H;
7088    break;
7089  case LoongArch::PseudoVBNZ_W:
7090    CondOpc = LoongArch::VSETALLNEZ_W;
7091    break;
7092  case LoongArch::PseudoVBNZ_D:
7093    CondOpc = LoongArch::VSETALLNEZ_D;
7094    break;
7095  case LoongArch::PseudoXVBZ:
7096    CondOpc = LoongArch::XVSETEQZ_V;
7097    break;
7098  case LoongArch::PseudoXVBZ_B:
7099    CondOpc = LoongArch::XVSETANYEQZ_B;
7100    break;
7101  case LoongArch::PseudoXVBZ_H:
7102    CondOpc = LoongArch::XVSETANYEQZ_H;
7103    break;
7104  case LoongArch::PseudoXVBZ_W:
7105    CondOpc = LoongArch::XVSETANYEQZ_W;
7106    break;
7107  case LoongArch::PseudoXVBZ_D:
7108    CondOpc = LoongArch::XVSETANYEQZ_D;
7109    break;
7110  case LoongArch::PseudoXVBNZ:
7111    CondOpc = LoongArch::XVSETNEZ_V;
7112    break;
7113  case LoongArch::PseudoXVBNZ_B:
7114    CondOpc = LoongArch::XVSETALLNEZ_B;
7115    break;
7116  case LoongArch::PseudoXVBNZ_H:
7117    CondOpc = LoongArch::XVSETALLNEZ_H;
7118    break;
7119  case LoongArch::PseudoXVBNZ_W:
7120    CondOpc = LoongArch::XVSETALLNEZ_W;
7121    break;
7122  case LoongArch::PseudoXVBNZ_D:
7123    CondOpc = LoongArch::XVSETALLNEZ_D;
7124    break;
7125  }
7126
7127  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7128  const BasicBlock *LLVM_BB = BB->getBasicBlock();
7129  DebugLoc DL = MI.getDebugLoc();
7132
  // Create the diamond: BB -> {FalseBB, TrueBB} -> SinkBB.
7133  MachineFunction *F = BB->getParent();
7134  MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7135  MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7136  MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7137
7138  F->insert(It, FalseBB);
7139  F->insert(It, TrueBB);
7140  F->insert(It, SinkBB);
7141
7142  // Transfer the remainder of MBB and its successor edges to Sink.
7143  SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7145
7146  // Insert the real instruction to BB.
  // The vset* instruction writes a condition-flag (CFR) register.
7147  Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7148  BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7149
7150  // Insert branch.
7151  BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7152  BB->addSuccessor(FalseBB);
7153  BB->addSuccessor(TrueBB);
7154
7155  // FalseBB.
  // Materialize 0 (addi.w $rd, $zero, 0) and jump over TrueBB.
7156  Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7157  BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7158      .addReg(LoongArch::R0)
7159      .addImm(0);
7160  BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7161  FalseBB->addSuccessor(SinkBB);
7162
7163  // TrueBB.
  // Materialize 1; TrueBB falls through to SinkBB.
7164  Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7165  BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7166      .addReg(LoongArch::R0)
7167      .addImm(1);
7168  TrueBB->addSuccessor(SinkBB);
7169
7170  // SinkBB: merge the results.
7171  BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7172          MI.getOperand(0).getReg())
7173      .addReg(RD1)
7174      .addMBB(FalseBB)
7175      .addReg(RD2)
7176      .addMBB(TrueBB);
7177
7178  // The pseudo instruction is gone now.
7179  MI.eraseFromParent();
7180  return SinkBB;
7181}
7182
// Expand PseudoXVINSGR2VR_{B,H}: insert a GPR element into a 256-bit LASX
// vector. LASX has no direct insert-from-GPR for byte/halfword elements, so
// either perform the insert on the low 128-bit half (when the source vector
// is undefined and the index lands in the low half) or broadcast the element
// and blend it in with xvpermi.q + xvextrins.
7183static MachineBasicBlock *
7185                        const LoongArchSubtarget &Subtarget) {
7186  unsigned InsOp;
7187  unsigned BroadcastOp;
  // HalfSize = number of elements in one 128-bit half of the 256-bit vector.
7188  unsigned HalfSize;
7189  switch (MI.getOpcode()) {
7190  default:
7191    llvm_unreachable("Unexpected opcode");
7192  case LoongArch::PseudoXVINSGR2VR_B:
7193    HalfSize = 16;
7194    BroadcastOp = LoongArch::XVREPLGR2VR_B;
7195    InsOp = LoongArch::XVEXTRINS_B;
7196    break;
7197  case LoongArch::PseudoXVINSGR2VR_H:
7198    HalfSize = 8;
7199    BroadcastOp = LoongArch::XVREPLGR2VR_H;
7200    InsOp = LoongArch::XVEXTRINS_H;
7201    break;
7202  }
7203  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7204  const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7205  const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7206  DebugLoc DL = MI.getDebugLoc();
7208  // XDst = vector_insert XSrc, Elt, Idx
7209  Register XDst = MI.getOperand(0).getReg();
7210  Register XSrc = MI.getOperand(1).getReg();
7211  Register Elt = MI.getOperand(2).getReg();
7212  unsigned Idx = MI.getOperand(3).getImm();
7213
  // Cheap path: the source vector is IMPLICIT_DEF and the index is in the
  // low half, so a 128-bit LSX vinsgr2vr suffices.
7214  if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7215      Idx < HalfSize) {
7216    Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7217    Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7218
7219    BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7220        .addReg(XSrc, {}, LoongArch::sub_128);
7221    BuildMI(*BB, MI, DL,
7222            TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7223                                   : LoongArch::VINSGR2VR_B),
7224            ScratchSubReg2)
7225        .addReg(ScratchSubReg1)
7226        .addReg(Elt)
7227        .addImm(Idx);
7228
    // Widen the 128-bit result back to a 256-bit register.
7229    BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7230        .addImm(0)
7231        .addReg(ScratchSubReg2)
7232        .addImm(LoongArch::sub_128);
7233  } else {
    // General path: broadcast Elt to all lanes, line up the relevant half
    // with xvpermi.q, then insert the single element with xvextrins.
7234    Register ScratchReg1 = MRI.createVirtualRegister(RC);
7235    Register ScratchReg2 = MRI.createVirtualRegister(RC);
7236
7237    BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7238
7239    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7240        .addReg(ScratchReg1)
7241        .addReg(XSrc)
7242        .addImm(Idx >= HalfSize ? 48 : 18);
7243
    // xvextrins immediate encodes dst index in the high nibble and src index
    // in the low nibble; (i * 17) sets both nibbles to i.
7244    BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7245        .addReg(XSrc)
7246        .addReg(ScratchReg2)
7247        .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7248  }
7249
7250  MI.eraseFromParent();
7251  return BB;
7252}
7253
// Expand a scalar CTPOP pseudo using the LSX vector population-count
// instructions: zero a vector, insert the GPR into lane 0, run vpcnt, and
// read the count back out of lane 0. Requires the LSX extension.
7256                          const LoongArchSubtarget &Subtarget) {
7257  assert(Subtarget.hasExtLSX());
7258  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7259  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7260  DebugLoc DL = MI.getDebugLoc();
7262  Register Dst = MI.getOperand(0).getReg();
7263  Register Src = MI.getOperand(1).getReg();
7264  Register ScratchReg1 = MRI.createVirtualRegister(RC);
7265  Register ScratchReg2 = MRI.createVirtualRegister(RC);
7266  Register ScratchReg3 = MRI.createVirtualRegister(RC);
7267
  // vldi 0 creates an all-zero vector.
7268  BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
  // Insert the scalar into element 0 (64-bit lane on LA64, 32-bit on LA32).
7269  BuildMI(*BB, MI, DL,
7270          TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7271                                       : LoongArch::VINSGR2VR_W),
7272          ScratchReg2)
7273      .addReg(ScratchReg1)
7274      .addReg(Src)
7275      .addImm(0);
7276  BuildMI(
7277      *BB, MI, DL,
7278      TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7279      ScratchReg3)
7280      .addReg(ScratchReg2);
  // Extract the popcount result from element 0 back into the GPR.
7281  BuildMI(*BB, MI, DL,
7282          TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7283                                       : LoongArch::VPICKVE2GR_W),
7284          Dst)
7285      .addReg(ScratchReg3)
7286      .addImm(0);
7287
7288  MI.eraseFromParent();
7289  return BB;
7290}
7291
// Expand the Pseudo[X]VMSK* pseudos: compute a per-element condition mask
// with a vmsk*/xvmsk* instruction, optionally invert it (EQZ is implemented
// as NOT(NZ)), then move the mask bits into a GPR. For 256-bit LASX masks
// the two 128-bit halves are extracted separately and merged with bstrins.
7292static MachineBasicBlock *
7294                       const LoongArchSubtarget &Subtarget) {
7295  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7296  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7297  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7299  Register Dst = MI.getOperand(0).getReg();
7300  Register Src = MI.getOperand(1).getReg();
7301  DebugLoc DL = MI.getDebugLoc();
  // EleBits: element width the mask instruction tests (one mask bit each).
7302  unsigned EleBits = 8;
  // NotOpc != 0 means the raw mask must be inverted afterwards.
7303  unsigned NotOpc = 0;
7304  unsigned MskOpc;
7305
7306  switch (MI.getOpcode()) {
7307  default:
7308    llvm_unreachable("Unexpected opcode");
7309  case LoongArch::PseudoVMSKLTZ_B:
7310    MskOpc = LoongArch::VMSKLTZ_B;
7311    break;
7312  case LoongArch::PseudoVMSKLTZ_H:
7313    MskOpc = LoongArch::VMSKLTZ_H;
7314    EleBits = 16;
7315    break;
7316  case LoongArch::PseudoVMSKLTZ_W:
7317    MskOpc = LoongArch::VMSKLTZ_W;
7318    EleBits = 32;
7319    break;
7320  case LoongArch::PseudoVMSKLTZ_D:
7321    MskOpc = LoongArch::VMSKLTZ_D;
7322    EleBits = 64;
7323    break;
7324  case LoongArch::PseudoVMSKGEZ_B:
7325    MskOpc = LoongArch::VMSKGEZ_B;
7326    break;
7327  case LoongArch::PseudoVMSKEQZ_B:
7328    MskOpc = LoongArch::VMSKNZ_B;
7329    NotOpc = LoongArch::VNOR_V;
7330    break;
7331  case LoongArch::PseudoVMSKNEZ_B:
7332    MskOpc = LoongArch::VMSKNZ_B;
7333    break;
7334  case LoongArch::PseudoXVMSKLTZ_B:
7335    MskOpc = LoongArch::XVMSKLTZ_B;
7336    RC = &LoongArch::LASX256RegClass;
7337    break;
7338  case LoongArch::PseudoXVMSKLTZ_H:
7339    MskOpc = LoongArch::XVMSKLTZ_H;
7340    RC = &LoongArch::LASX256RegClass;
7341    EleBits = 16;
7342    break;
7343  case LoongArch::PseudoXVMSKLTZ_W:
7344    MskOpc = LoongArch::XVMSKLTZ_W;
7345    RC = &LoongArch::LASX256RegClass;
7346    EleBits = 32;
7347    break;
7348  case LoongArch::PseudoXVMSKLTZ_D:
7349    MskOpc = LoongArch::XVMSKLTZ_D;
7350    RC = &LoongArch::LASX256RegClass;
7351    EleBits = 64;
7352    break;
7353  case LoongArch::PseudoXVMSKGEZ_B:
7354    MskOpc = LoongArch::XVMSKGEZ_B;
7355    RC = &LoongArch::LASX256RegClass;
7356    break;
7357  case LoongArch::PseudoXVMSKEQZ_B:
7358    MskOpc = LoongArch::XVMSKNZ_B;
7359    NotOpc = LoongArch::XVNOR_V;
7360    RC = &LoongArch::LASX256RegClass;
7361    break;
7362  case LoongArch::PseudoXVMSKNEZ_B:
7363    MskOpc = LoongArch::XVMSKNZ_B;
7364    RC = &LoongArch::LASX256RegClass;
7365    break;
7366  }
7367
7368  Register Msk = MRI.createVirtualRegister(RC);
7369  if (NotOpc) {
    // EQZ = NOT(NZ): nor the raw mask with itself to invert it.
7370    Register Tmp = MRI.createVirtualRegister(RC);
7371    BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7372    BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7373        .addReg(Tmp, RegState::Kill)
7374        .addReg(Tmp, RegState::Kill);
7375  } else {
7376    BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7377  }
7378
  // 256-bit LASX mask: xvmsk leaves one half-mask per 128-bit lane, so read
  // both 32-bit lane results and pack them with bstrins.
7379  if (TRI->getRegSizeInBits(*RC) > 128) {
7380    Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7381    Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7382    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7383        .addReg(Msk)
7384        .addImm(0);
7385    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7386        .addReg(Msk, RegState::Kill)
7387        .addImm(4);
7388    BuildMI(*BB, MI, DL,
7389            TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7390                                         : LoongArch::BSTRINS_W),
7391            Dst)
7394        .addImm(256 / EleBits - 1)
7395        .addImm(128 / EleBits);
7396  } else {
    // 128-bit mask fits in the low halfword of lane 0.
7397    BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7398        .addReg(Msk, RegState::Kill)
7399        .addImm(0);
7400  }
7401
7402  MI.eraseFromParent();
7403  return BB;
7404}
7405
// Custom-inserts SplitPairF64Pseudo (LA32 with 64-bit FPRs): splits an f64
// held in the source FPR (operand 2) into two 32-bit GPR halves, writing the
// low half to operand 0 and the high half to operand 1, then erases the
// pseudo. Returns the (unchanged) insertion block.
7406 static MachineBasicBlock *
7408                        const LoongArchSubtarget &Subtarget) {
7409   assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7410          "Unexpected instruction");
7411 
7412   MachineFunction &MF = *BB->getParent();
7413   DebugLoc DL = MI.getDebugLoc();
7415   Register LoReg = MI.getOperand(0).getReg();
7416   Register HiReg = MI.getOperand(1).getReg();
7417   Register SrcReg = MI.getOperand(2).getReg();
7418 
   // MOVFR2GR.S.64 copies the low 32 bits of the FPR, MOVFRH2GR.S the high
   // 32 bits. The source's kill flag is forwarded only on the last use.
7419   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7420   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7421       .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7422   MI.eraseFromParent(); // The pseudo instruction is gone now.
7423   return BB;
7424 }
7425
// Custom-inserts BuildPairF64Pseudo (LA32 with 64-bit FPRs): assembles an f64
// destination FPR (operand 0) from two 32-bit GPR halves — operand 1 is the
// low half, operand 2 the high half — then erases the pseudo.
7426 static MachineBasicBlock *
7428                        const LoongArchSubtarget &Subtarget) {
7429   assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7430          "Unexpected instruction");
7431 
7432   MachineFunction &MF = *BB->getParent();
7433   DebugLoc DL = MI.getDebugLoc();
7436   Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7437   Register DstReg = MI.getOperand(0).getReg();
7438   Register LoReg = MI.getOperand(1).getReg();
7439   Register HiReg = MI.getOperand(2).getReg();
7440 
   // MOVGR2FR.W.64 writes the low half into a fresh temporary FPR; then
   // MOVGR2FRH.W merges the high half on top of it, producing DstReg. The
   // temporary is dead after the second instruction (RegState::Kill).
7441   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7442       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7443   BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7444       .addReg(TmpReg, RegState::Kill)
7445       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7446   MI.eraseFromParent(); // The pseudo instruction is gone now.
7447   return BB;
7448 }
7449
  // Returns true for the select pseudo-instructions that emitSelectPseudo
  // expands; used there to fold a run of consecutive selects (with identical
  // conditions) into a single branch diamond.
7451   switch (MI.getOpcode()) {
7452   default:
7453     return false;
7454   case LoongArch::Select_GPR_Using_CC_GPR:
7455     return true;
7456   }
7457 }
7458
// Custom-inserts Select_GPR_Using_CC_GPR by materializing a triangle CFG
// (HeadMBB -> {TailMBB, IfFalseMBB -> TailMBB}) and replacing each select in
// the batched sequence with a PHI in TailMBB. Returns TailMBB, where
// subsequent instructions should be inserted.
7459 static MachineBasicBlock *
7461                    const LoongArchSubtarget &Subtarget) {
7462   // To "insert" Select_* instructions, we actually have to insert the triangle
7463   // control-flow pattern. The incoming instructions know the destination vreg
7464   // to set, the condition code register to branch on, the true/false values to
7465   // select between, and the condcode to use to select the appropriate branch.
7466   //
7467   // We produce the following control flow:
7468   //     HeadMBB
7469   //     |  \
7470   //     |  IfFalseMBB
7471   //     | /
7472   //    TailMBB
7473   //
7474   // When we find a sequence of selects we attempt to optimize their emission
7475   // by sharing the control flow. Currently we only handle cases where we have
7476   // multiple selects with the exact same condition (same LHS, RHS and CC).
7477   // The selects may be interleaved with other instructions if the other
7478   // instructions meet some requirements we deem safe:
7479   // - They are not pseudo instructions.
7480   // - They are debug instructions. Otherwise,
7481   // - They do not have side-effects, do not access memory and their inputs do
7482   //   not depend on the results of the select pseudo-instructions.
7483   // The TrueV/FalseV operands of the selects cannot depend on the result of
7484   // previous selects in the sequence.
7485   // These conditions could be further relaxed. See the X86 target for a
7486   // related approach and more information.
7487 
   // Operand layout of the select pseudo: 0 = dest, 1 = LHS, 2 = RHS (reg or
   // imm), 3 = branch condcode, 4 = true value, 5 = false value.
7488   Register LHS = MI.getOperand(1).getReg();
7489   Register RHS;
7490   if (MI.getOperand(2).isReg())
7491     RHS = MI.getOperand(2).getReg();
7492   auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7493 
7494   SmallVector<MachineInstr *, 4> SelectDebugValues;
7495   SmallSet<Register, 4> SelectDests;
7496   SelectDests.insert(MI.getOperand(0).getReg());
7497 
   // Scan forward from MI, growing the batch of selects that share the exact
   // same condition; stop at the first instruction that is unsafe to hoist
   // past (side effects, memory access, custom insertion, or a use of an
   // earlier select's result).
7498   MachineInstr *LastSelectPseudo = &MI;
7499   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7500        SequenceMBBI != E; ++SequenceMBBI) {
7501     if (SequenceMBBI->isDebugInstr())
7502       continue;
7503     if (isSelectPseudo(*SequenceMBBI)) {
7504       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7505           !SequenceMBBI->getOperand(2).isReg() ||
7506           SequenceMBBI->getOperand(2).getReg() != RHS ||
7507           SequenceMBBI->getOperand(3).getImm() != CC ||
7508           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7509           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7510         break;
7511       LastSelectPseudo = &*SequenceMBBI;
7512       SequenceMBBI->collectDebugValues(SelectDebugValues);
7513       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7514       continue;
7515     }
7516     if (SequenceMBBI->hasUnmodeledSideEffects() ||
7517         SequenceMBBI->mayLoadOrStore() ||
7518         SequenceMBBI->usesCustomInsertionHook())
7519       break;
7520     if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7521           return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7522         }))
7523       break;
7524   }
7525 
7526   const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7527   const BasicBlock *LLVM_BB = BB->getBasicBlock();
7528   DebugLoc DL = MI.getDebugLoc();
7530 
7531   MachineBasicBlock *HeadMBB = BB;
7532   MachineFunction *F = BB->getParent();
7533   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7534   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7535 
7536   F->insert(I, IfFalseMBB);
7537   F->insert(I, TailMBB);
7538 
7539   // Set the call frame size on entry to the new basic blocks.
7540   unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7541   IfFalseMBB->setCallFrameSize(CallFrameSize);
7542   TailMBB->setCallFrameSize(CallFrameSize);
7543 
7544   // Transfer debug instructions associated with the selects to TailMBB.
7545   for (MachineInstr *DebugInstr : SelectDebugValues) {
7546     TailMBB->push_back(DebugInstr->removeFromParent());
7547   }
7548 
7549   // Move all instructions after the sequence to TailMBB.
7550   TailMBB->splice(TailMBB->end(), HeadMBB,
7551                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7552   // Update machine-CFG edges by transferring all successors of the current
7553   // block to the new block which will contain the Phi nodes for the selects.
7554   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7555   // Set the successors for HeadMBB.
7556   HeadMBB->addSuccessor(IfFalseMBB);
7557   HeadMBB->addSuccessor(TailMBB);
7558 
   // Insert appropriate branch: the branch is taken (to TailMBB) when the
   // condition holds, so IfFalseMBB supplies the false values.
7559   // Insert appropriate branch.
7560   if (MI.getOperand(2).isImm())
7561     BuildMI(HeadMBB, DL, TII.get(CC))
7562         .addReg(LHS)
7563         .addImm(MI.getOperand(2).getImm())
7564         .addMBB(TailMBB);
7565   else
7566     BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7567 
7568   // IfFalseMBB just falls through to TailMBB.
7569   IfFalseMBB->addSuccessor(TailMBB);
7570 
7571   // Create PHIs for all of the select pseudo-instructions.
7572   auto SelectMBBI = MI.getIterator();
7573   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7574   auto InsertionPoint = TailMBB->begin();
7575   while (SelectMBBI != SelectEnd) {
7576     auto Next = std::next(SelectMBBI);
7577     if (isSelectPseudo(*SelectMBBI)) {
7578       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7579       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7580               TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7581           .addReg(SelectMBBI->getOperand(4).getReg())
7582           .addMBB(HeadMBB)
7583           .addReg(SelectMBBI->getOperand(5).getReg())
7584           .addMBB(IfFalseMBB);
7585       SelectMBBI->eraseFromParent();
7586     }
7587     SelectMBBI = Next;
7588   }
7589 
   // PHIs were introduced, so the function no longer satisfies NoPHIs.
7590   F->getProperties().resetNoPHIs();
7591   return TailMBB;
7592 }
7593
// Dispatch for all LoongArch pseudos marked usesCustomInserter: expands each
// into real machine instructions (or delegates to a dedicated emit* helper)
// and returns the block in which subsequent instructions should be inserted.
7594 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7595     MachineInstr &MI, MachineBasicBlock *BB) const {
7596   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7597   DebugLoc DL = MI.getDebugLoc();
7598 
7599   switch (MI.getOpcode()) {
7600   default:
7601     llvm_unreachable("Unexpected instr type to insert");
   // Integer division/remainder: optionally insert a divide-by-zero trap
   // (controlled by the loongarch-check-zero-division option).
7602   case LoongArch::DIV_W:
7603   case LoongArch::DIV_WU:
7604   case LoongArch::MOD_W:
7605   case LoongArch::MOD_WU:
7606   case LoongArch::DIV_D:
7607   case LoongArch::DIV_DU:
7608   case LoongArch::MOD_D:
7609   case LoongArch::MOD_DU:
7610     return insertDivByZeroTrap(MI, BB);
7611     break;
   // FCSR write: operand 0 selects the FCSR number, operand 1 is the GPR.
7612   case LoongArch::WRFCSR: {
7613     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7614             LoongArch::FCSR0 + MI.getOperand(0).getImm())
7615         .addReg(MI.getOperand(1).getReg());
7616     MI.eraseFromParent();
7617     return BB;
7618   }
   // FCSR read: mark the FCSR source undef since it is not tracked as a
   // defined register by the pipeline.
7619   case LoongArch::RDFCSR: {
7620     MachineInstr *ReadFCSR =
7621         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7622                 MI.getOperand(0).getReg())
7623             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7624     ReadFCSR->getOperand(1).setIsUndef();
7625     MI.eraseFromParent();
7626     return BB;
7627   }
7628   case LoongArch::Select_GPR_Using_CC_GPR:
7629     return emitSelectPseudo(MI, BB, Subtarget);
7630   case LoongArch::BuildPairF64Pseudo:
7631     return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7632   case LoongArch::SplitPairF64Pseudo:
7633     return emitSplitPairF64Pseudo(MI, BB, Subtarget);
   // LSX/LASX vector branch-on-zero / branch-on-nonzero pseudos.
7634   case LoongArch::PseudoVBZ:
7635   case LoongArch::PseudoVBZ_B:
7636   case LoongArch::PseudoVBZ_H:
7637   case LoongArch::PseudoVBZ_W:
7638   case LoongArch::PseudoVBZ_D:
7639   case LoongArch::PseudoVBNZ:
7640   case LoongArch::PseudoVBNZ_B:
7641   case LoongArch::PseudoVBNZ_H:
7642   case LoongArch::PseudoVBNZ_W:
7643   case LoongArch::PseudoVBNZ_D:
7644   case LoongArch::PseudoXVBZ:
7645   case LoongArch::PseudoXVBZ_B:
7646   case LoongArch::PseudoXVBZ_H:
7647   case LoongArch::PseudoXVBZ_W:
7648   case LoongArch::PseudoXVBZ_D:
7649   case LoongArch::PseudoXVBNZ:
7650   case LoongArch::PseudoXVBNZ_B:
7651   case LoongArch::PseudoXVBNZ_H:
7652   case LoongArch::PseudoXVBNZ_W:
7653   case LoongArch::PseudoXVBNZ_D:
7654     return emitVecCondBranchPseudo(MI, BB, Subtarget);
7655   case LoongArch::PseudoXVINSGR2VR_B:
7656   case LoongArch::PseudoXVINSGR2VR_H:
7657     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7658   case LoongArch::PseudoCTPOP:
7659     return emitPseudoCTPOP(MI, BB, Subtarget);
   // Vector mask-condition pseudos (vmskltz/vmskgez/vmsknz families).
7660   case LoongArch::PseudoVMSKLTZ_B:
7661   case LoongArch::PseudoVMSKLTZ_H:
7662   case LoongArch::PseudoVMSKLTZ_W:
7663   case LoongArch::PseudoVMSKLTZ_D:
7664   case LoongArch::PseudoVMSKGEZ_B:
7665   case LoongArch::PseudoVMSKEQZ_B:
7666   case LoongArch::PseudoVMSKNEZ_B:
7667   case LoongArch::PseudoXVMSKLTZ_B:
7668   case LoongArch::PseudoXVMSKLTZ_H:
7669   case LoongArch::PseudoXVMSKLTZ_W:
7670   case LoongArch::PseudoXVMSKLTZ_D:
7671   case LoongArch::PseudoXVMSKGEZ_B:
7672   case LoongArch::PseudoXVMSKEQZ_B:
7673   case LoongArch::PseudoXVMSKNEZ_B:
7674     return emitPseudoVMSKCOND(MI, BB, Subtarget);
7675   case TargetOpcode::STATEPOINT:
7676     // STATEPOINT is a pseudo instruction which has no implicit defs/uses
7677     // while bl call instruction (where statepoint will be lowered at the
7678     // end) has implicit def. This def is early-clobber as it will be set at
7679     // the moment of the call and earlier than any use is read.
7680     // Add this implicit dead def here as a workaround.
7681     MI.addOperand(*MI.getMF(),
7683                                             LoongArch::R1, /*isDef*/ true,
7684                                             /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7685                                             /*isUndef*/ false, /*isEarlyClobber*/ true));
7686     if (!Subtarget.is64Bit())
7687       report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7688     return emitPatchPoint(MI, BB);
7689   }
7690 }
7691
// Misaligned loads/stores are permitted (for any VT/address space/alignment)
// exactly when the subtarget has the unaligned-access (UAL) feature; the
// reported speed is a placeholder constant.
7693     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7694     unsigned *Fast) const {
7695   if (!Subtarget.hasUAL())
7696     return false;
7697 
7698   // TODO: set reasonable speed number.
7699   if (Fast)
7700     *Fast = 1;
7701   return true;
7702 }
7703
7704//===----------------------------------------------------------------------===//
7705// Calling Convention Implementation
7706//===----------------------------------------------------------------------===//
7707
7708 // Eight general-purpose registers a0-a7 used for passing integer arguments,
7709 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
7710 // fixed-point arguments, and floating-point arguments when no FPR is available
7711 // or with soft float ABI.
7712 const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7713                              LoongArch::R7, LoongArch::R8, LoongArch::R9,
7714                              LoongArch::R10, LoongArch::R11};
7715 // Eight floating-point registers fa0-fa7 used for passing floating-point
7716 // arguments, and fa0-fa1 are also used to return values.
7717 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7718                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
7719                                LoongArch::F6, LoongArch::F7};
7720 // FPR32 and FPR64 alias each other.
7722     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7723     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7724 
// LSX (128-bit) vector argument registers vr0-vr7.
7725 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7726                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7727                             LoongArch::VR6, LoongArch::VR7};
7728 
// LASX (256-bit) vector argument registers xr0-xr7.
7729 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7730                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7731                             LoongArch::XR6, LoongArch::XR7};
7732
7733 // Pass a 2*GRLen argument that has been split into two GRLen values through
7734 // registers or the stack as necessary.
// Returns false on success (this CC helper never fails).
7735 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7736                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7737                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7738                                      ISD::ArgFlagsTy ArgFlags2) {
7739   unsigned GRLenInBytes = GRLen / 8;
7740   if (Register Reg = State.AllocateReg(ArgGPRs)) {
7741     // At least one half can be passed via register.
7742     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7743                                      VA1.getLocVT(), CCValAssign::Full));
7744   } else {
7745     // Both halves must be passed on the stack, with proper alignment.
7746     Align StackAlign =
7747         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7748     State.addLoc(
7750                             State.AllocateStack(GRLenInBytes, StackAlign),
7751                             VA1.getLocVT(), CCValAssign::Full));
7752     State.addLoc(CCValAssign::getMem(
7753         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7754         LocVT2, CCValAssign::Full));
7755     return false;
7756   }
7757   if (Register Reg = State.AllocateReg(ArgGPRs)) {
7758     // The second half can also be passed via register.
7759     State.addLoc(
7760         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7761   } else {
7762     // The second half is passed via the stack, without additional alignment.
7763     State.addLoc(CCValAssign::getMem(
7764         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7765         LocVT2, CCValAssign::Full));
7766   }
7767   return false;
7768 }
7769
7770 // Implements the LoongArch calling convention. Returns true upon failure.
7772                          unsigned ValNo, MVT ValVT,
7773                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7774                          CCState &State, bool IsRet, Type *OrigTy) {
   // GRLen is derived from the data layout's largest legal integer type
   // (32 on LA32, 64 on LA64).
7775   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
   // NOTE(review): "Unspport" is a typo ("Unsupported") in the assert text.
7776   assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
7777   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7778   MVT LocVT = ValVT;
7779 
7780   // Any return value split into more than two values can't be returned
7781   // directly.
7782   if (IsRet && ValNo > 1)
7783     return true;
7784 
7785   // If passing a variadic argument, or if no FPR is available.
7786   bool UseGPRForFloat = true;
7787 
   // Hard-float ABIs use FPRs for fixed FP arguments; variadic FP arguments
   // always go through GPRs.
7788   switch (ABI) {
7789   default:
7790     llvm_unreachable("Unexpected ABI");
7791     break;
7796     UseGPRForFloat = ArgFlags.isVarArg();
7797     break;
7800     break;
7801   }
7802 
7803   // If this is a variadic argument, the LoongArch calling convention requires
7804   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7805   // byte alignment. An aligned register should be used regardless of whether
7806   // the original argument was split during legalisation or not. The argument
7807   // will not be passed by registers if the original type is larger than
7808   // 2*GRLen, so the register alignment rule does not apply.
7809   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7810   if (ArgFlags.isVarArg() &&
7811       ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7812       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7813     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7814     // Skip 'odd' register if necessary.
7815     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7816       State.AllocateReg(ArgGPRs);
7817   }
7818 
7819   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7820   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7821       State.getPendingArgFlags();
7822 
7823   assert(PendingLocs.size() == PendingArgFlags.size() &&
7824          "PendingLocs and PendingArgFlags out of sync");
7825 
7826   // FPR32 and FPR64 alias each other.
7827   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7828     UseGPRForFloat = true;
7829 
   // FP-in-GPR cases: f32 always fits a GPR; f64 fits a single GPR only on
   // LA64, otherwise it needs the custom pair handling below.
7830   if (UseGPRForFloat && ValVT == MVT::f32) {
7831     LocVT = GRLenVT;
7832     LocInfo = CCValAssign::BCvt;
7833   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7834     LocVT = MVT::i64;
7835     LocInfo = CCValAssign::BCvt;
7836   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7837     // Handle passing f64 on LA32D with a soft float ABI or when floating point
7838     // registers are exhausted.
7839     assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7840     // Depending on available argument GPRS, f64 may be passed in a pair of
7841     // GPRs, split between a GPR and the stack, or passed completely on the
7842     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7843     // cases.
7844     MCRegister Reg = State.AllocateReg(ArgGPRs);
7845     if (!Reg) {
7846       int64_t StackOffset = State.AllocateStack(8, Align(8));
7847       State.addLoc(
7848           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7849       return false;
7850     }
7851     LocVT = MVT::i32;
7852     State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7853     MCRegister HiReg = State.AllocateReg(ArgGPRs);
7854     if (HiReg) {
7855       State.addLoc(
7856           CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7857     } else {
7858       int64_t StackOffset = State.AllocateStack(4, Align(4));
7859       State.addLoc(
7860           CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7861     }
7862     return false;
7863   }
7864 
7865   // Split arguments might be passed indirectly, so keep track of the pending
7866   // values.
7867   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7868     LocVT = GRLenVT;
7869     LocInfo = CCValAssign::Indirect;
7870     PendingLocs.push_back(
7871         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7872     PendingArgFlags.push_back(ArgFlags);
7873     if (!ArgFlags.isSplitEnd()) {
7874       return false;
7875     }
7876   }
7877 
7878   // If the split argument only had two elements, it should be passed directly
7879   // in registers or on the stack.
7880   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7881       PendingLocs.size() <= 2) {
7882     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7883     // Apply the normal calling convention rules to the first half of the
7884     // split argument.
7885     CCValAssign VA = PendingLocs[0];
7886     ISD::ArgFlagsTy AF = PendingArgFlags[0];
7887     PendingLocs.clear();
7888     PendingArgFlags.clear();
7889     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7890                                     ArgFlags);
7891   }
7892 
7893   // Allocate to a register if possible, or else a stack slot.
7894   Register Reg;
7895   unsigned StoreSizeBytes = GRLen / 8;
7896   Align StackAlign = Align(GRLen / 8);
7897 
7898   if (ValVT == MVT::f32 && !UseGPRForFloat) {
7899     Reg = State.AllocateReg(ArgFPR32s);
7900   } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7901     Reg = State.AllocateReg(ArgFPR64s);
7902   } else if (ValVT.is128BitVector()) {
7903     Reg = State.AllocateReg(ArgVRs);
7904     UseGPRForFloat = false;
7905     StoreSizeBytes = 16;
7906     StackAlign = Align(16);
7907   } else if (ValVT.is256BitVector()) {
7908     Reg = State.AllocateReg(ArgXRs);
7909     UseGPRForFloat = false;
7910     StoreSizeBytes = 32;
7911     StackAlign = Align(32);
7912   } else {
7913     Reg = State.AllocateReg(ArgGPRs);
7914   }
7915 
7916   unsigned StackOffset =
7917       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7918 
7919   // If we reach this point and PendingLocs is non-empty, we must be at the
7920   // end of a split argument that must be passed indirectly.
7921   if (!PendingLocs.empty()) {
7922     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7923     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7924     for (auto &It : PendingLocs) {
7925       if (Reg)
7926         It.convertToReg(Reg);
7927       else
7928         It.convertToMem(StackOffset);
7929       State.addLoc(It);
7930     }
7931     PendingLocs.clear();
7932     PendingArgFlags.clear();
7933     return false;
7934   }
7935   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7936          "Expected an GRLenVT at this stage");
7937 
7938   if (Reg) {
7939     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7940     return false;
7941   }
7942 
7943   // When a floating-point value is passed on the stack, no bit-cast is needed.
7944   if (ValVT.isFloatingPoint()) {
7945     LocVT = ValVT;
7946     LocInfo = CCValAssign::Full;
7947   }
7948 
7949   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7950   return false;
7951 }
7952
// Runs the calling-convention assign function Fn over every incoming value
// (formal arguments, or return values when IsRet), recording locations in
// CCInfo. An unhandled type is a backend bug and aborts via llvm_unreachable.
7953 void LoongArchTargetLowering::analyzeInputArgs(
7954     MachineFunction &MF, CCState &CCInfo,
7955     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7956     LoongArchCCAssignFn Fn) const {
7957   FunctionType *FType = MF.getFunction().getFunctionType();
7958   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7959     MVT ArgVT = Ins[i].VT;
     // The IR-level type (used for alignment rules); null for non-original
     // (legalizer-introduced) arguments.
7960     Type *ArgTy = nullptr;
7961     if (IsRet)
7962       ArgTy = FType->getReturnType();
7963     else if (Ins[i].isOrigArg())
7964       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7966         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7967     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7968            CCInfo, IsRet, ArgTy)) {
7969       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7970                         << '\n');
7971       llvm_unreachable("");
7972     }
7973   }
7974 }
7975
// Counterpart of analyzeInputArgs for outgoing values (call arguments, or
// return values when IsRet). CLI, when non-null, supplies the IR types of
// call arguments for alignment decisions.
7976 void LoongArchTargetLowering::analyzeOutputArgs(
7977     MachineFunction &MF, CCState &CCInfo,
7978     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7979     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7980   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7981     MVT ArgVT = Outs[i].VT;
7982     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7984         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7985     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7986            CCInfo, IsRet, OrigTy)) {
7987       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7988                         << "\n");
7989       llvm_unreachable("");
7990     }
7991   }
7992 }
7993
7994 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7995 // values.
7997                                    const CCValAssign &VA, const SDLoc &DL) {
7998   switch (VA.getLocInfo()) {
7999   default:
8000     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8001   case CCValAssign::Full:
8003     break;
8004   case CCValAssign::BCvt:
     // f32 carried in a 64-bit GPR needs the dedicated LA64 move node; all
     // other BCvt cases are plain same-size bitcasts.
8005     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8006       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8007     else
8008       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8009     break;
8010   }
8011   return Val;
8012 }
8013
// Materializes an incoming register argument: creates a live-in virtual
// register for VA's physical register, copies it into the DAG, records
// sign-extension facts for the OptW pass, and converts LocVT back to ValVT.
8015                                const CCValAssign &VA, const SDLoc &DL,
8016                                const ISD::InputArg &In,
8017                                const LoongArchTargetLowering &TLI) {
8020   EVT LocVT = VA.getLocVT();
8021   SDValue Val;
8022   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8023   Register VReg = RegInfo.createVirtualRegister(RC);
8024   RegInfo.addLiveIn(VA.getLocReg(), VReg);
8025   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8026 
8027   // If input is sign extended from 32 bits, note it for the OptW pass.
8028   if (In.isOrigArg()) {
8029     Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8030     if (OrigArg->getType()->isIntegerTy()) {
8031       unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8032       // An input zero extended from i31 can also be considered sign extended.
8033       if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8034           (BitWidth < 32 && In.Flags.isZExt())) {
8037         LAFI->addSExt32Register(VReg);
8038       }
8039     }
8040   }
8041 
8042   return convertLocVTToValVT(DAG, Val, VA, DL);
8043 }
8044
8045 // The caller is responsible for loading the full value if the argument is
8046 // passed with CCValAssign::Indirect.
// Materializes a stack-passed incoming argument: creates an immutable fixed
// frame object at the assigned offset and loads the value from it.
8048                                const CCValAssign &VA, const SDLoc &DL) {
8050   MachineFrameInfo &MFI = MF.getFrameInfo();
8051   EVT ValVT = VA.getValVT();
8052   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8053                                  /*IsImmutable=*/true);
8054   SDValue FIN = DAG.getFrameIndex(
8056 
8057   ISD::LoadExtType ExtType;
8058   switch (VA.getLocInfo()) {
8059   default:
8060     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8061   case CCValAssign::Full:
8063   case CCValAssign::BCvt:
8064     ExtType = ISD::NON_EXTLOAD;
8065     break;
8066   }
8067   return DAG.getExtLoad(
8068       ExtType, DL, VA.getLocVT(), Chain, FIN,
8070 }
8071
// Reassembles an f64 argument split across two i32 locations on LA32 (soft
// float ABI or exhausted FPRs): the low half always arrives in a GPR (VA);
// the high half (HiVA) may be in a second GPR or on the stack. Joins the two
// halves with a BUILD_PAIR_F64 node.
8073                                       const CCValAssign &VA,
8074                                       const CCValAssign &HiVA,
8075                                       const SDLoc &DL) {
8076   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8077          "Unexpected VA");
8079   MachineFrameInfo &MFI = MF.getFrameInfo();
8081 
8082   assert(VA.isRegLoc() && "Expected register VA assignment");
8083 
8084   Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8085   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8086   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8087   SDValue Hi;
8088   if (HiVA.isMemLoc()) {
8089     // Second half of f64 is passed on the stack.
8090     int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8091                                    /*IsImmutable=*/true);
8092     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8093     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8095   } else {
8096     // Second half of f64 is passed in another GPR.
8097     Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8098     RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8099     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8100   }
8101   return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8102 }
8103
// Inverse of convertLocVTToValVT: converts Val from its value type to the
// location type recorded in VA before it is placed in a register/stack slot.
8105                                    const CCValAssign &VA, const SDLoc &DL) {
8106   EVT LocVT = VA.getLocVT();
8107 
8108   switch (VA.getLocInfo()) {
8109   default:
8110     llvm_unreachable("Unexpected CCValAssign::LocInfo");
8111   case CCValAssign::Full:
8112     break;
8113   case CCValAssign::BCvt:
     // f32 destined for a 64-bit GPR uses the dedicated LA64 move node;
     // all other BCvt cases are plain same-size bitcasts.
8114     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8115       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8116     else
8117       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8118     break;
8119   }
8120   return Val;
8121 }
8122
8123static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8124 CCValAssign::LocInfo LocInfo,
8125 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8126 CCState &State) {
8127 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8128 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8129 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8130 static const MCPhysReg GPRList[] = {
8131 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8132 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8133 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8134 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8135 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8136 return false;
8137 }
8138 }
8139
8140 if (LocVT == MVT::f32) {
8141 // Pass in STG registers: F1, F2, F3, F4
8142 // fs0,fs1,fs2,fs3
8143 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8144 LoongArch::F26, LoongArch::F27};
8145 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8146 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8147 return false;
8148 }
8149 }
8150
8151 if (LocVT == MVT::f64) {
8152 // Pass in STG registers: D1, D2, D3, D4
8153 // fs4,fs5,fs6,fs7
8154 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8155 LoongArch::F30_64, LoongArch::F31_64};
8156 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8157 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8158 return false;
8159 }
8160 }
8161
8162 report_fatal_error("No registers left in GHC calling convention");
8163 return true;
8164}
8165
8166// Transform physical registers into virtual registers.
8168 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8169 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8170 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8171
8173 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8174
8175 switch (CallConv) {
8176 default:
8177 llvm_unreachable("Unsupported calling convention");
8178 case CallingConv::C:
8179 case CallingConv::Fast:
8181 break;
8182 case CallingConv::GHC:
8183 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8184 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8186 "GHC calling convention requires the F and D extensions");
8187 }
8188
8189 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8190 MVT GRLenVT = Subtarget.getGRLenVT();
8191 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8192 // Used with varargs to acumulate store chains.
8193 std::vector<SDValue> OutChains;
8194
8195 // Assign locations to all of the incoming arguments.
8197 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8198
8199 if (CallConv == CallingConv::GHC)
8201 else
8202 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8203
8204 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8205 CCValAssign &VA = ArgLocs[i];
8206 SDValue ArgValue;
8207 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8208 // case.
8209 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8210 assert(VA.needsCustom());
8211 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8212 } else if (VA.isRegLoc())
8213 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8214 else
8215 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8216 if (VA.getLocInfo() == CCValAssign::Indirect) {
8217 // If the original argument was split and passed by reference, we need to
8218 // load all parts of it here (using the same address).
8219 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8221 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8222 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8223 assert(ArgPartOffset == 0);
8224 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8225 CCValAssign &PartVA = ArgLocs[i + 1];
8226 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8227 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8228 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8229 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8231 ++i;
8232 ++InsIdx;
8233 }
8234 continue;
8235 }
8236 InVals.push_back(ArgValue);
8237 if (Ins[InsIdx].Flags.isByVal())
8238 LoongArchFI->addIncomingByValArgs(ArgValue);
8239 }
8240
8241 if (IsVarArg) {
8243 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8244 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8245 MachineFrameInfo &MFI = MF.getFrameInfo();
8246 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8247
8248 // Offset of the first variable argument from stack pointer, and size of
8249 // the vararg save area. For now, the varargs save area is either zero or
8250 // large enough to hold a0-a7.
8251 int VaArgOffset, VarArgsSaveSize;
8252
8253 // If all registers are allocated, then all varargs must be passed on the
8254 // stack and we don't need to save any argregs.
8255 if (ArgRegs.size() == Idx) {
8256 VaArgOffset = CCInfo.getStackSize();
8257 VarArgsSaveSize = 0;
8258 } else {
8259 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8260 VaArgOffset = -VarArgsSaveSize;
8261 }
8262
8263 // Record the frame index of the first variable argument
8264 // which is a value necessary to VASTART.
8265 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8266 LoongArchFI->setVarArgsFrameIndex(FI);
8267
8268 // If saving an odd number of registers then create an extra stack slot to
8269 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8270 // offsets to even-numbered registered remain 2*GRLen-aligned.
8271 if (Idx % 2) {
8272 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8273 true);
8274 VarArgsSaveSize += GRLenInBytes;
8275 }
8276
8277 // Copy the integer registers that may have been used for passing varargs
8278 // to the vararg save area.
8279 for (unsigned I = Idx; I < ArgRegs.size();
8280 ++I, VaArgOffset += GRLenInBytes) {
8281 const Register Reg = RegInfo.createVirtualRegister(RC);
8282 RegInfo.addLiveIn(ArgRegs[I], Reg);
8283 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8284 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8285 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8286 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8288 cast<StoreSDNode>(Store.getNode())
8289 ->getMemOperand()
8290 ->setValue((Value *)nullptr);
8291 OutChains.push_back(Store);
8292 }
8293 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8294 }
8295
8296 LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
8297
8298 // All stores are grouped in one node to allow the matching between
8299 // the size of Ins and InVals. This only happens for vararg functions.
8300 if (!OutChains.empty()) {
8301 OutChains.push_back(Chain);
8302 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8303 }
8304
8305 return Chain;
8306}
8307
8309 return CI->isTailCall();
8310}
8311
8312// Check if the return value is used as only a return value, as otherwise
8313// we can't perform a tail-call.
8315 SDValue &Chain) const {
8316 if (N->getNumValues() != 1)
8317 return false;
8318 if (!N->hasNUsesOfValue(1, 0))
8319 return false;
8320
8321 SDNode *Copy = *N->user_begin();
8322 if (Copy->getOpcode() != ISD::CopyToReg)
8323 return false;
8324
8325 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8326 // isn't safe to perform a tail call.
8327 if (Copy->getGluedNode())
8328 return false;
8329
8330 // The copy must be used by a LoongArchISD::RET, and nothing else.
8331 bool HasRet = false;
8332 for (SDNode *Node : Copy->users()) {
8333 if (Node->getOpcode() != LoongArchISD::RET)
8334 return false;
8335 HasRet = true;
8336 }
8337
8338 if (!HasRet)
8339 return false;
8340
8341 Chain = Copy->getOperand(0);
8342 return true;
8343}
8344
8345// Check whether the call is eligible for tail call optimization.
8346bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8347 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8348 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8349
8350 auto CalleeCC = CLI.CallConv;
8351 auto &Outs = CLI.Outs;
8352 auto &Caller = MF.getFunction();
8353 auto CallerCC = Caller.getCallingConv();
8354 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8355
8356 // If the stack arguments for this call do not fit into our own save area then
8357 // the call cannot be made tail.
8358 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8359 return false;
8360
8361 // Do not tail call opt if any parameters need to be passed indirectly.
8362 for (auto &VA : ArgLocs)
8363 if (VA.getLocInfo() == CCValAssign::Indirect)
8364 return false;
8365
8366 // Do not tail call opt if either caller or callee uses struct return
8367 // semantics.
8368 auto IsCallerStructRet = Caller.hasStructRetAttr();
8369 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8370 if (IsCallerStructRet != IsCalleeStructRet)
8371 return false;
8372
8373 // Do not tail call opt if caller's and callee's byval arguments do not match.
8374 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8375 if (!Outs[i].Flags.isByVal())
8376 continue;
8377 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8378 return false;
8379 if (LoongArchFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
8380 return false;
8381 }
8382
8383 // The callee has to preserve all registers the caller needs to preserve.
8384 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8385 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8386 if (CalleeCC != CallerCC) {
8387 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8388 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8389 return false;
8390 }
8391
8392 // If the callee takes no arguments then go on to check the results of the
8393 // call.
8394 const MachineRegisterInfo &MRI = MF.getRegInfo();
8395 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8396 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
8397 return false;
8398
8399 return true;
8400}
8401
8403 return DAG.getDataLayout().getPrefTypeAlign(
8404 VT.getTypeForEVT(*DAG.getContext()));
8405}
8406
8407// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8408// and output parameter nodes.
8409SDValue
8411 SmallVectorImpl<SDValue> &InVals) const {
8412 SelectionDAG &DAG = CLI.DAG;
8413 SDLoc &DL = CLI.DL;
8415 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8417 SDValue Chain = CLI.Chain;
8418 SDValue Callee = CLI.Callee;
8419 CallingConv::ID CallConv = CLI.CallConv;
8420 bool IsVarArg = CLI.IsVarArg;
8421 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8422 MVT GRLenVT = Subtarget.getGRLenVT();
8423 bool &IsTailCall = CLI.IsTailCall;
8424
8426 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8427
8428 // Analyze the operands of the call, assigning locations to each operand.
8430 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8431
8432 if (CallConv == CallingConv::GHC)
8433 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8434 else
8435 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8436
8437 // Check if it's really possible to do a tail call.
8438 if (IsTailCall)
8439 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8440
8441 if (IsTailCall)
8442 ++NumTailCalls;
8443 else if (CLI.CB && CLI.CB->isMustTailCall())
8444 report_fatal_error("failed to perform tail call elimination on a call "
8445 "site marked musttail");
8446
8447 // Get a count of how many bytes are to be pushed on the stack.
8448 unsigned NumBytes = ArgCCInfo.getStackSize();
8449
8450 // Create local copies for byval args.
8451 SmallVector<SDValue> ByValArgs;
8452 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
8453 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8454 if (!Flags.isByVal())
8455 continue;
8456
8457 SDValue Arg = OutVals[i];
8458 unsigned Size = Flags.getByValSize();
8459 Align Alignment = Flags.getNonZeroByValAlign();
8460 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8461 SDValue Dst;
8462
8463 if (IsTailCall) {
8464 SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++);
8467 Dst = CallerArg;
8468 } else {
8469 int FI =
8470 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8471 Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8472 }
8473 if (Dst) {
8474 Chain =
8475 DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
8476 /*IsVolatile=*/false,
8477 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8479 ByValArgs.push_back(Dst);
8480 }
8481 }
8482
8483 if (!IsTailCall)
8484 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8485
8486 // During a tail call, stores to the argument area must happen after all of
8487 // the function's incoming arguments have been loaded because they may alias.
8488 // This is done by folding in a TokenFactor from LowerFormalArguments, but
8489 // there's no point in doing so repeatedly so this tracks whether that's
8490 // happened yet.
8491 bool AfterFormalArgLoads = false;
8492
8493 // Copy argument values to their designated locations.
8495 SmallVector<SDValue> MemOpChains;
8496 SDValue StackPtr;
8497 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8498 ++i, ++OutIdx) {
8499 CCValAssign &VA = ArgLocs[i];
8500 SDValue ArgValue = OutVals[OutIdx];
8501 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8502
8503 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8504 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8505 assert(VA.isRegLoc() && "Expected register VA assignment");
8506 assert(VA.needsCustom());
8507 SDValue SplitF64 =
8508 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8509 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8510 SDValue Lo = SplitF64.getValue(0);
8511 SDValue Hi = SplitF64.getValue(1);
8512
8513 Register RegLo = VA.getLocReg();
8514 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8515
8516 // Get the CCValAssign for the Hi part.
8517 CCValAssign &HiVA = ArgLocs[++i];
8518
8519 if (HiVA.isMemLoc()) {
8520 // Second half of f64 is passed on the stack.
8521 if (!StackPtr.getNode())
8522 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8524 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8525 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8526 // Emit the store.
8527 MemOpChains.push_back(DAG.getStore(
8528 Chain, DL, Hi, Address,
8530 } else {
8531 // Second half of f64 is passed in another GPR.
8532 Register RegHigh = HiVA.getLocReg();
8533 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8534 }
8535 continue;
8536 }
8537
8538 // Promote the value if needed.
8539 // For now, only handle fully promoted and indirect arguments.
8540 if (VA.getLocInfo() == CCValAssign::Indirect) {
8541 // Store the argument in a stack slot and pass its address.
8542 Align StackAlign =
8543 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8544 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8545 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8546 // If the original argument was split and passed by reference, we need to
8547 // store the required parts of it here (and pass just one address).
8548 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8549 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8550 assert(ArgPartOffset == 0);
8551 // Calculate the total size to store. We don't have access to what we're
8552 // actually storing other than performing the loop and collecting the
8553 // info.
8555 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8556 SDValue PartValue = OutVals[OutIdx + 1];
8557 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8558 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8559 EVT PartVT = PartValue.getValueType();
8560
8561 StoredSize += PartVT.getStoreSize();
8562 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8563 Parts.push_back(std::make_pair(PartValue, Offset));
8564 ++i;
8565 ++OutIdx;
8566 }
8567 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8568 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8569 MemOpChains.push_back(
8570 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8572 for (const auto &Part : Parts) {
8573 SDValue PartValue = Part.first;
8574 SDValue PartOffset = Part.second;
8576 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8577 MemOpChains.push_back(
8578 DAG.getStore(Chain, DL, PartValue, Address,
8580 }
8581 ArgValue = SpillSlot;
8582 } else {
8583 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8584 }
8585
8586 // Use local copy if it is a byval arg.
8587 if (Flags.isByVal()) {
8588 if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
8589 isa<ExternalSymbolSDNode>(ArgValue) ||
8590 isa<FrameIndexSDNode>(ArgValue)))
8591 ArgValue = ByValArgs[j++];
8592 }
8593
8594 if (VA.isRegLoc()) {
8595 // Queue up the argument copies and emit them at the end.
8596 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8597 } else {
8598 assert(VA.isMemLoc() && "Argument not register or memory");
8599 SDValue DstAddr;
8600 MachinePointerInfo DstInfo;
8601 int32_t Offset = VA.getLocMemOffset();
8602
8603 // Work out the address of the stack slot.
8604 if (!StackPtr.getNode())
8605 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8606
8607 if (IsTailCall) {
8608 unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
8609 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
8610 DstAddr = DAG.getFrameIndex(FI, PtrVT);
8611 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
8612 if (!AfterFormalArgLoads) {
8613 Chain = DAG.getStackArgumentTokenFactor(Chain);
8614 AfterFormalArgLoads = true;
8615 }
8616 } else {
8617 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
8618 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
8619 DstInfo = MachinePointerInfo::getStack(MF, Offset);
8620 }
8621
8622 // Emit the store.
8623 MemOpChains.push_back(
8624 DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
8625 }
8626 }
8627
8628 // Join the stores, which are independent of one another.
8629 if (!MemOpChains.empty())
8630 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8631
8632 SDValue Glue;
8633
8634 // Build a sequence of copy-to-reg nodes, chained and glued together.
8635 for (auto &Reg : RegsToPass) {
8636 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8637 Glue = Chain.getValue(1);
8638 }
8639
8640 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8641 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8642 // split it and then direct call can be matched by PseudoCALL_SMALL.
8644 const GlobalValue *GV = S->getGlobal();
8645 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8648 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8649 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8650 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8653 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8654 }
8655
8656 // The first call operand is the chain and the second is the target address.
8658 Ops.push_back(Chain);
8659 Ops.push_back(Callee);
8660
8661 // Add argument registers to the end of the list so that they are
8662 // known live into the call.
8663 for (auto &Reg : RegsToPass)
8664 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8665
8666 if (!IsTailCall) {
8667 // Add a register mask operand representing the call-preserved registers.
8668 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8669 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8670 assert(Mask && "Missing call preserved mask for calling convention");
8671 Ops.push_back(DAG.getRegisterMask(Mask));
8672 }
8673
8674 // Glue the call to the argument copies, if any.
8675 if (Glue.getNode())
8676 Ops.push_back(Glue);
8677
8678 // Emit the call.
8679 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8680 unsigned Op;
8681 switch (DAG.getTarget().getCodeModel()) {
8682 default:
8683 report_fatal_error("Unsupported code model");
8684 case CodeModel::Small:
8685 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8686 break;
8687 case CodeModel::Medium:
8688 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8689 break;
8690 case CodeModel::Large:
8691 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8692 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8693 break;
8694 }
8695
8696 if (IsTailCall) {
8698 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8699 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8700 return Ret;
8701 }
8702
8703 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8704 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8705 Glue = Chain.getValue(1);
8706
8707 // Mark the end of the call, which is glued to the call itself.
8708 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8709 Glue = Chain.getValue(1);
8710
8711 // Assign locations to each value returned by this call.
8713 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8714 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8715
8716 // Copy all of the result registers out of their specified physreg.
8717 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8718 auto &VA = RVLocs[i];
8719 // Copy the value out.
8720 SDValue RetValue =
8721 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8722 // Glue the RetValue to the end of the call sequence.
8723 Chain = RetValue.getValue(1);
8724 Glue = RetValue.getValue(2);
8725
8726 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8727 assert(VA.needsCustom());
8728 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8729 MVT::i32, Glue);
8730 Chain = RetValue2.getValue(1);
8731 Glue = RetValue2.getValue(2);
8732 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8733 RetValue, RetValue2);
8734 } else
8735 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8736
8737 InVals.push_back(RetValue);
8738 }
8739
8740 return Chain;
8741}
8742
8744 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8745 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8746 const Type *RetTy) const {
8748 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8749
8750 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8751 LoongArchABI::ABI ABI =
8752 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8753 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8754 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8755 return false;
8756 }
8757 return true;
8758}
8759
8761 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8763 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8764 SelectionDAG &DAG) const {
8765 // Stores the assignment of the return value to a location.
8767
8768 // Info about the registers and stack slot.
8769 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8770 *DAG.getContext());
8771
8772 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8773 nullptr, CC_LoongArch);
8774 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8775 report_fatal_error("GHC functions return void only");
8776 SDValue Glue;
8777 SmallVector<SDValue, 4> RetOps(1, Chain);
8778
8779 // Copy the result values into the output registers.
8780 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8781 SDValue Val = OutVals[OutIdx];
8782 CCValAssign &VA = RVLocs[i];
8783 assert(VA.isRegLoc() && "Can only return in registers!");
8784
8785 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8786 // Handle returning f64 on LA32D with a soft float ABI.
8787 assert(VA.isRegLoc() && "Expected return via registers");
8788 assert(VA.needsCustom());
8789 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8790 DAG.getVTList(MVT::i32, MVT::i32), Val);
8791 SDValue Lo = SplitF64.getValue(0);
8792 SDValue Hi = SplitF64.getValue(1);
8793 Register RegLo = VA.getLocReg();
8794 Register RegHi = RVLocs[++i].getLocReg();
8795
8796 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8797 Glue = Chain.getValue(1);
8798 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8799 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8800 Glue = Chain.getValue(1);
8801 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8802 } else {
8803 // Handle a 'normal' return.
8804 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8805 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8806
8807 // Guarantee that all emitted copies are stuck together.
8808 Glue = Chain.getValue(1);
8809 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8810 }
8811 }
8812
8813 RetOps[0] = Chain; // Update chain.
8814
8815 // Add the glue node if we have it.
8816 if (Glue.getNode())
8817 RetOps.push_back(Glue);
8818
8819 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8820}
8821
8822// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8823// Note: The following prefixes are excluded:
8824// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8825// as they can be represented using [x]vrepli.[whb]
8827 const APInt &SplatValue, const unsigned SplatBitSize) const {
8828 uint64_t RequiredImm = 0;
8829 uint64_t V = SplatValue.getZExtValue();
8830 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8831 // 4'b0101
8832 RequiredImm = (0b10101 << 8) | (V >> 8);
8833 return {true, RequiredImm};
8834 } else if (SplatBitSize == 32) {
8835 // 4'b0001
8836 if (!(V & 0xFFFF00FF)) {
8837 RequiredImm = (0b10001 << 8) | (V >> 8);
8838 return {true, RequiredImm};
8839 }
8840 // 4'b0010
8841 if (!(V & 0xFF00FFFF)) {
8842 RequiredImm = (0b10010 << 8) | (V >> 16);
8843 return {true, RequiredImm};
8844 }
8845 // 4'b0011
8846 if (!(V & 0x00FFFFFF)) {
8847 RequiredImm = (0b10011 << 8) | (V >> 24);
8848 return {true, RequiredImm};
8849 }
8850 // 4'b0110
8851 if ((V & 0xFFFF00FF) == 0xFF) {
8852 RequiredImm = (0b10110 << 8) | (V >> 8);
8853 return {true, RequiredImm};
8854 }
8855 // 4'b0111
8856 if ((V & 0xFF00FFFF) == 0xFFFF) {
8857 RequiredImm = (0b10111 << 8) | (V >> 16);
8858 return {true, RequiredImm};
8859 }
8860 // 4'b1010
8861 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8862 RequiredImm =
8863 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8864 return {true, RequiredImm};
8865 }
8866 } else if (SplatBitSize == 64) {
8867 // 4'b1011
8868 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8869 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8870 RequiredImm =
8871 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8872 return {true, RequiredImm};
8873 }
8874 // 4'b1100
8875 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8876 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8877 RequiredImm =
8878 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8879 return {true, RequiredImm};
8880 }
8881 // 4'b1001
8882 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8883 uint8_t res = 0;
8884 for (int i = 0; i < 8; ++i) {
8885 uint8_t byte = x & 0xFF;
8886 if (byte == 0 || byte == 0xFF)
8887 res |= ((byte & 1) << i);
8888 else
8889 return {false, 0};
8890 x >>= 8;
8891 }
8892 return {true, res};
8893 };
8894 auto [IsSame, Suffix] = sameBitsPreByte(V);
8895 if (IsSame) {
8896 RequiredImm = (0b11001 << 8) | Suffix;
8897 return {true, RequiredImm};
8898 }
8899 }
8900 return {false, RequiredImm};
8901}
8902
8904 EVT VT) const {
8905 if (!Subtarget.hasExtLSX())
8906 return false;
8907
8908 if (VT == MVT::f32) {
8909 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8910 return (masked == 0x3e000000 || masked == 0x40000000);
8911 }
8912
8913 if (VT == MVT::f64) {
8914 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8915 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8916 }
8917
8918 return false;
8919}
8920
8921bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8922 bool ForCodeSize) const {
8923 // TODO: Maybe need more checks here after vector extension is supported.
8924 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8925 return false;
8926 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8927 return false;
8928 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8929}
8930
8932 return true;
8933}
8934
8936 return true;
8937}
8938
8939bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8940 const Instruction *I) const {
8941 if (!Subtarget.is64Bit())
8942 return isa<LoadInst>(I) || isa<StoreInst>(I);
8943
8944 if (isa<LoadInst>(I))
8945 return true;
8946
8947 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8948 // require fences beacuse we can use amswap_db.[w/d].
8949 Type *Ty = I->getOperand(0)->getType();
8950 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8951 unsigned Size = Ty->getIntegerBitWidth();
8952 return (Size == 8 || Size == 16);
8953 }
8954
8955 return false;
8956}
8957
8959 LLVMContext &Context,
8960 EVT VT) const {
8961 if (!VT.isVector())
8962 return getPointerTy(DL);
8964}
8965
8967 EVT VT = Y.getValueType();
8968
8969 if (VT.isVector())
8970 return Subtarget.hasExtLSX() && VT.isInteger();
8971
8972 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8973}
8974
8976 const CallBase &I,
8977 MachineFunction &MF,
8978 unsigned Intrinsic) const {
8979 switch (Intrinsic) {
8980 default:
8981 return false;
8982 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8983 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8984 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8985 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8986 Info.opc = ISD::INTRINSIC_W_CHAIN;
8987 Info.memVT = MVT::i32;
8988 Info.ptrVal = I.getArgOperand(0);
8989 Info.offset = 0;
8990 Info.align = Align(4);
8993 return true;
8994 // TODO: Add more Intrinsics later.
8995 }
8996}
8997
8998// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8999// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
9000// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
9001// regression, we need to implement it manually.
9004
9006 Op == AtomicRMWInst::And) &&
9007 "Unable to expand");
9008 unsigned MinWordSize = 4;
9009
9010 IRBuilder<> Builder(AI);
9011 LLVMContext &Ctx = Builder.getContext();
9012 const DataLayout &DL = AI->getDataLayout();
9013 Type *ValueType = AI->getType();
9014 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9015
9016 Value *Addr = AI->getPointerOperand();
9017 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9018 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9019
9020 Value *AlignedAddr = Builder.CreateIntrinsic(
9021 Intrinsic::ptrmask, {PtrTy, IntTy},
9022 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9023 "AlignedAddr");
9024
9025 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9026 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9027 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9028 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9029 Value *Mask = Builder.CreateShl(
9030 ConstantInt::get(WordType,
9031 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9032 ShiftAmt, "Mask");
9033 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9034 Value *ValOperand_Shifted =
9035 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9036 ShiftAmt, "ValOperand_Shifted");
9037 Value *NewOperand;
9038 if (Op == AtomicRMWInst::And)
9039 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9040 else
9041 NewOperand = ValOperand_Shifted;
9042
9043 AtomicRMWInst *NewAI =
9044 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9045 AI->getOrdering(), AI->getSyncScopeID());
9046
9047 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9048 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9049 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9050 AI->replaceAllUsesWith(FinalOldResult);
9051 AI->eraseFromParent();
9052}
9053
9056 const AtomicRMWInst *AI) const {
9057 // TODO: Add more AtomicRMWInst that needs to be extended.
9058
9059 // Since floating-point operation requires a non-trivial set of data
9060 // operations, use CmpXChg to expand.
9061 if (AI->isFloatingPointOperation() ||
9067
9068 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9071 AI->getOperation() == AtomicRMWInst::Sub)) {
9073 }
9074
9075 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9076 if (Subtarget.hasLAMCAS()) {
9077 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9081 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9083 }
9084
9085 if (Size == 8 || Size == 16)
9088}
9089
9090static Intrinsic::ID
9092 AtomicRMWInst::BinOp BinOp) {
9093 if (GRLen == 64) {
9094 switch (BinOp) {
9095 default:
9096 llvm_unreachable("Unexpected AtomicRMW BinOp");
9098 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9099 case AtomicRMWInst::Add:
9100 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9101 case AtomicRMWInst::Sub:
9102 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9104 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9106 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9108 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9109 case AtomicRMWInst::Max:
9110 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9111 case AtomicRMWInst::Min:
9112 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9113 // TODO: support other AtomicRMWInst.
9114 }
9115 }
9116
9117 if (GRLen == 32) {
9118 switch (BinOp) {
9119 default:
9120 llvm_unreachable("Unexpected AtomicRMW BinOp");
9122 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9123 case AtomicRMWInst::Add:
9124 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9125 case AtomicRMWInst::Sub:
9126 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9128 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9130 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9132 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9133 case AtomicRMWInst::Max:
9134 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9135 case AtomicRMWInst::Min:
9136 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9137 // TODO: support other AtomicRMWInst.
9138 }
9139 }
9140
9141 llvm_unreachable("Unexpected GRLen\n");
9142}
9143
9146 const AtomicCmpXchgInst *CI) const {
9147
9148 if (Subtarget.hasLAMCAS())
9150
9152 if (Size == 8 || Size == 16)
9155}
9156
9158 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9159 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9160 unsigned GRLen = Subtarget.getGRLen();
9161 AtomicOrdering FailOrd = CI->getFailureOrdering();
9162 Value *FailureOrdering =
9163 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9164 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9165 if (GRLen == 64) {
9166 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9167 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9168 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9169 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9170 }
9171 Type *Tys[] = {AlignedAddr->getType()};
9172 Value *Result = Builder.CreateIntrinsic(
9173 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9174 if (GRLen == 64)
9175 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9176 return Result;
9177}
9178
9180 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9181 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9182 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9183 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9184 // mask, as this produces better code than the LL/SC loop emitted by
9185 // int_loongarch_masked_atomicrmw_xchg.
9186 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9189 if (CVal->isZero())
9190 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9191 Builder.CreateNot(Mask, "Inv_Mask"),
9192 AI->getAlign(), Ord);
9193 if (CVal->isMinusOne())
9194 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9195 AI->getAlign(), Ord);
9196 }
9197
9198 unsigned GRLen = Subtarget.getGRLen();
9199 Value *Ordering =
9200 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9201 Type *Tys[] = {AlignedAddr->getType()};
9203 AI->getModule(),
9205
9206 if (GRLen == 64) {
9207 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9208 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9209 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9210 }
9211
9212 Value *Result;
9213
9214 // Must pass the shift amount needed to sign extend the loaded value prior
9215 // to performing a signed comparison for min/max. ShiftAmt is the number of
9216 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9217 // is the number of bits to left+right shift the value in order to
9218 // sign-extend.
9219 if (AI->getOperation() == AtomicRMWInst::Min ||
9221 const DataLayout &DL = AI->getDataLayout();
9222 unsigned ValWidth =
9223 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9224 Value *SextShamt =
9225 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9226 Result = Builder.CreateCall(LlwOpScwLoop,
9227 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9228 } else {
9229 Result =
9230 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9231 }
9232
9233 if (GRLen == 64)
9234 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9235 return Result;
9236}
9237
9239 const MachineFunction &MF, EVT VT) const {
9240 VT = VT.getScalarType();
9241
9242 if (!VT.isSimple())
9243 return false;
9244
9245 switch (VT.getSimpleVT().SimpleTy) {
9246 case MVT::f32:
9247 case MVT::f64:
9248 return true;
9249 default:
9250 break;
9251 }
9252
9253 return false;
9254}
9255
9257 const Constant *PersonalityFn) const {
9258 return LoongArch::R4;
9259}
9260
9262 const Constant *PersonalityFn) const {
9263 return LoongArch::R5;
9264}
9265
9266//===----------------------------------------------------------------------===//
9267// Target Optimization Hooks
9268//===----------------------------------------------------------------------===//
9269
9271 const LoongArchSubtarget &Subtarget) {
9272 // Feature FRECIPE instrucions relative accuracy is 2^-14.
9273 // IEEE float has 23 digits and double has 52 digits.
9274 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9275 return RefinementSteps;
9276}
9277
9279 SelectionDAG &DAG, int Enabled,
9280 int &RefinementSteps,
9281 bool &UseOneConstNR,
9282 bool Reciprocal) const {
9283 if (Subtarget.hasFrecipe()) {
9284 SDLoc DL(Operand);
9285 EVT VT = Operand.getValueType();
9286
9287 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9288 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9289 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9290 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9291 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9292
9293 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9294 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9295
9296 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9297 if (Reciprocal)
9298 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9299
9300 return Estimate;
9301 }
9302 }
9303
9304 return SDValue();
9305}
9306
9308 SelectionDAG &DAG,
9309 int Enabled,
9310 int &RefinementSteps) const {
9311 if (Subtarget.hasFrecipe()) {
9312 SDLoc DL(Operand);
9313 EVT VT = Operand.getValueType();
9314
9315 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9316 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9317 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9318 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9319 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9320
9321 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9322 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9323
9324 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9325 }
9326 }
9327
9328 return SDValue();
9329}
9330
9331//===----------------------------------------------------------------------===//
9332// LoongArch Inline Assembly Support
9333//===----------------------------------------------------------------------===//
9334
9336LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9337 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9338 //
9339 // 'f': A floating-point register (if available).
9340 // 'k': A memory operand whose address is formed by a base register and
9341 // (optionally scaled) index register.
9342 // 'l': A signed 16-bit constant.
9343 // 'm': A memory operand whose address is formed by a base register and
9344 // offset that is suitable for use in instructions with the same
9345 // addressing mode as st.w and ld.w.
9346 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9347 // instruction)
9348 // 'I': A signed 12-bit constant (for arithmetic instructions).
9349 // 'J': Integer zero.
9350 // 'K': An unsigned 12-bit constant (for logic instructions).
9351 // "ZB": An address that is held in a general-purpose register. The offset is
9352 // zero.
9353 // "ZC": A memory operand whose address is formed by a base register and
9354 // offset that is suitable for use in instructions with the same
9355 // addressing mode as ll.w and sc.w.
9356 if (Constraint.size() == 1) {
9357 switch (Constraint[0]) {
9358 default:
9359 break;
9360 case 'f':
9361 case 'q':
9362 return C_RegisterClass;
9363 case 'l':
9364 case 'I':
9365 case 'J':
9366 case 'K':
9367 return C_Immediate;
9368 case 'k':
9369 return C_Memory;
9370 }
9371 }
9372
9373 if (Constraint == "ZC" || Constraint == "ZB")
9374 return C_Memory;
9375
9376 // 'm' is handled here.
9377 return TargetLowering::getConstraintType(Constraint);
9378}
9379
9380InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9381 StringRef ConstraintCode) const {
9382 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9386 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9387}
9388
9389std::pair<unsigned, const TargetRegisterClass *>
9390LoongArchTargetLowering::getRegForInlineAsmConstraint(
9391 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9392 // First, see if this is a constraint that directly corresponds to a LoongArch
9393 // register class.
9394 if (Constraint.size() == 1) {
9395 switch (Constraint[0]) {
9396 case 'r':
9397 // TODO: Support fixed vectors up to GRLen?
9398 if (VT.isVector())
9399 break;
9400 return std::make_pair(0U, &LoongArch::GPRRegClass);
9401 case 'q':
9402 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9403 case 'f':
9404 if (Subtarget.hasBasicF() && VT == MVT::f32)
9405 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9406 if (Subtarget.hasBasicD() && VT == MVT::f64)
9407 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9408 if (Subtarget.hasExtLSX() &&
9409 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9410 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9411 if (Subtarget.hasExtLASX() &&
9412 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9413 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9414 break;
9415 default:
9416 break;
9417 }
9418 }
9419
9420 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9421 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9422 // constraints while the official register name is prefixed with a '$'. So we
9423 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9424 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9425 // case insensitive, so no need to convert the constraint to upper case here.
9426 //
9427 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9428 // decode the usage of register name aliases into their official names. And
9429 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9430 // official register names.
9431 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9432 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9433 bool IsFP = Constraint[2] == 'f';
9434 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9435 std::pair<unsigned, const TargetRegisterClass *> R;
9437 TRI, join_items("", Temp.first, Temp.second), VT);
9438 // Match those names to the widest floating point register type available.
9439 if (IsFP) {
9440 unsigned RegNo = R.first;
9441 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9442 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9443 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9444 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9445 }
9446 }
9447 }
9448 return R;
9449 }
9450
9451 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9452}
9453
9454void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9455 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9456 SelectionDAG &DAG) const {
9457 // Currently only support length 1 constraints.
9458 if (Constraint.size() == 1) {
9459 switch (Constraint[0]) {
9460 case 'l':
9461 // Validate & create a 16-bit signed immediate operand.
9462 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9463 uint64_t CVal = C->getSExtValue();
9464 if (isInt<16>(CVal))
9465 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9466 Subtarget.getGRLenVT()));
9467 }
9468 return;
9469 case 'I':
9470 // Validate & create a 12-bit signed immediate operand.
9471 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9472 uint64_t CVal = C->getSExtValue();
9473 if (isInt<12>(CVal))
9474 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9475 Subtarget.getGRLenVT()));
9476 }
9477 return;
9478 case 'J':
9479 // Validate & create an integer zero operand.
9480 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9481 if (C->getZExtValue() == 0)
9482 Ops.push_back(
9483 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9484 return;
9485 case 'K':
9486 // Validate & create a 12-bit unsigned immediate operand.
9487 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9488 uint64_t CVal = C->getZExtValue();
9489 if (isUInt<12>(CVal))
9490 Ops.push_back(
9491 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9492 }
9493 return;
9494 default:
9495 break;
9496 }
9497 }
9499}
9500
9501#define GET_REGISTER_MATCHER
9502#include "LoongArchGenAsmMatcher.inc"
9503
9506 const MachineFunction &MF) const {
9507 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9508 std::string NewRegName = Name.second.str();
9509 Register Reg = MatchRegisterAltName(NewRegName);
9510 if (!Reg)
9511 Reg = MatchRegisterName(NewRegName);
9512 if (!Reg)
9513 return Reg;
9514 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9515 if (!ReservedRegs.test(Reg))
9516 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9517 StringRef(RegName) + "\"."));
9518 return Reg;
9519}
9520
9522 EVT VT, SDValue C) const {
9523 // TODO: Support vectors.
9524 if (!VT.isScalarInteger())
9525 return false;
9526
9527 // Omit the optimization if the data size exceeds GRLen.
9528 if (VT.getSizeInBits() > Subtarget.getGRLen())
9529 return false;
9530
9531 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9532 const APInt &Imm = ConstNode->getAPIntValue();
9533 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9534 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9535 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9536 return true;
9537 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9538 if (ConstNode->hasOneUse() &&
9539 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9540 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9541 return true;
9542 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9543 // in which the immediate has two set bits. Or Break (MUL x, imm)
9544 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9545 // equals to (1 << s0) - (1 << s1).
9546 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9547 unsigned Shifts = Imm.countr_zero();
9548 // Reject immediates which can be composed via a single LUI.
9549 if (Shifts >= 12)
9550 return false;
9551 // Reject multiplications can be optimized to
9552 // (SLLI (ALSL x, x, 1/2/3/4), s).
9553 APInt ImmPop = Imm.ashr(Shifts);
9554 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9555 return false;
9556 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9557 // since it needs one more instruction than other 3 cases.
9558 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9559 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9560 (ImmSmall - Imm).isPowerOf2())
9561 return true;
9562 }
9563 }
9564
9565 return false;
9566}
9567
9569 const AddrMode &AM,
9570 Type *Ty, unsigned AS,
9571 Instruction *I) const {
9572 // LoongArch has four basic addressing modes:
9573 // 1. reg
9574 // 2. reg + 12-bit signed offset
9575 // 3. reg + 14-bit signed offset left-shifted by 2
9576 // 4. reg1 + reg2
9577 // TODO: Add more checks after support vector extension.
9578
9579 // No global is ever allowed as a base.
9580 if (AM.BaseGV)
9581 return false;
9582
9583 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9584 // with `UAL` feature.
9585 if (!isInt<12>(AM.BaseOffs) &&
9586 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9587 return false;
9588
9589 switch (AM.Scale) {
9590 case 0:
9591 // "r+i" or just "i", depending on HasBaseReg.
9592 break;
9593 case 1:
9594 // "r+r+i" is not allowed.
9595 if (AM.HasBaseReg && AM.BaseOffs)
9596 return false;
9597 // Otherwise we have "r+r" or "r+i".
9598 break;
9599 case 2:
9600 // "2*r+r" or "2*r+i" is not allowed.
9601 if (AM.HasBaseReg || AM.BaseOffs)
9602 return false;
9603 // Allow "2*r" as "r+r".
9604 break;
9605 default:
9606 return false;
9607 }
9608
9609 return true;
9610}
9611
9613 return isInt<12>(Imm);
9614}
9615
9617 return isInt<12>(Imm);
9618}
9619
9621 // Zexts are free if they can be combined with a load.
9622 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9623 // poorly with type legalization of compares preferring sext.
9624 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9625 EVT MemVT = LD->getMemoryVT();
9626 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9627 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9628 LD->getExtensionType() == ISD::ZEXTLOAD))
9629 return true;
9630 }
9631
9632 return TargetLowering::isZExtFree(Val, VT2);
9633}
9634
9636 EVT DstVT) const {
9637 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9638}
9639
9641 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9642}
9643
9645 // TODO: Support vectors.
9646 if (Y.getValueType().isVector())
9647 return false;
9648
9649 return !isa<ConstantSDNode>(Y);
9650}
9651
9653 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9654 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9655}
9656
9658 Type *Ty, bool IsSigned) const {
9659 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9660 return true;
9661
9662 return IsSigned;
9663}
9664
9666 // Return false to suppress the unnecessary extensions if the LibCall
9667 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9668 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9669 Type.getSizeInBits() < Subtarget.getGRLen()))
9670 return false;
9671 return true;
9672}
9673
9674// memcpy, and other memory intrinsics, typically tries to use wider load/store
9675// if the source/dest is aligned and the copy size is large enough. We therefore
9676// want to align such objects passed to memory intrinsics.
9678 unsigned &MinSize,
9679 Align &PrefAlign) const {
9680 if (!isa<MemIntrinsic>(CI))
9681 return false;
9682
9683 if (Subtarget.is64Bit()) {
9684 MinSize = 8;
9685 PrefAlign = Align(8);
9686 } else {
9687 MinSize = 4;
9688 PrefAlign = Align(4);
9689 }
9690
9691 return true;
9692}
9693
9702
9703bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9704 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9705 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9706 bool IsABIRegCopy = CC.has_value();
9707 EVT ValueVT = Val.getValueType();
9708
9709 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9710 PartVT == MVT::f32) {
9711 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9712 // nan, and cast to f32.
9713 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9714 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9715 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9716 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9717 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9718 Parts[0] = Val;
9719 return true;
9720 }
9721
9722 return false;
9723}
9724
9725SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9726 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9727 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9728 bool IsABIRegCopy = CC.has_value();
9729
9730 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9731 PartVT == MVT::f32) {
9732 SDValue Val = Parts[0];
9733
9734 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9735 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9736 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9737 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9738 return Val;
9739 }
9740
9741 return SDValue();
9742}
9743
9744MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9745 CallingConv::ID CC,
9746 EVT VT) const {
9747 // Use f32 to pass f16.
9748 if (VT == MVT::f16 && Subtarget.hasBasicF())
9749 return MVT::f32;
9750
9752}
9753
9754unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9755 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9756 // Use f32 to pass f16.
9757 if (VT == MVT::f16 && Subtarget.hasBasicF())
9758 return 1;
9759
9761}
9762
9764 SDValue Op, const APInt &OriginalDemandedBits,
9765 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9766 unsigned Depth) const {
9767 EVT VT = Op.getValueType();
9768 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9769 unsigned Opc = Op.getOpcode();
9770 switch (Opc) {
9771 default:
9772 break;
9773 case LoongArchISD::VMSKLTZ:
9774 case LoongArchISD::XVMSKLTZ: {
9775 SDValue Src = Op.getOperand(0);
9776 MVT SrcVT = Src.getSimpleValueType();
9777 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9778 unsigned NumElts = SrcVT.getVectorNumElements();
9779
9780 // If we don't need the sign bits at all just return zero.
9781 if (OriginalDemandedBits.countr_zero() >= NumElts)
9782 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9783
9784 // Only demand the vector elements of the sign bits we need.
9785 APInt KnownUndef, KnownZero;
9786 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9787 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9788 TLO, Depth + 1))
9789 return true;
9790
9791 Known.Zero = KnownZero.zext(BitWidth);
9792 Known.Zero.setHighBits(BitWidth - NumElts);
9793
9794 // [X]VMSKLTZ only uses the MSB from each vector element.
9795 KnownBits KnownSrc;
9796 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9797 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9798 Depth + 1))
9799 return true;
9800
9801 if (KnownSrc.One[SrcBits - 1])
9802 Known.One.setLowBits(NumElts);
9803 else if (KnownSrc.Zero[SrcBits - 1])
9804 Known.Zero.setLowBits(NumElts);
9805
9806 // Attempt to avoid multi-use ops if we don't need anything from it.
9808 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9809 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9810 return false;
9811 }
9812 }
9813
9815 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9816}
9817
9819 unsigned Opc = VecOp.getOpcode();
9820
9821 // Assume target opcodes can't be scalarized.
9822 // TODO - do we have any exceptions?
9823 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9824 return false;
9825
9826 // If the vector op is not supported, try to convert to scalar.
9827 EVT VecVT = VecOp.getValueType();
9829 return true;
9830
9831 // If the vector op is supported, but the scalar op is not, the transform may
9832 // not be worthwhile.
9833 EVT ScalarVT = VecVT.getScalarType();
9834 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9835}
9836
9838 unsigned Index) const {
9840 return false;
9841
9842 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
9843 return Index == 0;
9844}
9845
9847 unsigned Index) const {
9848 EVT EltVT = VT.getScalarType();
9849
9850 // Extract a scalar FP value from index 0 of a vector is free.
9851 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9852}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1495
bool isZero() const
Definition APFloat.h:1508
APInt bitcastToAPInt() const
Definition APFloat.h:1416
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1400
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1648
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1397
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:890
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2772
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...