1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
78LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
127
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
140 // we know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
188
192 Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
206 }
207
209
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit())
250
251 if (!Subtarget.hasBasicD()) {
253 if (Subtarget.is64Bit()) {
256 }
257 }
258 }
259
260 // Set operations for 'D' feature.
261
262 if (Subtarget.hasBasicD()) {
263 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
264 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
266 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
267 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
269 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
270
290 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 Subtarget.isSoftFPABI() ? LibCall : Custom);
294
295 if (Subtarget.is64Bit())
297 }
298
299 // Set operations for 'LSX' feature.
300
301 if (Subtarget.hasExtLSX()) {
303 // Expand all truncating stores and extending loads.
304 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
305 setTruncStoreAction(VT, InnerVT, Expand);
308 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
309 }
310 // By default everything must be expanded. Then we will selectively turn
311 // on ones that can be effectively codegen'd.
312 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
314 }
315
316 for (MVT VT : LSXVTs) {
320
324
329 }
330 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
333 Legal);
335 VT, Legal);
342 Expand);
353 }
354 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
356 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
358 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
361 }
362 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 VT, Expand);
378 }
380 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
381 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
382 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
383 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
384
385 for (MVT VT :
386 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
387 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
397 }
398 }
399
400 // Set operations for 'LASX' feature.
401
402 if (Subtarget.hasExtLASX()) {
403 for (MVT VT : LASXVTs) {
407
413
417 }
418 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
421 Legal);
423 VT, Legal);
430 Expand);
442 }
443 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
445 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
447 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
450 }
451 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
459 VT, Expand);
467 }
468 }
469
470 // Set DAG combine for LA32 and LA64.
471 if (Subtarget.hasBasicF()) {
473 }
474
479
480 // Set DAG combine for 'LSX' feature.
481
482 if (Subtarget.hasExtLSX()) {
485 }
486
487 // Set DAG combine for 'LASX' feature.
488
489 if (Subtarget.hasExtLASX())
491
492 // Compute derived properties from the register classes.
493 computeRegisterProperties(Subtarget.getRegisterInfo());
494
496
499
500 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
501
503
504 // Function alignments.
506 // Set preferred alignments.
507 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
508 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
509 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
510
511 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
512 if (Subtarget.hasLAMCAS())
514
515 if (Subtarget.hasSCQ()) {
518 }
519
520 // Disable strict node mutation.
521 IsStrictFPEnabled = true;
522}
523
524bool LoongArchTargetLowering::isOffsetFoldingLegal(
525 const GlobalAddressSDNode *GA) const {
526 // In order to maximise the opportunity for common subexpression elimination,
527 // keep a separate ADD node for the global address offset instead of folding
528 // it in the global address node. Later peephole optimisations may choose to
529 // fold it back in when profitable.
530 return false;
531}
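// Editorial illustration (assumed, not from the source): with offset folding
// disabled, accesses such as (@g + 8) and (@g + 16) both materialize the bare
// address of @g and keep the +8/+16 as separate small ADDs, so the more
// expensive address materialization is a common subexpression that CSE can
// share; a later peephole may still fold an offset back in where profitable.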
532
533SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
534 SelectionDAG &DAG) const {
535 switch (Op.getOpcode()) {
537 return lowerATOMIC_FENCE(Op, DAG);
539 return lowerEH_DWARF_CFA(Op, DAG);
541 return lowerGlobalAddress(Op, DAG);
543 return lowerGlobalTLSAddress(Op, DAG);
545 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
547 return lowerINTRINSIC_W_CHAIN(Op, DAG);
549 return lowerINTRINSIC_VOID(Op, DAG);
551 return lowerBlockAddress(Op, DAG);
552 case ISD::JumpTable:
553 return lowerJumpTable(Op, DAG);
554 case ISD::SHL_PARTS:
555 return lowerShiftLeftParts(Op, DAG);
556 case ISD::SRA_PARTS:
557 return lowerShiftRightParts(Op, DAG, true);
558 case ISD::SRL_PARTS:
559 return lowerShiftRightParts(Op, DAG, false);
561 return lowerConstantPool(Op, DAG);
562 case ISD::FP_TO_SINT:
563 return lowerFP_TO_SINT(Op, DAG);
564 case ISD::BITCAST:
565 return lowerBITCAST(Op, DAG);
566 case ISD::UINT_TO_FP:
567 return lowerUINT_TO_FP(Op, DAG);
568 case ISD::SINT_TO_FP:
569 return lowerSINT_TO_FP(Op, DAG);
570 case ISD::VASTART:
571 return lowerVASTART(Op, DAG);
572 case ISD::FRAMEADDR:
573 return lowerFRAMEADDR(Op, DAG);
574 case ISD::RETURNADDR:
575 return lowerRETURNADDR(Op, DAG);
577 return lowerWRITE_REGISTER(Op, DAG);
579 return lowerINSERT_VECTOR_ELT(Op, DAG);
581 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
583 return lowerBUILD_VECTOR(Op, DAG);
585 return lowerCONCAT_VECTORS(Op, DAG);
587 return lowerVECTOR_SHUFFLE(Op, DAG);
588 case ISD::BITREVERSE:
589 return lowerBITREVERSE(Op, DAG);
591 return lowerSCALAR_TO_VECTOR(Op, DAG);
592 case ISD::PREFETCH:
593 return lowerPREFETCH(Op, DAG);
594 case ISD::SELECT:
595 return lowerSELECT(Op, DAG);
596 case ISD::BRCOND:
597 return lowerBRCOND(Op, DAG);
598 case ISD::FP_TO_FP16:
599 return lowerFP_TO_FP16(Op, DAG);
600 case ISD::FP16_TO_FP:
601 return lowerFP16_TO_FP(Op, DAG);
602 case ISD::FP_TO_BF16:
603 return lowerFP_TO_BF16(Op, DAG);
604 case ISD::BF16_TO_FP:
605 return lowerBF16_TO_FP(Op, DAG);
607 return lowerVECREDUCE_ADD(Op, DAG);
608 case ISD::ROTL:
609 case ISD::ROTR:
610 return lowerRotate(Op, DAG);
618 return lowerVECREDUCE(Op, DAG);
619 case ISD::ConstantFP:
620 return lowerConstantFP(Op, DAG);
621 }
622 return SDValue();
623}
624
625// Helper to attempt to return a cheaper, bit-inverted version of \p V.
626static SDValue isNOT(SDValue V, SelectionDAG &DAG) {
627 // TODO: don't always ignore oneuse constraints.
628 V = peekThroughBitcasts(V);
629 EVT VT = V.getValueType();
630
631 // Match not(xor X, -1) -> X.
632 if (V.getOpcode() == ISD::XOR &&
633 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
634 isAllOnesConstant(V.getOperand(1))))
635 return V.getOperand(0);
636
637 // Match not(extract_subvector(not(X))) -> extract_subvector(X).
638 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
639 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
640 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
641 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
642 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
643 V.getOperand(1));
644 }
645 }
646
647 // Match not(SplatVector(not(X))) -> SplatVector(X).
648 if (V.getOpcode() == ISD::BUILD_VECTOR) {
649 if (SDValue SplatValue =
650 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
651 if (!V->isOnlyUserOf(SplatValue.getNode()))
652 return SDValue();
653
654 if (SDValue Not = isNOT(SplatValue, DAG)) {
655 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
656 return DAG.getSplat(VT, SDLoc(Not), Not);
657 }
658 }
659 }
660
661 // Match not(or(not(X),not(Y))) -> and(X, Y).
662 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
663 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
664 // TODO: Handle cases with single NOT operand -> VANDN
665 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
666 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
667 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
668 DAG.getBitcast(VT, Op1));
669 }
670
671 // TODO: Add more matching patterns. Such as,
672 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
673 // not(slt(C, X)) -> slt(X - 1, C)
674
675 return SDValue();
676}
677
678SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
679 SelectionDAG &DAG) const {
680 EVT VT = Op.getValueType();
681 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
682 const APFloat &FPVal = CFP->getValueAPF();
683 SDLoc DL(CFP);
684
685 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
686 (VT == MVT::f64 && Subtarget.hasBasicD()));
687
688 // If value is 0.0 or -0.0, just ignore it.
689 if (FPVal.isZero())
690 return SDValue();
691
692 // If LSX is enabled, use the cheaper 'vldi' instruction if possible.
693 if (isFPImmVLDILegal(FPVal, VT))
694 return SDValue();
695
696 // Construct as integer, and move to float register.
697 APInt INTVal = FPVal.bitcastToAPInt();
698
699 // If more than MaterializeFPImmInsNum instructions will be used to
700 // generate the INTVal and move it to a float register, fall back to a
701 // floating-point load from the constant pool.
703 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
704 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
705 return SDValue();
706
707 switch (VT.getSimpleVT().SimpleTy) {
708 default:
709 llvm_unreachable("Unexpected floating point type!");
710 break;
711 case MVT::f32: {
712 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
713 if (Subtarget.is64Bit())
714 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
715 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
716 : LoongArchISD::MOVGR2FR_W,
717 DL, VT, NewVal);
718 }
719 case MVT::f64: {
720 if (Subtarget.is64Bit()) {
721 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
722 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
723 }
724 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
725 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
726 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
727 }
728 }
729
730 return SDValue();
731}
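// Editorial example (assumed, not from the source): for 'float 1.5' the bit
// pattern is 0x3FC00000, which LA64 can build with a single LU12I.W and then
// transfer with MOVGR2FR.W, i.e. two instructions in total, which is within
// the default threshold of 3, so no constant-pool load is needed.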
732
733// Lower vecreduce_add using vhaddw instructions.
734// For Example:
735// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
736// can be lowered to:
737// VHADDW_D_W vr0, vr0, vr0
738// VHADDW_Q_D vr0, vr0, vr0
739// VPICKVE2GR_D a0, vr0, 0
740// ADDI_W a0, a0, 0
741SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
742 SelectionDAG &DAG) const {
743
744 SDLoc DL(Op);
745 MVT OpVT = Op.getSimpleValueType();
746 SDValue Val = Op.getOperand(0);
747
748 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
749 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
750 unsigned ResBits = OpVT.getScalarSizeInBits();
751
752 unsigned LegalVecSize = 128;
753 bool isLASX256Vector =
754 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
755
756 // Widen the operand vector until its type is legal.
757 while (!isTypeLegal(Val.getSimpleValueType())) {
758 Val = DAG.WidenVector(Val, DL);
759 }
760
761 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for
762 // LASX should take the same count, so halve it for 256-bit vectors.
763 if (isLASX256Vector) {
764 NumEles /= 2;
765 LegalVecSize = 256;
766 }
767
768 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
769 MVT IntTy = MVT::getIntegerVT(EleBits);
770 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
771 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
772 }
773
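 // Editorial note (assumed): after the VHADDW chain, each 128-bit lane of a
 // LASX vector holds its own partial sum in its low i64 element; XVPERMI with
 // immediate 2 moves the high lane's partial sum down to element 0 so the two
 // partial sums can be added before the final extraction.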
774 if (isLASX256Vector) {
775 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
776 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
777 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
778 }
779
780 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
781 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
782 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
783}
784
785// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
786// For Example:
787// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
788// can be lowered to:
789// VBSRL_V vr1, vr0, 8
790// VMAX_W vr0, vr1, vr0
791// VBSRL_V vr1, vr0, 4
792// VMAX_W vr0, vr1, vr0
793// VPICKVE2GR_W a0, vr0, 0
794// For a 256-bit vector, it is illegal and will be split into
795// two 128-bit vectors by default, then processed by this.
796SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
797 SelectionDAG &DAG) const {
798 SDLoc DL(Op);
799
800 MVT OpVT = Op.getSimpleValueType();
801 SDValue Val = Op.getOperand(0);
802
803 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
804 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
805
806 // Widen the operand vector until its type is legal.
807 while (!isTypeLegal(Val.getSimpleValueType())) {
808 Val = DAG.WidenVector(Val, DL);
809 }
810
811 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
812 MVT VecTy = Val.getSimpleValueType();
813 MVT GRLenVT = Subtarget.getGRLenVT();
814
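 // Editorial note (assumed): each VBSRL below shifts the vector right by half
 // of the still-active region, i * EleBits / 16 bytes, so the upper half of
 // the remaining elements is combined with the lower half on every iteration.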
815 for (int i = NumEles; i > 1; i /= 2) {
816 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
817 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
818 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
819 }
820
821 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
822 DAG.getConstant(0, DL, GRLenVT));
823}
824
825SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
826 SelectionDAG &DAG) const {
827 unsigned IsData = Op.getConstantOperandVal(4);
828
829 // We don't support non-data prefetch.
830 // Just preserve the chain.
831 if (!IsData)
832 return Op.getOperand(0);
833
834 return Op;
835}
836
837SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
838 SelectionDAG &DAG) const {
839 MVT VT = Op.getSimpleValueType();
840 assert(VT.isVector() && "Unexpected type");
841
842 SDLoc DL(Op);
843 SDValue R = Op.getOperand(0);
844 SDValue Amt = Op.getOperand(1);
845 unsigned Opcode = Op.getOpcode();
846 unsigned EltSizeInBits = VT.getScalarSizeInBits();
847
848 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
849 if (V.getOpcode() != ISD::BUILD_VECTOR)
850 return false;
851 if (SDValue SplatValue =
852 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
853 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
854 CstSplatValue = C->getAPIntValue();
855 return true;
856 }
857 }
858 return false;
859 };
860
861 // Check for constant splat rotation amount.
862 APInt CstSplatValue;
863 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
864 bool isROTL = Opcode == ISD::ROTL;
865
866 // Check for splat rotate by zero.
867 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
868 return R;
869
870 // LoongArch targets always prefer ISD::ROTR.
871 if (isROTL) {
872 SDValue Zero = DAG.getConstant(0, DL, VT);
873 return DAG.getNode(ISD::ROTR, DL, VT, R,
874 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
875 }
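 // Editorial note: the ROTL -> ROTR rewrite above relies on the identity
 // rotl(x, amt) == rotr(x, (0 - amt) mod EltSizeInBits), which holds because
 // rotate amounts are interpreted modulo the element width.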
876
877 // Rotate by an immediate.
878 if (IsCstSplat) {
879 // ISD::ROTR: Attempt to rotate by a positive immediate.
880 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
881 if (SDValue Urem =
882 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
883 return DAG.getNode(Opcode, DL, VT, R, Urem);
884 }
885
886 return Op;
887}
888
889// Return true if Val is equal to (setcc LHS, RHS, CC).
890// Return false if Val is the inverse of (setcc LHS, RHS, CC).
891// Otherwise, return std::nullopt.
892static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
893 ISD::CondCode CC, SDValue Val) {
894 assert(Val->getOpcode() == ISD::SETCC);
895 SDValue LHS2 = Val.getOperand(0);
896 SDValue RHS2 = Val.getOperand(1);
897 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
898
899 if (LHS == LHS2 && RHS == RHS2) {
900 if (CC == CC2)
901 return true;
902 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
903 return false;
904 } else if (LHS == RHS2 && RHS == LHS2) {
905 CC2 = ISD::getSetCCSwappedOperands(CC2);
906 if (CC == CC2)
907 return true;
908 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
909 return false;
910 }
911
912 return std::nullopt;
913}
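// Editorial example (assumed): with (LHS, RHS, CC) = (a, b, SETLT),
// Val = setcc(a, b, SETLT) returns true, Val = setcc(a, b, SETGE) returns
// false (the inverse), and an unrelated setcc yields std::nullopt.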
914
915static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
916 const LoongArchSubtarget &Subtarget) {
917 SDValue CondV = N->getOperand(0);
918 SDValue TrueV = N->getOperand(1);
919 SDValue FalseV = N->getOperand(2);
920 MVT VT = N->getSimpleValueType(0);
921 SDLoc DL(N);
922
923 // (select c, -1, y) -> -c | y
924 if (isAllOnesConstant(TrueV)) {
925 SDValue Neg = DAG.getNegative(CondV, DL, VT);
926 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
927 }
928 // (select c, y, -1) -> (c-1) | y
929 if (isAllOnesConstant(FalseV)) {
930 SDValue Neg =
931 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
932 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
933 }
934
935 // (select c, 0, y) -> (c-1) & y
936 if (isNullConstant(TrueV)) {
937 SDValue Neg =
938 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
939 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
940 }
941 // (select c, y, 0) -> -c & y
942 if (isNullConstant(FalseV)) {
943 SDValue Neg = DAG.getNegative(CondV, DL, VT);
944 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
945 }
946
947 // select c, ~x, x --> xor -c, x
948 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
949 const APInt &TrueVal = TrueV->getAsAPIntVal();
950 const APInt &FalseVal = FalseV->getAsAPIntVal();
951 if (~TrueVal == FalseVal) {
952 SDValue Neg = DAG.getNegative(CondV, DL, VT);
953 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
954 }
955 }
956
957 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
958 // when both truev and falsev are also setcc.
959 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
960 FalseV.getOpcode() == ISD::SETCC) {
961 SDValue LHS = CondV.getOperand(0);
962 SDValue RHS = CondV.getOperand(1);
963 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
964
965 // (select x, x, y) -> x | y
966 // (select !x, x, y) -> x & y
967 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
968 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
969 DAG.getFreeze(FalseV));
970 }
971 // (select x, y, x) -> x & y
972 // (select !x, y, x) -> x | y
973 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
974 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
975 DAG.getFreeze(TrueV), FalseV);
976 }
977 }
978
979 return SDValue();
980}
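// Editorial sketch (not part of the file): a standalone check of the scalar
// identities behind the select-to-bitwise folds above, assuming the condition
// c is 0 or 1 as produced by a setcc.
#include <cassert>
#include <cstdint>
int main() {
  const uint64_t y = 0x1234abcdULL;
  for (uint64_t c : {uint64_t(0), uint64_t(1)}) {
    assert((c ? ~uint64_t(0) : y) == ((0 - c) | y)); // (select c, -1, y) -> -c | y
    assert((c ? y : ~uint64_t(0)) == ((c - 1) | y)); // (select c, y, -1) -> (c-1) | y
    assert((c ? uint64_t(0) : y) == ((c - 1) & y));  // (select c, 0, y) -> (c-1) & y
    assert((c ? y : uint64_t(0)) == ((0 - c) & y));  // (select c, y, 0) -> -c & y
  }
  return 0;
}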
981
982// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
983// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
984// For now we only consider the transformation profitable if `binOp(c0, c1)` ends up
985// being `0` or `-1`. In such cases we can replace `select` with `and`.
986// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
987// than `c0`?
988static SDValue
989foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
990 const LoongArchSubtarget &Subtarget) {
991 unsigned SelOpNo = 0;
992 SDValue Sel = BO->getOperand(0);
993 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
994 SelOpNo = 1;
995 Sel = BO->getOperand(1);
996 }
997
998 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
999 return SDValue();
1000
1001 unsigned ConstSelOpNo = 1;
1002 unsigned OtherSelOpNo = 2;
1003 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1004 ConstSelOpNo = 2;
1005 OtherSelOpNo = 1;
1006 }
1007 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1008 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1009 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1010 return SDValue();
1011
1012 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1013 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1014 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1015 return SDValue();
1016
1017 SDLoc DL(Sel);
1018 EVT VT = BO->getValueType(0);
1019
1020 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1021 if (SelOpNo == 1)
1022 std::swap(NewConstOps[0], NewConstOps[1]);
1023
1024 SDValue NewConstOp =
1025 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1026 if (!NewConstOp)
1027 return SDValue();
1028
1029 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1030 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1031 return SDValue();
1032
1033 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1034 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1035 if (SelOpNo == 1)
1036 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1037 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1038
1039 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1040 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1041 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1042}
1043
1044// Changes the condition code and swaps operands if necessary, so the SetCC
1045// operation matches one of the comparisons supported directly by branches
1046// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1047// compare with 1/-1.
1048static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1049 ISD::CondCode &CC, SelectionDAG &DAG) {
1050 // If this is a single bit test that can't be handled by ANDI, shift the
1051 // bit to be tested to the MSB and perform a signed compare with 0.
1052 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1053 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1054 isa<ConstantSDNode>(LHS.getOperand(1))) {
1055 uint64_t Mask = LHS.getConstantOperandVal(1);
1056 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1057 unsigned ShAmt = 0;
1058 if (isPowerOf2_64(Mask)) {
1059 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1060 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1061 } else {
1062 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1063 }
1064
1065 LHS = LHS.getOperand(0);
1066 if (ShAmt != 0)
1067 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1068 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1069 return;
1070 }
1071 }
1072
1073 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1074 int64_t C = RHSC->getSExtValue();
1075 switch (CC) {
1076 default:
1077 break;
1078 case ISD::SETGT:
1079 // Convert X > -1 to X >= 0.
1080 if (C == -1) {
1081 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1082 CC = ISD::SETGE;
1083 return;
1084 }
1085 break;
1086 case ISD::SETLT:
1087 // Convert X < 1 to 0 >= X.
1088 if (C == 1) {
1089 RHS = LHS;
1090 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1091 CC = ISD::SETGE;
1092 return;
1093 }
1094 break;
1095 }
1096 }
1097
1098 switch (CC) {
1099 default:
1100 break;
1101 case ISD::SETGT:
1102 case ISD::SETLE:
1103 case ISD::SETUGT:
1104 case ISD::SETULE:
1106 std::swap(LHS, RHS);
1107 break;
1108 }
1109}
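// Editorial example (assumed): on LA64, (X & 0x8000) == 0 cannot be formed
// with ANDI because the mask does not fit in a 12-bit immediate, so the code
// above rewrites it as (X << 48) >= 0: the tested bit becomes the sign bit
// and a signed compare against zero suffices.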
1110
1111SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
1112 SelectionDAG &DAG) const {
1113 SDValue CondV = Op.getOperand(0);
1114 SDValue TrueV = Op.getOperand(1);
1115 SDValue FalseV = Op.getOperand(2);
1116 SDLoc DL(Op);
1117 MVT VT = Op.getSimpleValueType();
1118 MVT GRLenVT = Subtarget.getGRLenVT();
1119
1120 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
1121 return V;
1122
1123 if (Op.hasOneUse()) {
1124 unsigned UseOpc = Op->user_begin()->getOpcode();
1125 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
1126 SDNode *BinOp = *Op->user_begin();
1127 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
1128 DAG, Subtarget)) {
1129 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
1130 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
1131 // may return a constant node and cause a crash in lowerSELECT.
1132 if (NewSel.getOpcode() == ISD::SELECT)
1133 return lowerSELECT(NewSel, DAG);
1134 return NewSel;
1135 }
1136 }
1137 }
1138
1139 // If the condition is not an integer SETCC which operates on GRLenVT, we need
1140 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
1141 // (select condv, truev, falsev)
1142 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
1143 if (CondV.getOpcode() != ISD::SETCC ||
1144 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
1145 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
1146 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
1147
1148 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
1149
1150 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1151 }
1152
1153 // If the CondV is the output of a SETCC node which operates on GRLenVT
1154 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
1155 // to take advantage of the integer compare+branch instructions. i.e.: (select
1156 // (setcc lhs, rhs, cc), truev, falsev)
1157 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
1158 SDValue LHS = CondV.getOperand(0);
1159 SDValue RHS = CondV.getOperand(1);
1160 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1161
1162 // Special case for a select of 2 constants that have a difference of 1.
1163 // Normally this is done by DAGCombine, but if the select is introduced by
1164 // type legalization or op legalization, we miss it. Restricting to SETLT
1165 // case for now because that is what signed saturating add/sub need.
1166 // FIXME: We don't need the condition to be SETLT or even a SETCC,
1167 // but we would probably want to swap the true/false values if the condition
1168 // is SETGE/SETLE to avoid an XORI.
1169 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
1170 CCVal == ISD::SETLT) {
1171 const APInt &TrueVal = TrueV->getAsAPIntVal();
1172 const APInt &FalseVal = FalseV->getAsAPIntVal();
1173 if (TrueVal - 1 == FalseVal)
1174 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
1175 if (TrueVal + 1 == FalseVal)
1176 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
1177 }
1178
1179 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1180 // 1 < x ? x : 1 -> 0 < x ? x : 1
1181 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
1182 RHS == TrueV && LHS == FalseV) {
1183 LHS = DAG.getConstant(0, DL, VT);
1184 // 0 <u x is the same as x != 0.
1185 if (CCVal == ISD::SETULT) {
1186 std::swap(LHS, RHS);
1187 CCVal = ISD::SETNE;
1188 }
1189 }
1190
1191 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
1192 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
1193 RHS == FalseV) {
1194 RHS = DAG.getConstant(0, DL, VT);
1195 }
1196
1197 SDValue TargetCC = DAG.getCondCode(CCVal);
1198
1199 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
1200 // (select (setcc lhs, rhs, CC), constant, falsev)
1201 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
1202 std::swap(TrueV, FalseV);
1203 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
1204 }
1205
1206 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
1207 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
1208}
1209
1210SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1211 SelectionDAG &DAG) const {
1212 SDValue CondV = Op.getOperand(1);
1213 SDLoc DL(Op);
1214 MVT GRLenVT = Subtarget.getGRLenVT();
1215
1216 if (CondV.getOpcode() == ISD::SETCC) {
1217 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1218 SDValue LHS = CondV.getOperand(0);
1219 SDValue RHS = CondV.getOperand(1);
1220 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1221
1222 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1223
1224 SDValue TargetCC = DAG.getCondCode(CCVal);
1225 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1226 Op.getOperand(0), LHS, RHS, TargetCC,
1227 Op.getOperand(2));
1228 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1229 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1230 Op.getOperand(0), CondV, Op.getOperand(2));
1231 }
1232 }
1233
1234 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1235 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1236 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1237}
1238
1239SDValue
1240LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1241 SelectionDAG &DAG) const {
1242 SDLoc DL(Op);
1243 MVT OpVT = Op.getSimpleValueType();
1244
1245 SDValue Vector = DAG.getUNDEF(OpVT);
1246 SDValue Val = Op.getOperand(0);
1247 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1248
1249 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1250}
1251
1252SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1253 SelectionDAG &DAG) const {
1254 EVT ResTy = Op->getValueType(0);
1255 SDValue Src = Op->getOperand(0);
1256 SDLoc DL(Op);
1257
1258 // LoongArchISD::BITREV_8B is not supported on LA32.
1259 if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
1260 return SDValue();
1261
1262 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1263 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1264 unsigned int NewEltNum = NewVT.getVectorNumElements();
1265
1266 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1267
1269 for (unsigned int i = 0; i < NewEltNum; i++) {
1270 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1271 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1272 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1273 ? (unsigned)LoongArchISD::BITREV_8B
1274 : (unsigned)ISD::BITREVERSE;
1275 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1276 }
1277 SDValue Res =
1278 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1279
1280 switch (ResTy.getSimpleVT().SimpleTy) {
1281 default:
1282 return SDValue();
1283 case MVT::v16i8:
1284 case MVT::v32i8:
1285 return Res;
1286 case MVT::v8i16:
1287 case MVT::v16i16:
1288 case MVT::v4i32:
1289 case MVT::v8i32: {
1291 for (unsigned int i = 0; i < NewEltNum; i++)
1292 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1293 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1294 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1295 }
1296 }
1297}
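// Editorial note (assumed): bit-reversing a whole i64 element also reverses
// the order of the narrower sub-elements packed inside it, so the shuffle
// masks built above (e.g. <3, 2, 1, 0, 7, 6, 5, 4> for v8i16) simply restore
// the sub-elements to their original positions within each i64.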
1298
1299// Widen element type to get a new mask value (if possible).
1300// For example:
1301// shufflevector <4 x i32> %a, <4 x i32> %b,
1302// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1303// is equivalent to:
1304// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1305// can be lowered to:
1306// VPACKOD_D vr0, vr0, vr1
1308 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1309 unsigned EltBits = VT.getScalarSizeInBits();
1310
1311 if (EltBits > 32 || EltBits == 1)
1312 return SDValue();
1313
1314 SmallVector<int, 8> NewMask;
1315 if (widenShuffleMaskElts(Mask, NewMask)) {
1316 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1317 : MVT::getIntegerVT(EltBits * 2);
1318 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1319 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1320 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1321 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1322 return DAG.getBitcast(
1323 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1324 }
1325 }
1326
1327 return SDValue();
1328}
1329
1330/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1331/// instructions.
1332// The function matches elements from one of the input vectors shuffled to the
1333// left or right with zeroable elements 'shifted in'. It handles both the
1334// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1335// lane.
1336// Mostly copied from X86.
1337static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1338 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1339 int MaskOffset, const APInt &Zeroable) {
1340 int Size = Mask.size();
1341 unsigned SizeInBits = Size * ScalarSizeInBits;
1342
1343 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1344 for (int i = 0; i < Size; i += Scale)
1345 for (int j = 0; j < Shift; ++j)
1346 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1347 return false;
1348
1349 return true;
1350 };
1351
1352 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1353 int Step = 1) {
1354 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1355 if (!(Mask[i] == -1 || Mask[i] == Low))
1356 return false;
1357 return true;
1358 };
1359
1360 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1361 for (int i = 0; i != Size; i += Scale) {
1362 unsigned Pos = Left ? i + Shift : i;
1363 unsigned Low = Left ? i : i + Shift;
1364 unsigned Len = Scale - Shift;
1365 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1366 return -1;
1367 }
1368
1369 int ShiftEltBits = ScalarSizeInBits * Scale;
1370 bool ByteShift = ShiftEltBits > 64;
1371 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1372 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1373 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1374
1375 // Normalize the scale for byte shifts to still produce an i64 element
1376 // type.
1377 Scale = ByteShift ? Scale / 2 : Scale;
1378
1379 // We need to round trip through the appropriate type for the shift.
1380 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1381 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1382 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1383 return (int)ShiftAmt;
1384 };
1385
1386 unsigned MaxWidth = 128;
1387 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1388 for (int Shift = 1; Shift != Scale; ++Shift)
1389 for (bool Left : {true, false})
1390 if (CheckZeros(Shift, Scale, Left)) {
1391 int ShiftAmt = MatchShift(Shift, Scale, Left);
1392 if (0 < ShiftAmt)
1393 return ShiftAmt;
1394 }
1395
1396 // no match
1397 return -1;
1398}
1399
1400/// Lower VECTOR_SHUFFLE as shift (if possible).
1401///
1402/// For example:
1403/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1404/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1405/// is lowered to:
1406/// (VBSLL_V $v0, $v0, 4)
1407///
1408/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1409/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1410/// is lowered to:
1411/// (VSLLI_D $v0, $v0, 32)
1413 MVT VT, SDValue V1, SDValue V2,
1414 SelectionDAG &DAG,
1415 const LoongArchSubtarget &Subtarget,
1416 const APInt &Zeroable) {
1417 int Size = Mask.size();
1418 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1419
1420 MVT ShiftVT;
1421 SDValue V = V1;
1422 unsigned Opcode;
1423
1424 // Try to match shuffle against V1 shift.
1425 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1426 Mask, 0, Zeroable);
1427
1428 // If V1 failed, try to match shuffle against V2 shift.
1429 if (ShiftAmt < 0) {
1430 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1431 Mask, Size, Zeroable);
1432 V = V2;
1433 }
1434
1435 if (ShiftAmt < 0)
1436 return SDValue();
1437
1438 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1439 "Illegal integer vector type");
1440 V = DAG.getBitcast(ShiftVT, V);
1441 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1442 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1443 return DAG.getBitcast(VT, V);
1444}
1445
1446/// Determine whether a range fits a regular pattern of values.
1447/// This function accounts for the possibility of jumping over the End iterator.
1448template <typename ValType>
1449static bool
1450fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
1451 unsigned CheckStride,
1452 typename SmallVectorImpl<ValType>::const_iterator End,
1453 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1454 auto &I = Begin;
1455
1456 while (I != End) {
1457 if (*I != -1 && *I != ExpectedIndex)
1458 return false;
1459 ExpectedIndex += ExpectedIndexStride;
1460
1461 // Incrementing past End is undefined behaviour so we must increment one
1462 // step at a time and check for End at each step.
1463 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1464 ; // Empty loop body.
1465 }
1466 return true;
1467}
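// Editorial sketch (not part of the file): a simplified, standalone rendering
// of the pattern check above, showing how an interleaving (VILVL-style) mask
// <0, n, 1, n+1, ...> is recognized; names here are illustrative only.
#include <vector>
static bool fitsPattern(const std::vector<int> &Mask, unsigned Start,
                        unsigned Stride, int Expected, int Step) {
  for (unsigned i = Start; i < Mask.size(); i += Stride, Expected += Step)
    if (Mask[i] != -1 && Mask[i] != Expected)
      return false; // mismatch that is not an undef (-1) entry
  return true;
}
int main() {
  // v4i32 interleave-low of two vectors: elements 0..3 of %a and 4..7 of %b.
  std::vector<int> Mask = {0, 4, 1, 5};
  bool IsVilvl = fitsPattern(Mask, /*Start=*/0, /*Stride=*/2, /*Expected=*/0, 1) &&
                 fitsPattern(Mask, /*Start=*/1, /*Stride=*/2, /*Expected=*/4, 1);
  return IsVilvl ? 0 : 1;
}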
1468
1469/// Compute whether each element of a shuffle is zeroable.
1470///
1471/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1473 SDValue V2, APInt &KnownUndef,
1474 APInt &KnownZero) {
1475 int Size = Mask.size();
1476 KnownUndef = KnownZero = APInt::getZero(Size);
1477
1478 V1 = peekThroughBitcasts(V1);
1479 V2 = peekThroughBitcasts(V2);
1480
1481 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1482 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1483
1484 int VectorSizeInBits = V1.getValueSizeInBits();
1485 int ScalarSizeInBits = VectorSizeInBits / Size;
1486 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1487 (void)ScalarSizeInBits;
1488
1489 for (int i = 0; i < Size; ++i) {
1490 int M = Mask[i];
1491 if (M < 0) {
1492 KnownUndef.setBit(i);
1493 continue;
1494 }
1495 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1496 KnownZero.setBit(i);
1497 continue;
1498 }
1499 }
1500}
1501
1502/// Test whether a shuffle mask is equivalent within each sub-lane.
1503///
1504/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1505/// non-trivial to compute in the face of undef lanes. The representation is
1506/// suitable for use with existing 128-bit shuffles as entries from the second
1507/// vector have been remapped to [LaneSize, 2*LaneSize).
1508static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1509 ArrayRef<int> Mask,
1510 SmallVectorImpl<int> &RepeatedMask) {
1511 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1512 RepeatedMask.assign(LaneSize, -1);
1513 int Size = Mask.size();
1514 for (int i = 0; i < Size; ++i) {
1515 assert(Mask[i] == -1 || Mask[i] >= 0);
1516 if (Mask[i] < 0)
1517 continue;
1518 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1519 // This entry crosses lanes, so there is no way to model this shuffle.
1520 return false;
1521
1522 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1523 // Adjust second vector indices to start at LaneSize instead of Size.
1524 int LocalM =
1525 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1526 if (RepeatedMask[i % LaneSize] < 0)
1527 // This is the first non-undef entry in this slot of a 128-bit lane.
1528 RepeatedMask[i % LaneSize] = LocalM;
1529 else if (RepeatedMask[i % LaneSize] != LocalM)
1530 // Found a mismatch with the repeated mask.
1531 return false;
1532 }
1533 return true;
1534}
1535
1536/// Attempts to match vector shuffle as byte rotation.
1537static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
1538 ArrayRef<int> Mask) {
1539
1540 SDValue Lo, Hi;
1541 SmallVector<int, 16> RepeatedMask;
1542
1543 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1544 return -1;
1545
1546 int NumElts = RepeatedMask.size();
1547 int Rotation = 0;
1548 int Scale = 16 / NumElts;
1549
1550 for (int i = 0; i < NumElts; ++i) {
1551 int M = RepeatedMask[i];
1552 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1553 "Unexpected mask index.");
1554 if (M < 0)
1555 continue;
1556
1557 // Determine where a rotated vector would have started.
1558 int StartIdx = i - (M % NumElts);
1559 if (StartIdx == 0)
1560 return -1;
1561
1562 // If we found the tail of a vector the rotation must be the missing
1563 // front. If we found the head of a vector, it must be how much of the
1564 // head.
1565 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1566
1567 if (Rotation == 0)
1568 Rotation = CandidateRotation;
1569 else if (Rotation != CandidateRotation)
1570 return -1;
1571
1572 // Compute which value this mask is pointing at.
1573 SDValue MaskV = M < NumElts ? V1 : V2;
1574
1575 // Compute which of the two target values this index should be assigned
1576 // to. This reflects whether the high elements are remaining or the low
1577 // elements are remaining.
1578 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1579
1580 // Either set up this value if we've not encountered it before, or check
1581 // that it remains consistent.
1582 if (!TargetV)
1583 TargetV = MaskV;
1584 else if (TargetV != MaskV)
1585 return -1;
1586 }
1587
1588 // Check that we successfully analyzed the mask, and normalize the results.
1589 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1590 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1591 if (!Lo)
1592 Lo = Hi;
1593 else if (!Hi)
1594 Hi = Lo;
1595
1596 V1 = Lo;
1597 V2 = Hi;
1598
1599 return Rotation * Scale;
1600}
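// Editorial example (assumed): for a v2i64 shuffle with mask <3, 0>, every
// element implies a rotation by one element, so the function returns
// 1 * (16 / 2) = 8, i.e. a byte rotation of 8 which the caller lowers with
// VBSRL/VBSLL by 8 as in the example below.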
1601
1602/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1603///
1604/// For example:
1605/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1606/// <2 x i32> <i32 3, i32 0>
1607/// is lowered to:
1608/// (VBSRL_V $v1, $v1, 8)
1609/// (VBSLL_V $v0, $v0, 8)
1610/// (VOR_V $v0, $V0, $v1)
1611static SDValue
1613 SDValue V1, SDValue V2, SelectionDAG &DAG,
1614 const LoongArchSubtarget &Subtarget) {
1615
1616 SDValue Lo = V1, Hi = V2;
1617 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1618 if (ByteRotation <= 0)
1619 return SDValue();
1620
1621 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1622 Lo = DAG.getBitcast(ByteVT, Lo);
1623 Hi = DAG.getBitcast(ByteVT, Hi);
1624
1625 int LoByteShift = 16 - ByteRotation;
1626 int HiByteShift = ByteRotation;
1627 MVT GRLenVT = Subtarget.getGRLenVT();
1628
1629 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1630 DAG.getConstant(LoByteShift, DL, GRLenVT));
1631 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1632 DAG.getConstant(HiByteShift, DL, GRLenVT));
1633 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1634}
1635
1636/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1637///
1638/// For example:
1639/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1640/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1641/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1642/// is lowered to:
1643/// (VREPLI $v1, 0)
1644/// (VILVL $v0, $v1, $v0)
1646 ArrayRef<int> Mask, MVT VT,
1647 SDValue V1, SDValue V2,
1648 SelectionDAG &DAG,
1649 const APInt &Zeroable) {
1650 int Bits = VT.getSizeInBits();
1651 int EltBits = VT.getScalarSizeInBits();
1652 int NumElements = VT.getVectorNumElements();
1653
1654 if (Zeroable.isAllOnes())
1655 return DAG.getConstant(0, DL, VT);
1656
1657 // Define a helper function to check a particular ext-scale and lower to it if
1658 // valid.
1659 auto Lower = [&](int Scale) -> SDValue {
1660 SDValue InputV;
1661 bool AnyExt = true;
1662 int Offset = 0;
1663 for (int i = 0; i < NumElements; i++) {
1664 int M = Mask[i];
1665 if (M < 0)
1666 continue;
1667 if (i % Scale != 0) {
1668 // Each of the extended elements needs to be zeroable.
1669 if (!Zeroable[i])
1670 return SDValue();
1671
1672 AnyExt = false;
1673 continue;
1674 }
1675
1676 // Each of the base elements needs to be consecutive indices into the
1677 // same input vector.
1678 SDValue V = M < NumElements ? V1 : V2;
1679 M = M % NumElements;
1680 if (!InputV) {
1681 InputV = V;
1682 Offset = M - (i / Scale);
1683
1684 // These offsets can't be handled.
1685 if (Offset % (NumElements / Scale))
1686 return SDValue();
1687 } else if (InputV != V)
1688 return SDValue();
1689
1690 if (M != (Offset + (i / Scale)))
1691 return SDValue(); // Non-consecutive strided elements.
1692 }
1693
1694 // If we fail to find an input, we have a zero-shuffle which should always
1695 // have already been handled.
1696 if (!InputV)
1697 return SDValue();
1698
1699 do {
1700 unsigned VilVLoHi = LoongArchISD::VILVL;
1701 if (Offset >= (NumElements / 2)) {
1702 VilVLoHi = LoongArchISD::VILVH;
1703 Offset -= (NumElements / 2);
1704 }
1705
1706 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1707 SDValue Ext =
1708 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1709 InputV = DAG.getBitcast(InputVT, InputV);
1710 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1711 Scale /= 2;
1712 EltBits *= 2;
1713 NumElements /= 2;
1714 } while (Scale > 1);
1715 return DAG.getBitcast(VT, InputV);
1716 };
1717
1718 // Each iteration, try extending the elements half as much, but into twice as
1719 // many elements.
1720 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1721 NumExtElements *= 2) {
1722 if (SDValue V = Lower(NumElements / NumExtElements))
1723 return V;
1724 }
1725 return SDValue();
1726}
1727
1728/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1729///
1730/// VREPLVEI performs vector broadcast based on an element specified by an
1731/// integer immediate, with its mask being similar to:
1732/// <x, x, x, ...>
1733/// where x is any valid index.
1734///
1735/// When undef's appear in the mask they are treated as if they were whatever
1736/// value is necessary in order to fit the above form.
1737static SDValue
1739 SDValue V1, SelectionDAG &DAG,
1740 const LoongArchSubtarget &Subtarget) {
1741 int SplatIndex = -1;
1742 for (const auto &M : Mask) {
1743 if (M != -1) {
1744 SplatIndex = M;
1745 break;
1746 }
1747 }
1748
1749 if (SplatIndex == -1)
1750 return DAG.getUNDEF(VT);
1751
1752 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1753 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1754 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1755 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
1756 }
1757
1758 return SDValue();
1759}
1760
1761/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1762///
1763/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1764/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1765///
1766/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1767/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1768/// When undef's appear they are treated as if they were whatever value is
1769/// necessary in order to fit the above forms.
1770///
1771/// For example:
1772/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1773/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1774/// i32 7, i32 6, i32 5, i32 4>
1775/// is lowered to:
1776/// (VSHUF4I_H $v0, $v1, 27)
1777/// where the 27 comes from:
1778/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1779static SDValue
1781 SDValue V1, SDValue V2, SelectionDAG &DAG,
1782 const LoongArchSubtarget &Subtarget) {
1783
1784 unsigned SubVecSize = 4;
1785 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1786 SubVecSize = 2;
1787
1788 int SubMask[4] = {-1, -1, -1, -1};
1789 for (unsigned i = 0; i < SubVecSize; ++i) {
1790 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1791 int M = Mask[j];
1792
1793 // Convert from vector index to 4-element subvector index
1794 // If an index refers to an element outside of the subvector then give up
1795 if (M != -1) {
1796 M -= 4 * (j / SubVecSize);
1797 if (M < 0 || M >= 4)
1798 return SDValue();
1799 }
1800
1801 // If the mask has an undef, replace it with the current index.
1802 // Note that it might still be undef if the current index is also undef
1803 if (SubMask[i] == -1)
1804 SubMask[i] = M;
1805 // Check that non-undef values are the same as in the mask. If they
1806 // aren't then give up
1807 else if (M != -1 && M != SubMask[i])
1808 return SDValue();
1809 }
1810 }
1811
1812 // Calculate the immediate. Replace any remaining undefs with zero
1813 int Imm = 0;
1814 for (int i = SubVecSize - 1; i >= 0; --i) {
1815 int M = SubMask[i];
1816
1817 if (M == -1)
1818 M = 0;
1819
1820 Imm <<= 2;
1821 Imm |= M & 0x3;
1822 }
1823
1824 MVT GRLenVT = Subtarget.getGRLenVT();
1825
1826 // Return vshuf4i.d
1827 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1828 return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
1829 DAG.getConstant(Imm, DL, GRLenVT));
1830
1831 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1832 DAG.getConstant(Imm, DL, GRLenVT));
1833}
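// Editorial sketch (not part of the file): standalone computation of the
// VSHUF4I immediate for the example mask above, whose per-block sub-mask is
// <3, 2, 1, 0>; the loop mirrors the immediate calculation in the function.
#include <cassert>
int main() {
  int SubMask[4] = {3, 2, 1, 0};
  int Imm = 0;
  for (int i = 3; i >= 0; --i) {
    Imm <<= 2;
    Imm |= SubMask[i] & 0x3;
  }
  assert(Imm == 27); // 3 + (2 << 2) + (1 << 4) + (0 << 6)
  return 0;
}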
1834
1835/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1836///
1837/// It is possible to optimize a VECTOR_SHUFFLE performing a vector
1838/// reverse whose mask looks like:
1839/// <7, 6, 5, 4, 3, 2, 1, 0>
1840///
1841/// When undef's appear in the mask they are treated as if they were whatever
1842/// value is necessary in order to fit the above forms.
1843static SDValue
1845 SDValue V1, SelectionDAG &DAG,
1846 const LoongArchSubtarget &Subtarget) {
1847 // Only vectors with i8/i16 elements that cannot be matched by other
1848 // patterns directly need this handling.
1849 if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
1850 VT != MVT::v16i16)
1851 return SDValue();
1852
1853 if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
1854 return SDValue();
1855
1856 int WidenNumElts = VT.getVectorNumElements() / 4;
1857 SmallVector<int, 16> WidenMask(WidenNumElts, -1);
1858 for (int i = 0; i < WidenNumElts; ++i)
1859 WidenMask[i] = WidenNumElts - 1 - i;
1860
1861 MVT WidenVT = MVT::getVectorVT(
1862 VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
1863 SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
1864 SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
1865 DAG.getUNDEF(WidenVT), WidenMask);
1866
1867 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
1868 DAG.getBitcast(VT, WidenRev),
1869 DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
1870}
1871
1872/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1873///
1874/// VPACKEV interleaves the even elements from each vector.
1875///
1876/// It is possible to lower into VPACKEV when the mask consists of two of the
1877/// following forms interleaved:
1878/// <0, 2, 4, ...>
1879/// <n, n+2, n+4, ...>
1880/// where n is the number of elements in the vector.
1881/// For example:
1882/// <0, 0, 2, 2, 4, 4, ...>
1883/// <0, n, 2, n+2, 4, n+4, ...>
1884///
1885/// When undef's appear in the mask they are treated as if they were whatever
1886/// value is necessary in order to fit the above forms.
1888 MVT VT, SDValue V1, SDValue V2,
1889 SelectionDAG &DAG) {
1890
1891 const auto &Begin = Mask.begin();
1892 const auto &End = Mask.end();
1893 SDValue OriV1 = V1, OriV2 = V2;
1894
1895 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1896 V1 = OriV1;
1897 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1898 V1 = OriV2;
1899 else
1900 return SDValue();
1901
1902 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1903 V2 = OriV1;
1904 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1905 V2 = OriV2;
1906 else
1907 return SDValue();
1908
1909 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1910}
1911
1912/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1913///
1914/// VPACKOD interleaves the odd elements from each vector.
1915///
1916/// It is possible to lower into VPACKOD when the mask consists of two of the
1917/// following forms interleaved:
1918/// <1, 3, 5, ...>
1919/// <n+1, n+3, n+5, ...>
1920/// where n is the number of elements in the vector.
1921/// For example:
1922/// <1, 1, 3, 3, 5, 5, ...>
1923/// <1, n+1, 3, n+3, 5, n+5, ...>
1924///
1925/// When undef's appear in the mask they are treated as if they were whatever
1926/// value is necessary in order to fit the above forms.
1928 MVT VT, SDValue V1, SDValue V2,
1929 SelectionDAG &DAG) {
1930
1931 const auto &Begin = Mask.begin();
1932 const auto &End = Mask.end();
1933 SDValue OriV1 = V1, OriV2 = V2;
1934
1935 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1936 V1 = OriV1;
1937 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1938 V1 = OriV2;
1939 else
1940 return SDValue();
1941
1942 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1943 V2 = OriV1;
1944 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1945 V2 = OriV2;
1946 else
1947 return SDValue();
1948
1949 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1950}
1951
1952/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1953///
1954/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1955/// of each vector.
1956///
1957/// It is possible to lower into VILVH when the mask consists of two of the
1958/// following forms interleaved:
1959/// <x, x+1, x+2, ...>
1960/// <n+x, n+x+1, n+x+2, ...>
1961/// where n is the number of elements in the vector and x is half n.
1962/// For example:
1963/// <x, x, x+1, x+1, x+2, x+2, ...>
1964/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1965///
1966/// When undef's appear in the mask they are treated as if they were whatever
1967/// value is necessary in order to fit the above forms.
1969 MVT VT, SDValue V1, SDValue V2,
1970 SelectionDAG &DAG) {
1971
1972 const auto &Begin = Mask.begin();
1973 const auto &End = Mask.end();
1974 unsigned HalfSize = Mask.size() / 2;
1975 SDValue OriV1 = V1, OriV2 = V2;
1976
1977 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1978 V1 = OriV1;
1979 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1980 V1 = OriV2;
1981 else
1982 return SDValue();
1983
1984 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1985 V2 = OriV1;
1986 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1987 1))
1988 V2 = OriV2;
1989 else
1990 return SDValue();
1991
1992 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1993}
1994
1995/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1996///
1997/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1998/// of each vector.
1999///
2000/// It is possible to lower into VILVL when the mask consists of two of the
2001/// following forms interleaved:
2002/// <0, 1, 2, ...>
2003/// <n, n+1, n+2, ...>
2004/// where n is the number of elements in the vector.
2005/// For example:
2006/// <0, 0, 1, 1, 2, 2, ...>
2007/// <0, n, 1, n+1, 2, n+2, ...>
2008///
2009/// When undef's appear in the mask they are treated as if they were whatever
2010/// value is necessary in order to fit the above forms.
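///
/// For instance, with v4i32 inputs (n = 4), the mask <0, 4, 1, 5> interleaves
/// the lower halves of the two inputs and can be lowered to a single VILVL.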
2011static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
2012                                          MVT VT, SDValue V1, SDValue V2,
2013 SelectionDAG &DAG) {
2014
2015 const auto &Begin = Mask.begin();
2016 const auto &End = Mask.end();
2017 SDValue OriV1 = V1, OriV2 = V2;
2018
2019 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2020 V1 = OriV1;
2021 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
2022 V1 = OriV2;
2023 else
2024 return SDValue();
2025
2026 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2027 V2 = OriV1;
2028 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
2029 V2 = OriV2;
2030 else
2031 return SDValue();
2032
2033 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2034}
2035
2036/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2037///
2038/// VPICKEV copies the even elements of each vector into the result vector.
2039///
2040/// It is possible to lower into VPICKEV when the mask consists of two of the
2041/// following forms concatenated:
2042/// <0, 2, 4, ...>
2043/// <n, n+2, n+4, ...>
2044/// where n is the number of elements in the vector.
2045/// For example:
2046/// <0, 2, 4, ..., 0, 2, 4, ...>
2047/// <0, 2, 4, ..., n, n+2, n+4, ...>
2048///
2049/// When undef's appear in the mask they are treated as if they were whatever
2050/// value is necessary in order to fit the above forms.
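///
/// For instance, with v4i32 inputs (n = 4), the mask <0, 2, 4, 6> places the
/// even elements of the first input in the low half of the result and the
/// even elements of the second input in the high half, which matches a single
/// VPICKEV.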
2051static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2052                                            MVT VT, SDValue V1, SDValue V2,
2053 SelectionDAG &DAG) {
2054
2055 const auto &Begin = Mask.begin();
2056 const auto &Mid = Mask.begin() + Mask.size() / 2;
2057 const auto &End = Mask.end();
2058 SDValue OriV1 = V1, OriV2 = V2;
2059
2060 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2061 V1 = OriV1;
2062 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
2063 V1 = OriV2;
2064 else
2065 return SDValue();
2066
2067 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2068 V2 = OriV1;
2069 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
2070 V2 = OriV2;
2071
2072 else
2073 return SDValue();
2074
2075 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2076}
2077
2078/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2079///
2080/// VPICKOD copies the odd elements of each vector into the result vector.
2081///
2082/// It is possible to lower into VPICKOD when the mask consists of two of the
2083/// following forms concatenated:
2084/// <1, 3, 5, ...>
2085/// <n+1, n+3, n+5, ...>
2086/// where n is the number of elements in the vector.
2087/// For example:
2088/// <1, 3, 5, ..., 1, 3, 5, ...>
2089/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2090///
2091/// When undef's appear in the mask they are treated as if they were whatever
2092/// value is necessary in order to fit the above forms.
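///
/// For instance, with v4i32 inputs (n = 4), the mask <1, 3, 5, 7> collects the
/// odd elements of both inputs and can be lowered to a single VPICKOD.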
2093static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2094                                            MVT VT, SDValue V1, SDValue V2,
2095 SelectionDAG &DAG) {
2096
2097 const auto &Begin = Mask.begin();
2098 const auto &Mid = Mask.begin() + Mask.size() / 2;
2099 const auto &End = Mask.end();
2100 SDValue OriV1 = V1, OriV2 = V2;
2101
2102 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2103 V1 = OriV1;
2104 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2105 V1 = OriV2;
2106 else
2107 return SDValue();
2108
2109 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2110 V2 = OriV1;
2111 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2112 V2 = OriV2;
2113 else
2114 return SDValue();
2115
2116 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2117}
2118
2119/// Lower VECTOR_SHUFFLE into VSHUF.
2120///
2121/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2122/// adding it as an operand to the resulting VSHUF.
2123static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2124                                          MVT VT, SDValue V1, SDValue V2,
2125 SelectionDAG &DAG,
2126 const LoongArchSubtarget &Subtarget) {
2127
2129 for (auto M : Mask)
2130 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2131
2132 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2133 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2134
2135  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2136 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2137  // VSHUF concatenates the vectors in a bitwise fashion:
2138 // <0b00, 0b01> + <0b10, 0b11> ->
2139 // 0b0100 + 0b1110 -> 0b01001110
2140 // <0b10, 0b11, 0b00, 0b01>
2141 // We must therefore swap the operands to get the correct result.
2142 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2143}
2144
2145/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2146///
2147/// This routine breaks down the specific type of 128-bit shuffle and
2148/// dispatches to the lowering routines accordingly.
2149static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2150                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
2151 const LoongArchSubtarget &Subtarget) {
2152 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2153 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2154 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2155 "Vector type is unsupported for lsx!");
2156  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2157         "Two operands have different types!");
2158 assert(VT.getVectorNumElements() == Mask.size() &&
2159 "Unexpected mask size for shuffle!");
2160 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2161
2162 APInt KnownUndef, KnownZero;
2163 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2164 APInt Zeroable = KnownUndef | KnownZero;
2165
2166 SDValue Result;
2167 // TODO: Add more comparison patterns.
2168 if (V2.isUndef()) {
2169 if ((Result =
2170 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2171 return Result;
2172 if ((Result =
2173 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2174 return Result;
2175 if ((Result =
2176 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2177 return Result;
2178
2179    // TODO: This commented-out assignment may be enabled in the future to
2180    // better match the pattern for instruction selection.
2181 /* V2 = V1; */
2182 }
2183
2184 // It is recommended not to change the pattern comparison order for better
2185 // performance.
2186 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2187 return Result;
2188 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2189 return Result;
2190 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2191 return Result;
2192 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2193 return Result;
2194 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2195 return Result;
2196 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2197 return Result;
2198 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2199 (Result =
2200 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2201 return Result;
2202 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2203 Zeroable)))
2204 return Result;
2205 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2206 Zeroable)))
2207 return Result;
2208 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2209 Subtarget)))
2210 return Result;
2211 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2212 return NewShuffle;
2213 if ((Result =
2214 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2215 return Result;
2216 return SDValue();
2217}
2218
2219/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2220///
2221/// It is an XVREPLVEI when the mask is:
2222///   <x, x, x, ..., x+n, x+n, x+n, ...>
2223/// where x appears n times and n is half the length of the vector.
2224///
2225/// When undef's appear in the mask they are treated as if they were whatever
2226/// value is necessary in order to fit the above form.
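///
/// For instance, with v8i32 (n = 4), the mask <2, 2, 2, 2, 6, 6, 6, 6>
/// replicates element 2 within each 128-bit half and can be lowered to a
/// VREPLVEI node with splat index 2.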
2227static SDValue
2228lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2229                              SDValue V1, SelectionDAG &DAG,
2230 const LoongArchSubtarget &Subtarget) {
2231 int SplatIndex = -1;
2232 for (const auto &M : Mask) {
2233 if (M != -1) {
2234 SplatIndex = M;
2235 break;
2236 }
2237 }
2238
2239 if (SplatIndex == -1)
2240 return DAG.getUNDEF(VT);
2241
2242 const auto &Begin = Mask.begin();
2243 const auto &End = Mask.end();
2244 int HalfSize = Mask.size() / 2;
2245
2246 if (SplatIndex >= HalfSize)
2247 return SDValue();
2248
2249 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
2250 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2251 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2252 0)) {
2253 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2254 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2255 }
2256
2257 return SDValue();
2258}
2259
2260/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2261static SDValue
2262lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2263                             SDValue V1, SDValue V2, SelectionDAG &DAG,
2264 const LoongArchSubtarget &Subtarget) {
2265  // When the mask size is less than or equal to 4, lower-cost instructions
2266  // may be used.
2267 if (Mask.size() <= 4)
2268 return SDValue();
2269 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2270}
2271
2272/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
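///
/// The mask is encoded two bits per element. For instance, the v4i64 mask
/// <2, 3, 0, 1> becomes the immediate 0b01001110 (78), i.e. an xvpermi.d that
/// swaps the two 128-bit halves.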
2273static SDValue
2274lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2275                            SDValue V1, SelectionDAG &DAG,
2276 const LoongArchSubtarget &Subtarget) {
2277 // Only consider XVPERMI_D.
2278 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2279 return SDValue();
2280
2281 unsigned MaskImm = 0;
2282 for (unsigned i = 0; i < Mask.size(); ++i) {
2283 if (Mask[i] == -1)
2284 continue;
2285 MaskImm |= Mask[i] << (i * 2);
2286 }
2287
2288 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2289 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2290}
2291
2292/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2293static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
2294                                           MVT VT, SDValue V1, SelectionDAG &DAG,
2295 const LoongArchSubtarget &Subtarget) {
2296  // LoongArch LASX only has XVPERM_W.
2297 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2298 return SDValue();
2299
2300 unsigned NumElts = VT.getVectorNumElements();
2301 unsigned HalfSize = NumElts / 2;
2302 bool FrontLo = true, FrontHi = true;
2303 bool BackLo = true, BackHi = true;
2304
2305 auto inRange = [](int val, int low, int high) {
2306 return (val == -1) || (val >= low && val < high);
2307 };
2308
2309 for (unsigned i = 0; i < HalfSize; ++i) {
2310 int Fronti = Mask[i];
2311 int Backi = Mask[i + HalfSize];
2312
2313 FrontLo &= inRange(Fronti, 0, HalfSize);
2314 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2315 BackLo &= inRange(Backi, 0, HalfSize);
2316 BackHi &= inRange(Backi, HalfSize, NumElts);
2317 }
2318
2319  // If both the lower and upper 128-bit parts access only one half of the
2320  // vector (either lower or upper), avoid using xvperm.w: the latency of
2321  // xvperm.w (3) is higher than that of xvshuf (1) plus xvori (1).
2322 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2323 return SDValue();
2324
2326 MVT GRLenVT = Subtarget.getGRLenVT();
2327 for (unsigned i = 0; i < NumElts; ++i)
2328 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2329 : DAG.getConstant(Mask[i], DL, GRLenVT));
2330 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2331
2332 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2333}
2334
2335/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2336static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2337                                             MVT VT, SDValue V1, SDValue V2,
2338 SelectionDAG &DAG) {
2339 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2340}
2341
2342/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2343static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2344                                             MVT VT, SDValue V1, SDValue V2,
2345 SelectionDAG &DAG) {
2346 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2347}
2348
2349/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2350static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2351                                           MVT VT, SDValue V1, SDValue V2,
2352 SelectionDAG &DAG) {
2353
2354 const auto &Begin = Mask.begin();
2355 const auto &End = Mask.end();
2356 unsigned HalfSize = Mask.size() / 2;
2357 unsigned LeftSize = HalfSize / 2;
2358 SDValue OriV1 = V1, OriV2 = V2;
2359
2360 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2361 1) &&
2362 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2363 V1 = OriV1;
2364 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2365 Mask.size() + HalfSize - LeftSize, 1) &&
2366 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2367 Mask.size() + HalfSize + LeftSize, 1))
2368 V1 = OriV2;
2369 else
2370 return SDValue();
2371
2372 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2373 1) &&
2374 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2375 1))
2376 V2 = OriV1;
2377 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2378 Mask.size() + HalfSize - LeftSize, 1) &&
2379 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2380 Mask.size() + HalfSize + LeftSize, 1))
2381 V2 = OriV2;
2382 else
2383 return SDValue();
2384
2385 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2386}
2387
2388/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2389static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2390                                           MVT VT, SDValue V1, SDValue V2,
2391 SelectionDAG &DAG) {
2392
2393 const auto &Begin = Mask.begin();
2394 const auto &End = Mask.end();
2395 unsigned HalfSize = Mask.size() / 2;
2396 SDValue OriV1 = V1, OriV2 = V2;
2397
2398 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2399 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2400 V1 = OriV1;
2401 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2402 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2403 Mask.size() + HalfSize, 1))
2404 V1 = OriV2;
2405 else
2406 return SDValue();
2407
2408 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2409 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2410 V2 = OriV1;
2411 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2412 1) &&
2413 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2414 Mask.size() + HalfSize, 1))
2415 V2 = OriV2;
2416 else
2417 return SDValue();
2418
2419 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2420}
2421
2422/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2423static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2424                                             MVT VT, SDValue V1, SDValue V2,
2425 SelectionDAG &DAG) {
2426
2427 const auto &Begin = Mask.begin();
2428 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2429 const auto &Mid = Mask.begin() + Mask.size() / 2;
2430 const auto &RightMid = Mask.end() - Mask.size() / 4;
2431 const auto &End = Mask.end();
2432 unsigned HalfSize = Mask.size() / 2;
2433 SDValue OriV1 = V1, OriV2 = V2;
2434
2435 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2436 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2437 V1 = OriV1;
2438 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2439 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2440 V1 = OriV2;
2441 else
2442 return SDValue();
2443
2444 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2445 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2446 V2 = OriV1;
2447 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2448 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2449 V2 = OriV2;
2450
2451 else
2452 return SDValue();
2453
2454 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2455}
2456
2457/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2458static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2459                                             MVT VT, SDValue V1, SDValue V2,
2460 SelectionDAG &DAG) {
2461
2462 const auto &Begin = Mask.begin();
2463 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2464 const auto &Mid = Mask.begin() + Mask.size() / 2;
2465 const auto &RightMid = Mask.end() - Mask.size() / 4;
2466 const auto &End = Mask.end();
2467 unsigned HalfSize = Mask.size() / 2;
2468 SDValue OriV1 = V1, OriV2 = V2;
2469
2470 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2471 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2472 V1 = OriV1;
2473 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2474 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2475 2))
2476 V1 = OriV2;
2477 else
2478 return SDValue();
2479
2480 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2481 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2482 V2 = OriV1;
2483 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2484 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2485 2))
2486 V2 = OriV2;
2487 else
2488 return SDValue();
2489
2490 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2491}
2492
2493/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2494static SDValue
2495lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2496                             SDValue V1, SDValue V2, SelectionDAG &DAG,
2497 const LoongArchSubtarget &Subtarget) {
2498 // LoongArch LASX only supports xvinsve0.{w/d}.
2499 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2500 VT != MVT::v4f64)
2501 return SDValue();
2502
2503 MVT GRLenVT = Subtarget.getGRLenVT();
2504 int MaskSize = Mask.size();
2505 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2506
2507 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2508 // all other elements are either 'Base + i' or undef (-1). On success, return
2509 // the index of the replaced element. Otherwise, just return -1.
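  // For example, with v8i32 and Mask <0, 1, 2, 8, 4, 5, 6, 7>,
  // checkReplaceOne(0, 8) returns 3: only element 3 deviates from the identity
  // and takes the lowest element of V2, which matches (XVINSVE0 V1, V2, 3).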
2510 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2511 int Idx = -1;
2512 for (int i = 0; i < MaskSize; ++i) {
2513 if (Mask[i] == Base + i || Mask[i] == -1)
2514 continue;
2515 if (Mask[i] != Replaced)
2516 return -1;
2517 if (Idx == -1)
2518 Idx = i;
2519 else
2520 return -1;
2521 }
2522 return Idx;
2523 };
2524
2525 // Case 1: the lowest element of V2 replaces one element in V1.
2526 int Idx = checkReplaceOne(0, MaskSize);
2527 if (Idx != -1)
2528 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2529 DAG.getConstant(Idx, DL, GRLenVT));
2530
2531 // Case 2: the lowest element of V1 replaces one element in V2.
2532 Idx = checkReplaceOne(MaskSize, 0);
2533 if (Idx != -1)
2534 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2535 DAG.getConstant(Idx, DL, GRLenVT));
2536
2537 return SDValue();
2538}
2539
2540/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2541static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2542                                           MVT VT, SDValue V1, SDValue V2,
2543 SelectionDAG &DAG) {
2544
2545 int MaskSize = Mask.size();
2546 int HalfSize = Mask.size() / 2;
2547 const auto &Begin = Mask.begin();
2548 const auto &Mid = Mask.begin() + HalfSize;
2549 const auto &End = Mask.end();
2550
2551 // VECTOR_SHUFFLE concatenates the vectors:
2552 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2553 // shuffling ->
2554 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2555 //
2556 // XVSHUF concatenates the vectors:
2557 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2558 // shuffling ->
2559 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
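  //
  // For instance, the v8i32 mask <0, 9, 1, 8, 4, 13, 5, 12> is remapped to
  // <0, 5, 1, 4, 0, 5, 1, 4>: within each 128-bit lane, the low indices (0..3)
  // address that lane of V1 and the high indices (4..7) that lane of V2.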
2560 SmallVector<SDValue, 8> MaskAlloc;
2561 for (auto it = Begin; it < Mid; it++) {
2562 if (*it < 0) // UNDEF
2563 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2564 else if ((*it >= 0 && *it < HalfSize) ||
2565 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2566 int M = *it < HalfSize ? *it : *it - HalfSize;
2567 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2568 } else
2569 return SDValue();
2570 }
2571 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2572
2573 for (auto it = Mid; it < End; it++) {
2574 if (*it < 0) // UNDEF
2575 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2576 else if ((*it >= HalfSize && *it < MaskSize) ||
2577 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2578 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2579 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2580 } else
2581 return SDValue();
2582 }
2583 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2584
2585 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2586 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2587 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2588}
2589
2590/// Shuffle vectors by lane to generate more optimized instructions.
2591/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2592///
2593/// Therefore, except for the following four cases, other cases are regarded
2594/// as cross-lane shuffles, where optimization is relatively limited.
2595///
2596/// - Shuffle high, low lanes of two input vectors
2597///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2598/// - Shuffle low, high lanes of two input vectors
2599///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2600/// - Shuffle low, low lanes of two input vectors
2601///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2602/// - Shuffle high, high lanes of two input vectors
2603///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2604///
2605/// The first case is the closest to LoongArch instructions and the other
2606/// cases need to be converted to it for processing.
2607///
2608/// This function will return true for the last three cases above and will
2609/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2610/// cross-lane shuffle cases.
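///
/// For instance, for v8i32 with V2 undef, the mask <4, 5, 6, 7, 0, 1, 2, 3>
/// falls into the "low, high" case: V1's two 128-bit lanes are swapped with an
/// XVPERMI (immediate 0b01001110) and the mask is rewritten to
/// <0, 1, 2, 3, 4, 5, 6, 7>, so the remaining shuffle no longer crosses lanes.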
2611static bool canonicalizeShuffleVectorByLane(
2612    const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2613 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2614
2615 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2616
2617 int MaskSize = Mask.size();
2618 int HalfSize = Mask.size() / 2;
2619 MVT GRLenVT = Subtarget.getGRLenVT();
2620
2621 HalfMaskType preMask = None, postMask = None;
2622
2623 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2624 return M < 0 || (M >= 0 && M < HalfSize) ||
2625 (M >= MaskSize && M < MaskSize + HalfSize);
2626 }))
2627 preMask = HighLaneTy;
2628 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2629 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2630 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2631 }))
2632 preMask = LowLaneTy;
2633
2634 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2635 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2636 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2637 }))
2638 postMask = LowLaneTy;
2639 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2640 return M < 0 || (M >= 0 && M < HalfSize) ||
2641 (M >= MaskSize && M < MaskSize + HalfSize);
2642 }))
2643 postMask = HighLaneTy;
2644
2645  // The first half of the mask is high-lane type and the second half is
2646  // low-lane type, which is the form closest to the LoongArch instructions.
2647  //
2648  // Note: In the LoongArch architecture, the high lane of the mask corresponds
2649  // to the lower 128 bits of the vector register, and the low lane of the mask
2650  // corresponds to the higher 128 bits of the vector register.
2651 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2652 return false;
2653 }
2654 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2655 V1 = DAG.getBitcast(MVT::v4i64, V1);
2656 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2657 DAG.getConstant(0b01001110, DL, GRLenVT));
2658 V1 = DAG.getBitcast(VT, V1);
2659
2660 if (!V2.isUndef()) {
2661 V2 = DAG.getBitcast(MVT::v4i64, V2);
2662 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2663 DAG.getConstant(0b01001110, DL, GRLenVT));
2664 V2 = DAG.getBitcast(VT, V2);
2665 }
2666
2667 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2668 *it = *it < 0 ? *it : *it - HalfSize;
2669 }
2670 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2671 *it = *it < 0 ? *it : *it + HalfSize;
2672 }
2673 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2674 V1 = DAG.getBitcast(MVT::v4i64, V1);
2675 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2676 DAG.getConstant(0b11101110, DL, GRLenVT));
2677 V1 = DAG.getBitcast(VT, V1);
2678
2679 if (!V2.isUndef()) {
2680 V2 = DAG.getBitcast(MVT::v4i64, V2);
2681 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2682 DAG.getConstant(0b11101110, DL, GRLenVT));
2683 V2 = DAG.getBitcast(VT, V2);
2684 }
2685
2686 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2687 *it = *it < 0 ? *it : *it - HalfSize;
2688 }
2689 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2690 V1 = DAG.getBitcast(MVT::v4i64, V1);
2691 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2692 DAG.getConstant(0b01000100, DL, GRLenVT));
2693 V1 = DAG.getBitcast(VT, V1);
2694
2695 if (!V2.isUndef()) {
2696 V2 = DAG.getBitcast(MVT::v4i64, V2);
2697 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2698 DAG.getConstant(0b01000100, DL, GRLenVT));
2699 V2 = DAG.getBitcast(VT, V2);
2700 }
2701
2702 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2703 *it = *it < 0 ? *it : *it + HalfSize;
2704 }
2705 } else { // cross-lane
2706 return false;
2707 }
2708
2709 return true;
2710}
2711
2712/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2713/// Only for 256-bit vector.
2714///
2715/// For example:
2716///   %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2717///                      <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2718/// is lowered to:
2719/// (XVPERMI $xr2, $xr0, 78)
2720/// (XVSHUF $xr1, $xr2, $xr0)
2721/// (XVORI $xr0, $xr1, 0)
2722static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2723                                                          ArrayRef<int> Mask,
2724 MVT VT, SDValue V1,
2725 SDValue V2,
2726 SelectionDAG &DAG) {
2727 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2728 int Size = Mask.size();
2729 int LaneSize = Size / 2;
2730
2731 bool LaneCrossing[2] = {false, false};
2732 for (int i = 0; i < Size; ++i)
2733 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2734 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2735
2736  // Bail out unless the shuffle actually crosses lanes.
2737 if (!LaneCrossing[0] && !LaneCrossing[1])
2738 return SDValue();
2739
2740 SmallVector<int> InLaneMask;
2741 InLaneMask.assign(Mask.begin(), Mask.end());
2742 for (int i = 0; i < Size; ++i) {
2743 int &M = InLaneMask[i];
2744 if (M < 0)
2745 continue;
2746 if (((M % Size) / LaneSize) != (i / LaneSize))
2747 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2748 }
2749
2750 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2751 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2752 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2753 Flipped = DAG.getBitcast(VT, Flipped);
2754 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2755}
2756
2757/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2758///
2759/// This routine breaks down the specific type of 256-bit shuffle and
2760/// dispatches to the lowering routines accordingly.
2761static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2762                                  SDValue V1, SDValue V2, SelectionDAG &DAG,
2763 const LoongArchSubtarget &Subtarget) {
2764 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2765 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2766 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2767 "Vector type is unsupported for lasx!");
2768  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
2769         "Two operands have different types!");
2770 assert(VT.getVectorNumElements() == Mask.size() &&
2771 "Unexpected mask size for shuffle!");
2772 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2773 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2774
2775 APInt KnownUndef, KnownZero;
2776 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2777 APInt Zeroable = KnownUndef | KnownZero;
2778
2779 SDValue Result;
2780 // TODO: Add more comparison patterns.
2781 if (V2.isUndef()) {
2782 if ((Result =
2783 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2784 return Result;
2785 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2786 Subtarget)))
2787 return Result;
2788 // Try to widen vectors to gain more optimization opportunities.
2789 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2790 return NewShuffle;
2791 if ((Result =
2792 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2793 return Result;
2794 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2795 return Result;
2796 if ((Result =
2797 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2798 return Result;
2799
2800    // TODO: This commented-out assignment may be enabled in the future to
2801    // better match the pattern for instruction selection.
2802 /* V2 = V1; */
2803 }
2804
2805 // It is recommended not to change the pattern comparison order for better
2806 // performance.
2807 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2808 return Result;
2809 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2810 return Result;
2811 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2812 return Result;
2813 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2814 return Result;
2815 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2816 return Result;
2817 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2818 return Result;
2819 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2820 Zeroable)))
2821 return Result;
2822 if ((Result =
2823 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2824 return Result;
2825 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2826 Subtarget)))
2827 return Result;
2828
2829  // Canonicalize non-cross-lane shuffle vectors.
2830 SmallVector<int> NewMask(Mask);
2831 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2832 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2833
2834 // FIXME: Handling the remaining cases earlier can degrade performance
2835 // in some situations. Further analysis is required to enable more
2836 // effective optimizations.
2837 if (V2.isUndef()) {
2838 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2839 V1, V2, DAG)))
2840 return Result;
2841 }
2842
2843 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2844 return NewShuffle;
2845 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2846 return Result;
2847
2848 return SDValue();
2849}
2850
2851SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2852 SelectionDAG &DAG) const {
2853 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2854 ArrayRef<int> OrigMask = SVOp->getMask();
2855 SDValue V1 = Op.getOperand(0);
2856 SDValue V2 = Op.getOperand(1);
2857 MVT VT = Op.getSimpleValueType();
2858 int NumElements = VT.getVectorNumElements();
2859 SDLoc DL(Op);
2860
2861 bool V1IsUndef = V1.isUndef();
2862 bool V2IsUndef = V2.isUndef();
2863 if (V1IsUndef && V2IsUndef)
2864 return DAG.getUNDEF(VT);
2865
2866  // When we create a shuffle node we put the UNDEF node as the second operand,
2867 // but in some cases the first operand may be transformed to UNDEF.
2868 // In this case we should just commute the node.
2869 if (V1IsUndef)
2870 return DAG.getCommutedVectorShuffle(*SVOp);
2871
2872 // Check for non-undef masks pointing at an undef vector and make the masks
2873 // undef as well. This makes it easier to match the shuffle based solely on
2874 // the mask.
2875 if (V2IsUndef &&
2876 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2877 SmallVector<int, 8> NewMask(OrigMask);
2878 for (int &M : NewMask)
2879 if (M >= NumElements)
2880 M = -1;
2881 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2882 }
2883
2884 // Check for illegal shuffle mask element index values.
2885 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2886 (void)MaskUpperLimit;
2887 assert(llvm::all_of(OrigMask,
2888 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2889 "Out of bounds shuffle index");
2890
2891 // For each vector width, delegate to a specialized lowering routine.
2892 if (VT.is128BitVector())
2893 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2894
2895 if (VT.is256BitVector())
2896 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2897
2898 return SDValue();
2899}
2900
2901SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2902 SelectionDAG &DAG) const {
2903 // Custom lower to ensure the libcall return is passed in an FPR on hard
2904 // float ABIs.
2905 SDLoc DL(Op);
2906 MakeLibCallOptions CallOptions;
2907 SDValue Op0 = Op.getOperand(0);
2908 SDValue Chain = SDValue();
2909 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2910 SDValue Res;
2911 std::tie(Res, Chain) =
2912 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2913 if (Subtarget.is64Bit())
2914 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2915 return DAG.getBitcast(MVT::i32, Res);
2916}
2917
2918SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2919 SelectionDAG &DAG) const {
2920 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2921 // float ABIs.
2922 SDLoc DL(Op);
2923 MakeLibCallOptions CallOptions;
2924 SDValue Op0 = Op.getOperand(0);
2925 SDValue Chain = SDValue();
2926 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2927 DL, MVT::f32, Op0)
2928 : DAG.getBitcast(MVT::f32, Op0);
2929 SDValue Res;
2930 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2931 CallOptions, DL, Chain);
2932 return Res;
2933}
2934
2935SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2936 SelectionDAG &DAG) const {
2937 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2938 SDLoc DL(Op);
2939 MakeLibCallOptions CallOptions;
2940 RTLIB::Libcall LC =
2941 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2942 SDValue Res =
2943 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2944 if (Subtarget.is64Bit())
2945 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2946 return DAG.getBitcast(MVT::i32, Res);
2947}
2948
2949SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2950 SelectionDAG &DAG) const {
2951 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2952 MVT VT = Op.getSimpleValueType();
2953 SDLoc DL(Op);
2954 Op = DAG.getNode(
2955 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2956 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2957 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2958 DL, MVT::f32, Op)
2959 : DAG.getBitcast(MVT::f32, Op);
2960 if (VT != MVT::f32)
2961 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2962 return Res;
2963}
2964
2965// Lower BUILD_VECTOR as broadcast load (if possible).
2966// For example:
2967// %a = load i8, ptr %ptr
2968// %b = build_vector %a, %a, %a, %a
2969// is lowered to:
2970// (VLDREPL_B $a0, 0)
2971static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2972                                                const SDLoc &DL,
2973 SelectionDAG &DAG) {
2974 MVT VT = BVOp->getSimpleValueType(0);
2975 int NumOps = BVOp->getNumOperands();
2976
2977 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2978 "Unsupported vector type for broadcast.");
2979
2980 SDValue IdentitySrc;
2981 bool IsIdeneity = true;
2982
2983 for (int i = 0; i != NumOps; i++) {
2984 SDValue Op = BVOp->getOperand(i);
2985 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2986 IsIdeneity = false;
2987 break;
2988 }
2989 IdentitySrc = BVOp->getOperand(0);
2990 }
2991
2992  // Make sure that this load is valid and has only one user.
2993 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2994 return SDValue();
2995
2996 auto *LN = cast<LoadSDNode>(IdentitySrc);
2997 auto ExtType = LN->getExtensionType();
2998
2999 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
3000 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
3001 // Indexed loads and stores are not supported on LoongArch.
3002 assert(LN->isUnindexed() && "Unexpected indexed load.");
3003
3004 SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3005    // The offset operand of an unindexed load is always undef, so there is
3006    // no need to pass it to VLDREPL.
3007 SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
3008 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
3009 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
3010 return BCast;
3011 }
3012 return SDValue();
3013}
3014
3015// Sequentially insert elements from Ops into Vector, from low to high indices.
3016// Note: Ops can have fewer elements than Vector.
3017static void fillVector(ArrayRef<SDValue> Ops, SelectionDAG &DAG, SDLoc DL,
3018                       const LoongArchSubtarget &Subtarget, SDValue &Vector,
3019 EVT ResTy) {
3020 assert(Ops.size() <= ResTy.getVectorNumElements());
3021
3022 SDValue Op0 = Ops[0];
3023 if (!Op0.isUndef())
3024 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
3025 for (unsigned i = 1; i < Ops.size(); ++i) {
3026 SDValue Opi = Ops[i];
3027 if (Opi.isUndef())
3028 continue;
3029 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
3030 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3031 }
3032}
3033
3034// Build a ResTy subvector from Node, taking NumElts elements starting at index
3035// 'first'.
3036static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node,
3037                                            SelectionDAG &DAG, SDLoc DL,
3038 const LoongArchSubtarget &Subtarget,
3039 EVT ResTy, unsigned first) {
3040 unsigned NumElts = ResTy.getVectorNumElements();
3041
3042 assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());
3043
3044 SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
3045 Node->op_begin() + first + NumElts);
3046 SDValue Vector = DAG.getUNDEF(ResTy);
3047 fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
3048 return Vector;
3049}
3050
3051SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
3052 SelectionDAG &DAG) const {
3053 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
3054 MVT VT = Node->getSimpleValueType(0);
3055 EVT ResTy = Op->getValueType(0);
3056 unsigned NumElts = ResTy.getVectorNumElements();
3057 SDLoc DL(Op);
3058 APInt SplatValue, SplatUndef;
3059 unsigned SplatBitSize;
3060 bool HasAnyUndefs;
3061 bool IsConstant = false;
3062 bool UseSameConstant = true;
3063 SDValue ConstantValue;
3064 bool Is128Vec = ResTy.is128BitVector();
3065 bool Is256Vec = ResTy.is256BitVector();
3066
3067 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
3068 (!Subtarget.hasExtLASX() || !Is256Vec))
3069 return SDValue();
3070
3071 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
3072 return Result;
3073
3074 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
3075 /*MinSplatBits=*/8) &&
3076 SplatBitSize <= 64) {
3077 // We can only cope with 8, 16, 32, or 64-bit elements.
3078 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
3079 SplatBitSize != 64)
3080 return SDValue();
3081
3082 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
3083 // We can only handle 64-bit elements that are within
3084 // the signed 10-bit range or match vldi patterns on 32-bit targets.
3085 // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
3086 if (!SplatValue.isSignedIntN(10) &&
3087 !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
3088 return SDValue();
3089 if ((Is128Vec && ResTy == MVT::v4i32) ||
3090 (Is256Vec && ResTy == MVT::v8i32))
3091 return Op;
3092 }
3093
3094 EVT ViaVecTy;
3095
3096 switch (SplatBitSize) {
3097 default:
3098 return SDValue();
3099 case 8:
3100 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
3101 break;
3102 case 16:
3103 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
3104 break;
3105 case 32:
3106 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
3107 break;
3108 case 64:
3109 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
3110 break;
3111 }
3112
3113 // SelectionDAG::getConstant will promote SplatValue appropriately.
3114 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
3115
3116 // Bitcast to the type we originally wanted.
3117 if (ViaVecTy != ResTy)
3118 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
3119
3120 return Result;
3121 }
3122
3123 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
3124 return Op;
3125
3126 for (unsigned i = 0; i < NumElts; ++i) {
3127 SDValue Opi = Node->getOperand(i);
3128 if (isIntOrFPConstant(Opi)) {
3129 IsConstant = true;
3130 if (!ConstantValue.getNode())
3131 ConstantValue = Opi;
3132 else if (ConstantValue != Opi)
3133 UseSameConstant = false;
3134 }
3135 }
3136
3137 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
3138 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
3139 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
3140 for (unsigned i = 0; i < NumElts; ++i) {
3141 SDValue Opi = Node->getOperand(i);
3142 if (!isIntOrFPConstant(Opi))
3143 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
3144 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3145 }
3146 return Result;
3147 }
3148
3149 if (!IsConstant) {
3150 // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
3151 // the sub-sequence of the vector and then broadcast the sub-sequence.
3152 //
3153 // TODO: If the BUILD_VECTOR contains undef elements, consider falling
3154    // back to using INSERT_VECTOR_ELT to materialize the vector, because it
3155 // generates worse code in some cases. This could be further optimized
3156 // with more consideration.
3158 BitVector UndefElements;
3159 if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
3160 UndefElements.count() == 0) {
3161      // Use LSX instructions to fill the sub-sequence of a 256-bit vector,
3162      // because the high part can simply be treated as undef.
3163 SDValue Vector = DAG.getUNDEF(ResTy);
3164 EVT FillTy = Is256Vec
3165                       ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext())
3166                       : ResTy;
3167 SDValue FillVec =
3168 Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;
3169
3170 fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);
3171
3172 unsigned SeqLen = Sequence.size();
3173 unsigned SplatLen = NumElts / SeqLen;
3174 MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
3175 MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);
3176
3177      // If the size of the sub-sequence is half of a 256-bit vector, bitcast
3178      // the vector to v4i64 in order to match the pattern of XVREPLVE0Q.
3179 if (SplatEltTy == MVT::i128)
3180 SplatTy = MVT::v4i64;
3181
3182 SDValue SplatVec;
3183 SDValue SrcVec = DAG.getBitcast(
3184 SplatTy,
3185 Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
3186 if (Is256Vec) {
3187 SplatVec =
3188 DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
3189 : LoongArchISD::XVREPLVE0,
3190 DL, SplatTy, SrcVec);
3191 } else {
3192 SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
3193 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
3194 }
3195
3196 return DAG.getBitcast(ResTy, SplatVec);
3197 }
3198
3199    // Use INSERT_VECTOR_ELT operations rather than expanding to stores,
3200    // because going through memory operations is much slower.
3201    //
3202    // For 256-bit vectors, normally split into two halves and concatenate them.
3203    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
3204    // one non-undef element, skip splitting to avoid a worse result.
3205 if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
3206 ResTy == MVT::v4f64) {
3207 unsigned NonUndefCount = 0;
3208 for (unsigned i = NumElts / 2; i < NumElts; ++i) {
3209 if (!Node->getOperand(i).isUndef()) {
3210 ++NonUndefCount;
3211 if (NonUndefCount > 1)
3212 break;
3213 }
3214 }
3215 if (NonUndefCount == 1)
3216 return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
3217 }
3218
3219 EVT VecTy =
3220 Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
3221 SDValue Vector =
3222 fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);
3223
3224 if (Is128Vec)
3225 return Vector;
3226
3227 SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
3228 VecTy, NumElts / 2);
3229
3230 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
3231 }
3232
3233 return SDValue();
3234}
3235
3236SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3237 SelectionDAG &DAG) const {
3238 SDLoc DL(Op);
3239 MVT ResVT = Op.getSimpleValueType();
3240 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3241
3242 unsigned NumOperands = Op.getNumOperands();
3243 unsigned NumFreezeUndef = 0;
3244 unsigned NumZero = 0;
3245 unsigned NumNonZero = 0;
3246 unsigned NonZeros = 0;
3247 SmallSet<SDValue, 4> Undefs;
3248 for (unsigned i = 0; i != NumOperands; ++i) {
3249 SDValue SubVec = Op.getOperand(i);
3250 if (SubVec.isUndef())
3251 continue;
3252 if (ISD::isFreezeUndef(SubVec.getNode())) {
3253 // If the freeze(undef) has multiple uses then we must fold to zero.
3254 if (SubVec.hasOneUse()) {
3255 ++NumFreezeUndef;
3256 } else {
3257 ++NumZero;
3258 Undefs.insert(SubVec);
3259 }
3260 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3261 ++NumZero;
3262 else {
3263 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3264 NonZeros |= 1 << i;
3265 ++NumNonZero;
3266 }
3267 }
3268
3269 // If we have more than 2 non-zeros, build each half separately.
3270 if (NumNonZero > 2) {
3271 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3272 ArrayRef<SDUse> Ops = Op->ops();
3273 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3274 Ops.slice(0, NumOperands / 2));
3275 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3276 Ops.slice(NumOperands / 2));
3277 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3278 }
3279
3280 // Otherwise, build it up through insert_subvectors.
3281 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3282 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3283 : DAG.getUNDEF(ResVT));
3284
3285 // Replace Undef operands with ZeroVector.
3286 for (SDValue U : Undefs)
3287 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3288
3289 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3290 unsigned NumSubElems = SubVT.getVectorNumElements();
3291 for (unsigned i = 0; i != NumOperands; ++i) {
3292 if ((NonZeros & (1 << i)) == 0)
3293 continue;
3294
3295 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3296 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3297 }
3298
3299 return Vec;
3300}
3301
3302SDValue
3303LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3304 SelectionDAG &DAG) const {
3305 MVT EltVT = Op.getSimpleValueType();
3306 SDValue Vec = Op->getOperand(0);
3307 EVT VecTy = Vec->getValueType(0);
3308 SDValue Idx = Op->getOperand(1);
3309 SDLoc DL(Op);
3310 MVT GRLenVT = Subtarget.getGRLenVT();
3311
3312 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
3313
3314 if (isa<ConstantSDNode>(Idx))
3315 return Op;
3316
3317 switch (VecTy.getSimpleVT().SimpleTy) {
3318 default:
3319 llvm_unreachable("Unexpected type");
3320 case MVT::v32i8:
3321 case MVT::v16i16:
3322 case MVT::v4i64:
3323 case MVT::v4f64: {
3324    // Extract the high-half subvector and place it in the low half of a new
3325    // vector. It doesn't matter what the high half of the new vector is.
3326 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
3327 SDValue VecHi =
3328 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
3329 SDValue TmpVec =
3330 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
3331 VecHi, DAG.getConstant(0, DL, GRLenVT));
3332
3333    // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
3334    // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
3335    // desired element.
3336 SDValue IdxCp =
3337 Subtarget.is64Bit()
3338 ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
3339 : DAG.getBitcast(MVT::f32, Idx);
3340 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
3341 SDValue MaskVec =
3342 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
3343 SDValue ResVec =
3344 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
3345
3346 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
3347 DAG.getConstant(0, DL, GRLenVT));
3348 }
3349 case MVT::v8i32:
3350 case MVT::v8f32: {
3351 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
3352 SDValue SplatValue =
3353 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
3354
3355 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
3356 DAG.getConstant(0, DL, GRLenVT));
3357 }
3358 }
3359}
3360
3361SDValue
3362LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3363 SelectionDAG &DAG) const {
3364 MVT VT = Op.getSimpleValueType();
3365 MVT EltVT = VT.getVectorElementType();
3366 unsigned NumElts = VT.getVectorNumElements();
3367 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3368 SDLoc DL(Op);
3369 SDValue Op0 = Op.getOperand(0);
3370 SDValue Op1 = Op.getOperand(1);
3371 SDValue Op2 = Op.getOperand(2);
3372
3373 if (isa<ConstantSDNode>(Op2))
3374 return Op;
3375
3376 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3377 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3378
3379 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3380 return SDValue();
3381
3382 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3383 SmallVector<SDValue, 32> RawIndices;
3384 SDValue SplatIdx;
3385 SDValue Indices;
3386
3387 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3388 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3389 for (unsigned i = 0; i < NumElts; ++i) {
3390 RawIndices.push_back(Op2);
3391 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3392 }
3393 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3394 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3395
3396 RawIndices.clear();
3397 for (unsigned i = 0; i < NumElts; ++i) {
3398 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3399 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3400 }
3401 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3402 Indices = DAG.getBitcast(IdxVTy, Indices);
3403 } else {
3404 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3405
3406 for (unsigned i = 0; i < NumElts; ++i)
3407 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3408 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3409 }
3410
3411 // insert vec, elt, idx
3412 // =>
3413 // select (splatidx == {0,1,2...}) ? splatelt : vec
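  //
  // For example, inserting %e into <4 x i32> %v at a non-constant index %i
  // becomes roughly:
  //   %cc  = setcc eq <4 x i32> <%i, %i, %i, %i>, <4 x i32> <0, 1, 2, 3>
  //   %res = vselect %cc, <4 x i32> <%e, %e, %e, %e>, %v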
3414 SDValue SelectCC =
3415 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3416 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3417}
3418
3419SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3420 SelectionDAG &DAG) const {
3421 SDLoc DL(Op);
3422 SyncScope::ID FenceSSID =
3423 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3424
3425 // singlethread fences only synchronize with signal handlers on the same
3426 // thread and thus only need to preserve instruction order, not actually
3427 // enforce memory ordering.
3428 if (FenceSSID == SyncScope::SingleThread)
3429 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3430 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3431
3432 return Op;
3433}
3434
3435SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3436 SelectionDAG &DAG) const {
3437
3438 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3439 DAG.getContext()->emitError(
3440 "On LA64, only 64-bit registers can be written.");
3441 return Op.getOperand(0);
3442 }
3443
3444 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3445 DAG.getContext()->emitError(
3446 "On LA32, only 32-bit registers can be written.");
3447 return Op.getOperand(0);
3448 }
3449
3450 return Op;
3451}
3452
3453SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
3454 SelectionDAG &DAG) const {
3455 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3456 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
3457 "be a constant integer");
3458 return SDValue();
3459 }
3460
3461 MachineFunction &MF = DAG.getMachineFunction();
3462  MF.getFrameInfo().setFrameAddressIsTaken(true);
3463  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
3464 EVT VT = Op.getValueType();
3465 SDLoc DL(Op);
3466 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3467 unsigned Depth = Op.getConstantOperandVal(0);
3468 int GRLenInBytes = Subtarget.getGRLen() / 8;
3469
3470 while (Depth--) {
3471 int Offset = -(GRLenInBytes * 2);
3472 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3473 DAG.getSignedConstant(Offset, DL, VT));
3474 FrameAddr =
3475 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3476 }
3477 return FrameAddr;
3478}
3479
3480SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
3481 SelectionDAG &DAG) const {
3482  // Currently we only support lowering the return address for the current
3483  // frame.
3483 if (Op.getConstantOperandVal(0) != 0) {
3484 DAG.getContext()->emitError(
3485 "return address can only be determined for the current frame");
3486 return SDValue();
3487 }
3488
3489 MachineFunction &MF = DAG.getMachineFunction();
3490  MF.getFrameInfo().setReturnAddressIsTaken(true);
3491  MVT GRLenVT = Subtarget.getGRLenVT();
3492
3493 // Return the value of the return address register, marking it an implicit
3494 // live-in.
3495 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
3496 getRegClassFor(GRLenVT));
3497 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3498}
3499
3500SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3501 SelectionDAG &DAG) const {
3502 MachineFunction &MF = DAG.getMachineFunction();
3503 auto Size = Subtarget.getGRLen() / 8;
3504 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3505 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3506}
3507
3508SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3509 SelectionDAG &DAG) const {
3510 MachineFunction &MF = DAG.getMachineFunction();
3511 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3512
3513 SDLoc DL(Op);
3514 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3515                                 getPointerTy(MF.getDataLayout()));
3516
3517 // vastart just stores the address of the VarArgsFrameIndex slot into the
3518 // memory location argument.
3519 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3520 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3521 MachinePointerInfo(SV));
3522}
3523
3524SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3525 SelectionDAG &DAG) const {
3526 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3527 !Subtarget.hasBasicD() && "unexpected target features");
3528
3529 SDLoc DL(Op);
3530 SDValue Op0 = Op.getOperand(0);
3531 if (Op0->getOpcode() == ISD::AND) {
3532 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3533 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3534 return Op;
3535 }
3536
3537 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3538 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3539 Op0.getConstantOperandVal(2) == UINT64_C(0))
3540 return Op;
3541
3542 if (Op0.getOpcode() == ISD::AssertZext &&
3543 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3544 return Op;
3545
3546 EVT OpVT = Op0.getValueType();
3547 EVT RetVT = Op.getValueType();
3548 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3549 MakeLibCallOptions CallOptions;
3550 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3551 SDValue Chain = SDValue();
3552  SDValue Result;
3553  std::tie(Result, Chain) =
3554 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3555 return Result;
3556}
3557
3558SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3559 SelectionDAG &DAG) const {
3560 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3561 !Subtarget.hasBasicD() && "unexpected target features");
3562
3563 SDLoc DL(Op);
3564 SDValue Op0 = Op.getOperand(0);
3565
3566 if ((Op0.getOpcode() == ISD::AssertSext ||
3568 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3569 return Op;
3570
3571 EVT OpVT = Op0.getValueType();
3572 EVT RetVT = Op.getValueType();
3573 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3574 MakeLibCallOptions CallOptions;
3575 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3576  SDValue Chain = SDValue();
3577  SDValue Result;
3578 std::tie(Result, Chain) =
3579 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3580 return Result;
3581}
3582
3583SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3584 SelectionDAG &DAG) const {
3585
3586 SDLoc DL(Op);
3587 EVT VT = Op.getValueType();
3588 SDValue Op0 = Op.getOperand(0);
3589 EVT Op0VT = Op0.getValueType();
3590
3591 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3592 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3593 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3594 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3595 }
3596 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3597 SDValue Lo, Hi;
3598 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3599 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3600 }
3601 return Op;
3602}
3603
3604SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3605 SelectionDAG &DAG) const {
3606
3607 SDLoc DL(Op);
3608 SDValue Op0 = Op.getOperand(0);
3609
3610 if (Op0.getValueType() == MVT::f16)
3611 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3612
3613 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3614 !Subtarget.hasBasicD()) {
3615 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3616 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3617 }
3618
3619 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3620 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3621 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3622}
3623
3624static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3625                             SelectionDAG &DAG, unsigned Flags) {
3626 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3627}
3628
3629static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3630                             SelectionDAG &DAG, unsigned Flags) {
3631 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3632 Flags);
3633}
3634
3635static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3636                             SelectionDAG &DAG, unsigned Flags) {
3637 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3638 N->getOffset(), Flags);
3639}
3640
3641static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3642                             SelectionDAG &DAG, unsigned Flags) {
3643 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3644}
3645
3646template <class NodeTy>
3647SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3648                                         CodeModel::Model M,
3649 bool IsLocal) const {
3650 SDLoc DL(N);
3651 EVT Ty = getPointerTy(DAG.getDataLayout());
3652 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3653 SDValue Load;
3654
3655 switch (M) {
3656 default:
3657 report_fatal_error("Unsupported code model");
3658
3659 case CodeModel::Large: {
3660 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3661
3662 // This is not actually used, but is necessary for successfully matching
3663 // the PseudoLA_*_LARGE nodes.
3664 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3665 if (IsLocal) {
3666 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3667 // eventually becomes the desired 5-insn code sequence.
3668 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3669 Tmp, Addr),
3670 0);
3671 } else {
3672 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3673 // eventually becomes the desired 5-insn code sequence.
3674 Load = SDValue(
3675 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3676 0);
3677 }
3678 break;
3679 }
3680
3681 case CodeModel::Small:
3682 case CodeModel::Medium:
3683 if (IsLocal) {
3684 // This generates the pattern (PseudoLA_PCREL sym), which
3685 //
3686 // for la32r expands to:
3687 // (addi.w (pcaddu12i %pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3688 //
3689 // for la32s and la64 expands to:
3690 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3691 Load = SDValue(
3692 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3693 } else {
3694 // This generates the pattern (PseudoLA_GOT sym), which
3695 //
3696 // for la32r expands to:
3697 // (ld.w (pcaddu12i %got_pcadd_hi20(sym)) %pcadd_lo12(.Lpcadd_hi)).
3698 //
3699 // for la32s and la64 expands to:
3700 // (ld.w/d (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3701 Load =
3702 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3703 }
3704 }
3705
3706 if (!IsLocal) {
3707 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3708 MachineFunction &MF = DAG.getMachineFunction();
3709    MachineMemOperand *MemOp = MF.getMachineMemOperand(
3710        MachinePointerInfo::getGOT(MF),
3711        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3712            MachineMemOperand::MOInvariant,
3713 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3714 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3715 }
3716
3717 return Load;
3718}
3719
3720SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3721 SelectionDAG &DAG) const {
3722 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3723 DAG.getTarget().getCodeModel());
3724}
3725
3726SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3727 SelectionDAG &DAG) const {
3728 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3729 DAG.getTarget().getCodeModel());
3730}
3731
3732SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3733 SelectionDAG &DAG) const {
3734 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3735 DAG.getTarget().getCodeModel());
3736}
3737
3738SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3739 SelectionDAG &DAG) const {
3740 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3741 assert(N->getOffset() == 0 && "unexpected offset in global node");
3742 auto CM = DAG.getTarget().getCodeModel();
3743 const GlobalValue *GV = N->getGlobal();
3744
3745 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3746 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3747 CM = *GCM;
3748 }
3749
3750 return getAddr(N, DAG, CM, GV->isDSOLocal());
3751}
3752
3753SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3754 SelectionDAG &DAG,
3755 unsigned Opc, bool UseGOT,
3756 bool Large) const {
3757 SDLoc DL(N);
3758 EVT Ty = getPointerTy(DAG.getDataLayout());
3759 MVT GRLenVT = Subtarget.getGRLenVT();
3760
3761 // This is not actually used, but is necessary for successfully matching the
3762 // PseudoLA_*_LARGE nodes.
3763 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3764 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3765
3766 // Only IE needs an extra argument for large code model.
3767 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3768 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3769 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3770
3771 // If it is LE for normal/medium code model, the add tp operation will occur
3772 // during the pseudo-instruction expansion.
3773 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3774 return Offset;
3775
3776 if (UseGOT) {
3777 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3778 MachineFunction &MF = DAG.getMachineFunction();
3779    MachineMemOperand *MemOp = MF.getMachineMemOperand(
3780        MachinePointerInfo::getGOT(MF),
3781        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3782            MachineMemOperand::MOInvariant,
3783 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3784 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3785 }
3786
3787 // Add the thread pointer.
3788 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3789 DAG.getRegister(LoongArch::R2, GRLenVT));
3790}
3791
3792SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3793 SelectionDAG &DAG,
3794 unsigned Opc,
3795 bool Large) const {
3796 SDLoc DL(N);
3797 EVT Ty = getPointerTy(DAG.getDataLayout());
3798 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3799
3800 // This is not actually used, but is necessary for successfully matching the
3801 // PseudoLA_*_LARGE nodes.
3802 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3803
3804 // Use a PC-relative addressing mode to access the dynamic GOT address.
3805 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3806 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3807 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3808
3809  // Prepare argument list to generate call.
3810  ArgListTy Args;
3811 Args.emplace_back(Load, CallTy);
3812
3813 // Setup call to __tls_get_addr.
3814 TargetLowering::CallLoweringInfo CLI(DAG);
3815 CLI.setDebugLoc(DL)
3816 .setChain(DAG.getEntryNode())
3817 .setLibCallee(CallingConv::C, CallTy,
3818 DAG.getExternalSymbol("__tls_get_addr", Ty),
3819 std::move(Args));
3820
3821 return LowerCallTo(CLI).first;
3822}
3823
3824SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3825 SelectionDAG &DAG, unsigned Opc,
3826 bool Large) const {
3827 SDLoc DL(N);
3828 EVT Ty = getPointerTy(DAG.getDataLayout());
3829 const GlobalValue *GV = N->getGlobal();
3830
3831 // This is not actually used, but is necessary for successfully matching the
3832 // PseudoLA_*_LARGE nodes.
3833 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3834
3835 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3836 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3837 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3838 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3839 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3840}
3841
3842SDValue
3843LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3844                                               SelectionDAG &DAG) const {
3845  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3846      CallingConv::GHC)
3847 report_fatal_error("In GHC calling convention TLS is not supported");
3848
3849 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3850 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3851
3852 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3853 assert(N->getOffset() == 0 && "unexpected offset in global node");
3854
3855 if (DAG.getTarget().useEmulatedTLS())
3856 reportFatalUsageError("the emulated TLS is prohibited");
3857
3858 bool IsDesc = DAG.getTarget().useTLSDESC();
3859
3860  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3861  case TLSModel::GeneralDynamic:
3862 // In this model, application code calls the dynamic linker function
3863 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3864 // runtime.
3865 if (!IsDesc)
3866 return getDynamicTLSAddr(N, DAG,
3867 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3868 : LoongArch::PseudoLA_TLS_GD,
3869 Large);
3870    break;
3871  case TLSModel::LocalDynamic:
3872 // Same as GeneralDynamic, except for assembly modifiers and relocation
3873 // records.
3874 if (!IsDesc)
3875 return getDynamicTLSAddr(N, DAG,
3876 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3877 : LoongArch::PseudoLA_TLS_LD,
3878 Large);
3879    break;
3880  case TLSModel::InitialExec:
3881 // This model uses the GOT to resolve TLS offsets.
3882 return getStaticTLSAddr(N, DAG,
3883 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3884 : LoongArch::PseudoLA_TLS_IE,
3885                            /*UseGOT=*/true, Large);
3886  case TLSModel::LocalExec:
3887 // This model is used when static linking as the TLS offsets are resolved
3888 // during program linking.
3889 //
3890 // This node doesn't need an extra argument for the large code model.
3891 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3892 /*UseGOT=*/false, Large);
3893 }
3894
3895 return getTLSDescAddr(N, DAG,
3896 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3897 : LoongArch::PseudoLA_TLS_DESC,
3898 Large);
3899}
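// Summary of the dispatch above: GeneralDynamic and LocalDynamic call
// __tls_get_addr (or use a TLS descriptor pseudo when useTLSDESC() is set),
// InitialExec loads the tp-relative offset from the GOT and adds $tp (R2), and
// LocalExec materializes the offset with PseudoLA_TLS_LE, with the $tp add
// folded into the pseudo expansion for the non-large code models.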
3900
3901template <unsigned N>
3902static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3903 SelectionDAG &DAG, bool IsSigned = false) {
3904 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3905 // Check the ImmArg.
3906 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3907 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3908 DAG.getContext()->emitError(Op->getOperationName(0) +
3909 ": argument out of range.");
3910 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3911 }
3912 return SDValue();
3913}
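// For example, checkIntrinsicImmArg<2>(Op, 2, DAG) below requires operand 2 of
// the intrinsic node (e.g. the lane index of vreplvei.w) to be an unsigned
// 2-bit immediate; anything else triggers the "argument out of range" error
// and the whole intrinsic is folded to UNDEF so lowering can continue.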
3914
3915SDValue
3916LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3917 SelectionDAG &DAG) const {
3918 switch (Op.getConstantOperandVal(0)) {
3919 default:
3920 return SDValue(); // Don't custom lower most intrinsics.
3921 case Intrinsic::thread_pointer: {
3922 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3923 return DAG.getRegister(LoongArch::R2, PtrVT);
3924 }
3925 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3926 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3927 case Intrinsic::loongarch_lsx_vreplvei_d:
3928 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3929 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3930 case Intrinsic::loongarch_lsx_vreplvei_w:
3931 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3932 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3933 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3934 case Intrinsic::loongarch_lasx_xvpickve_d:
3935 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3936 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3937 case Intrinsic::loongarch_lasx_xvinsve0_d:
3938 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3939 case Intrinsic::loongarch_lsx_vsat_b:
3940 case Intrinsic::loongarch_lsx_vsat_bu:
3941 case Intrinsic::loongarch_lsx_vrotri_b:
3942 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3943 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3944 case Intrinsic::loongarch_lsx_vsrlri_b:
3945 case Intrinsic::loongarch_lsx_vsrari_b:
3946 case Intrinsic::loongarch_lsx_vreplvei_h:
3947 case Intrinsic::loongarch_lasx_xvsat_b:
3948 case Intrinsic::loongarch_lasx_xvsat_bu:
3949 case Intrinsic::loongarch_lasx_xvrotri_b:
3950 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3951 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3952 case Intrinsic::loongarch_lasx_xvsrlri_b:
3953 case Intrinsic::loongarch_lasx_xvsrari_b:
3954 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3955 case Intrinsic::loongarch_lasx_xvpickve_w:
3956 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3957 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3958 case Intrinsic::loongarch_lasx_xvinsve0_w:
3959 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3960 case Intrinsic::loongarch_lsx_vsat_h:
3961 case Intrinsic::loongarch_lsx_vsat_hu:
3962 case Intrinsic::loongarch_lsx_vrotri_h:
3963 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3964 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3965 case Intrinsic::loongarch_lsx_vsrlri_h:
3966 case Intrinsic::loongarch_lsx_vsrari_h:
3967 case Intrinsic::loongarch_lsx_vreplvei_b:
3968 case Intrinsic::loongarch_lasx_xvsat_h:
3969 case Intrinsic::loongarch_lasx_xvsat_hu:
3970 case Intrinsic::loongarch_lasx_xvrotri_h:
3971 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3972 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3973 case Intrinsic::loongarch_lasx_xvsrlri_h:
3974 case Intrinsic::loongarch_lasx_xvsrari_h:
3975 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3976 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3977 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3978 case Intrinsic::loongarch_lsx_vsrani_b_h:
3979 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3980 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3981 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3982 case Intrinsic::loongarch_lsx_vssrani_b_h:
3983 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3984 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3985 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3986 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3987 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3988 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3989 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3990 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3991 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3992 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3993 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3994 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3995 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3996 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3997 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3998 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3999 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
4000 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
4001 return checkIntrinsicImmArg<4>(Op, 3, DAG);
4002 case Intrinsic::loongarch_lsx_vsat_w:
4003 case Intrinsic::loongarch_lsx_vsat_wu:
4004 case Intrinsic::loongarch_lsx_vrotri_w:
4005 case Intrinsic::loongarch_lsx_vsllwil_d_w:
4006 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
4007 case Intrinsic::loongarch_lsx_vsrlri_w:
4008 case Intrinsic::loongarch_lsx_vsrari_w:
4009 case Intrinsic::loongarch_lsx_vslei_bu:
4010 case Intrinsic::loongarch_lsx_vslei_hu:
4011 case Intrinsic::loongarch_lsx_vslei_wu:
4012 case Intrinsic::loongarch_lsx_vslei_du:
4013 case Intrinsic::loongarch_lsx_vslti_bu:
4014 case Intrinsic::loongarch_lsx_vslti_hu:
4015 case Intrinsic::loongarch_lsx_vslti_wu:
4016 case Intrinsic::loongarch_lsx_vslti_du:
4017 case Intrinsic::loongarch_lsx_vbsll_v:
4018 case Intrinsic::loongarch_lsx_vbsrl_v:
4019 case Intrinsic::loongarch_lasx_xvsat_w:
4020 case Intrinsic::loongarch_lasx_xvsat_wu:
4021 case Intrinsic::loongarch_lasx_xvrotri_w:
4022 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
4023 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
4024 case Intrinsic::loongarch_lasx_xvsrlri_w:
4025 case Intrinsic::loongarch_lasx_xvsrari_w:
4026 case Intrinsic::loongarch_lasx_xvslei_bu:
4027 case Intrinsic::loongarch_lasx_xvslei_hu:
4028 case Intrinsic::loongarch_lasx_xvslei_wu:
4029 case Intrinsic::loongarch_lasx_xvslei_du:
4030 case Intrinsic::loongarch_lasx_xvslti_bu:
4031 case Intrinsic::loongarch_lasx_xvslti_hu:
4032 case Intrinsic::loongarch_lasx_xvslti_wu:
4033 case Intrinsic::loongarch_lasx_xvslti_du:
4034 case Intrinsic::loongarch_lasx_xvbsll_v:
4035 case Intrinsic::loongarch_lasx_xvbsrl_v:
4036 return checkIntrinsicImmArg<5>(Op, 2, DAG);
4037 case Intrinsic::loongarch_lsx_vseqi_b:
4038 case Intrinsic::loongarch_lsx_vseqi_h:
4039 case Intrinsic::loongarch_lsx_vseqi_w:
4040 case Intrinsic::loongarch_lsx_vseqi_d:
4041 case Intrinsic::loongarch_lsx_vslei_b:
4042 case Intrinsic::loongarch_lsx_vslei_h:
4043 case Intrinsic::loongarch_lsx_vslei_w:
4044 case Intrinsic::loongarch_lsx_vslei_d:
4045 case Intrinsic::loongarch_lsx_vslti_b:
4046 case Intrinsic::loongarch_lsx_vslti_h:
4047 case Intrinsic::loongarch_lsx_vslti_w:
4048 case Intrinsic::loongarch_lsx_vslti_d:
4049 case Intrinsic::loongarch_lasx_xvseqi_b:
4050 case Intrinsic::loongarch_lasx_xvseqi_h:
4051 case Intrinsic::loongarch_lasx_xvseqi_w:
4052 case Intrinsic::loongarch_lasx_xvseqi_d:
4053 case Intrinsic::loongarch_lasx_xvslei_b:
4054 case Intrinsic::loongarch_lasx_xvslei_h:
4055 case Intrinsic::loongarch_lasx_xvslei_w:
4056 case Intrinsic::loongarch_lasx_xvslei_d:
4057 case Intrinsic::loongarch_lasx_xvslti_b:
4058 case Intrinsic::loongarch_lasx_xvslti_h:
4059 case Intrinsic::loongarch_lasx_xvslti_w:
4060 case Intrinsic::loongarch_lasx_xvslti_d:
4061 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
4062 case Intrinsic::loongarch_lsx_vsrlni_h_w:
4063 case Intrinsic::loongarch_lsx_vsrani_h_w:
4064 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
4065 case Intrinsic::loongarch_lsx_vsrarni_h_w:
4066 case Intrinsic::loongarch_lsx_vssrlni_h_w:
4067 case Intrinsic::loongarch_lsx_vssrani_h_w:
4068 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
4069 case Intrinsic::loongarch_lsx_vssrani_hu_w:
4070 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
4071 case Intrinsic::loongarch_lsx_vssrarni_h_w:
4072 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
4073 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
4074 case Intrinsic::loongarch_lsx_vfrstpi_b:
4075 case Intrinsic::loongarch_lsx_vfrstpi_h:
4076 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
4077 case Intrinsic::loongarch_lasx_xvsrani_h_w:
4078 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
4079 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
4080 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
4081 case Intrinsic::loongarch_lasx_xvssrani_h_w:
4082 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
4083 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
4084 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
4085 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
4086 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
4087 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
4088 case Intrinsic::loongarch_lasx_xvfrstpi_b:
4089 case Intrinsic::loongarch_lasx_xvfrstpi_h:
4090 return checkIntrinsicImmArg<5>(Op, 3, DAG);
4091 case Intrinsic::loongarch_lsx_vsat_d:
4092 case Intrinsic::loongarch_lsx_vsat_du:
4093 case Intrinsic::loongarch_lsx_vrotri_d:
4094 case Intrinsic::loongarch_lsx_vsrlri_d:
4095 case Intrinsic::loongarch_lsx_vsrari_d:
4096 case Intrinsic::loongarch_lasx_xvsat_d:
4097 case Intrinsic::loongarch_lasx_xvsat_du:
4098 case Intrinsic::loongarch_lasx_xvrotri_d:
4099 case Intrinsic::loongarch_lasx_xvsrlri_d:
4100 case Intrinsic::loongarch_lasx_xvsrari_d:
4101 return checkIntrinsicImmArg<6>(Op, 2, DAG);
4102 case Intrinsic::loongarch_lsx_vsrlni_w_d:
4103 case Intrinsic::loongarch_lsx_vsrani_w_d:
4104 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
4105 case Intrinsic::loongarch_lsx_vsrarni_w_d:
4106 case Intrinsic::loongarch_lsx_vssrlni_w_d:
4107 case Intrinsic::loongarch_lsx_vssrani_w_d:
4108 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
4109 case Intrinsic::loongarch_lsx_vssrani_wu_d:
4110 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
4111 case Intrinsic::loongarch_lsx_vssrarni_w_d:
4112 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
4113 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
4114 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
4115 case Intrinsic::loongarch_lasx_xvsrani_w_d:
4116 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
4117 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
4118 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
4119 case Intrinsic::loongarch_lasx_xvssrani_w_d:
4120 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
4121 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
4122 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
4123 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
4124 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
4125 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
4126 return checkIntrinsicImmArg<6>(Op, 3, DAG);
4127 case Intrinsic::loongarch_lsx_vsrlni_d_q:
4128 case Intrinsic::loongarch_lsx_vsrani_d_q:
4129 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
4130 case Intrinsic::loongarch_lsx_vsrarni_d_q:
4131 case Intrinsic::loongarch_lsx_vssrlni_d_q:
4132 case Intrinsic::loongarch_lsx_vssrani_d_q:
4133 case Intrinsic::loongarch_lsx_vssrlni_du_q:
4134 case Intrinsic::loongarch_lsx_vssrani_du_q:
4135 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
4136 case Intrinsic::loongarch_lsx_vssrarni_d_q:
4137 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
4138 case Intrinsic::loongarch_lsx_vssrarni_du_q:
4139 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
4140 case Intrinsic::loongarch_lasx_xvsrani_d_q:
4141 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
4142 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
4143 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
4144 case Intrinsic::loongarch_lasx_xvssrani_d_q:
4145 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
4146 case Intrinsic::loongarch_lasx_xvssrani_du_q:
4147 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
4148 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
4149 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
4150 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
4151 return checkIntrinsicImmArg<7>(Op, 3, DAG);
4152 case Intrinsic::loongarch_lsx_vnori_b:
4153 case Intrinsic::loongarch_lsx_vshuf4i_b:
4154 case Intrinsic::loongarch_lsx_vshuf4i_h:
4155 case Intrinsic::loongarch_lsx_vshuf4i_w:
4156 case Intrinsic::loongarch_lasx_xvnori_b:
4157 case Intrinsic::loongarch_lasx_xvshuf4i_b:
4158 case Intrinsic::loongarch_lasx_xvshuf4i_h:
4159 case Intrinsic::loongarch_lasx_xvshuf4i_w:
4160 case Intrinsic::loongarch_lasx_xvpermi_d:
4161 return checkIntrinsicImmArg<8>(Op, 2, DAG);
4162 case Intrinsic::loongarch_lsx_vshuf4i_d:
4163 case Intrinsic::loongarch_lsx_vpermi_w:
4164 case Intrinsic::loongarch_lsx_vbitseli_b:
4165 case Intrinsic::loongarch_lsx_vextrins_b:
4166 case Intrinsic::loongarch_lsx_vextrins_h:
4167 case Intrinsic::loongarch_lsx_vextrins_w:
4168 case Intrinsic::loongarch_lsx_vextrins_d:
4169 case Intrinsic::loongarch_lasx_xvshuf4i_d:
4170 case Intrinsic::loongarch_lasx_xvpermi_w:
4171 case Intrinsic::loongarch_lasx_xvpermi_q:
4172 case Intrinsic::loongarch_lasx_xvbitseli_b:
4173 case Intrinsic::loongarch_lasx_xvextrins_b:
4174 case Intrinsic::loongarch_lasx_xvextrins_h:
4175 case Intrinsic::loongarch_lasx_xvextrins_w:
4176 case Intrinsic::loongarch_lasx_xvextrins_d:
4177 return checkIntrinsicImmArg<8>(Op, 3, DAG);
4178 case Intrinsic::loongarch_lsx_vrepli_b:
4179 case Intrinsic::loongarch_lsx_vrepli_h:
4180 case Intrinsic::loongarch_lsx_vrepli_w:
4181 case Intrinsic::loongarch_lsx_vrepli_d:
4182 case Intrinsic::loongarch_lasx_xvrepli_b:
4183 case Intrinsic::loongarch_lasx_xvrepli_h:
4184 case Intrinsic::loongarch_lasx_xvrepli_w:
4185 case Intrinsic::loongarch_lasx_xvrepli_d:
4186 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
4187 case Intrinsic::loongarch_lsx_vldi:
4188 case Intrinsic::loongarch_lasx_xvldi:
4189 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
4190 }
4191}
4192
4193// Helper function that emits an error message for intrinsics with a chain and
4194// returns the merge values of an UNDEF and the chain.
4195static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
4196 StringRef ErrorMsg,
4197 SelectionDAG &DAG) {
4198 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4199 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4200 SDLoc(Op));
4201}
4202
4203SDValue
4204LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4205 SelectionDAG &DAG) const {
4206 SDLoc DL(Op);
4207 MVT GRLenVT = Subtarget.getGRLenVT();
4208 EVT VT = Op.getValueType();
4209 SDValue Chain = Op.getOperand(0);
4210 const StringRef ErrorMsgOOR = "argument out of range";
4211 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4212 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4213
4214 switch (Op.getConstantOperandVal(1)) {
4215 default:
4216 return Op;
4217 case Intrinsic::loongarch_crc_w_b_w:
4218 case Intrinsic::loongarch_crc_w_h_w:
4219 case Intrinsic::loongarch_crc_w_w_w:
4220 case Intrinsic::loongarch_crc_w_d_w:
4221 case Intrinsic::loongarch_crcc_w_b_w:
4222 case Intrinsic::loongarch_crcc_w_h_w:
4223 case Intrinsic::loongarch_crcc_w_w_w:
4224 case Intrinsic::loongarch_crcc_w_d_w:
4225 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4226 case Intrinsic::loongarch_csrrd_w:
4227 case Intrinsic::loongarch_csrrd_d: {
4228 unsigned Imm = Op.getConstantOperandVal(2);
4229 return !isUInt<14>(Imm)
4230 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4231 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4232 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4233 }
4234 case Intrinsic::loongarch_csrwr_w:
4235 case Intrinsic::loongarch_csrwr_d: {
4236 unsigned Imm = Op.getConstantOperandVal(3);
4237 return !isUInt<14>(Imm)
4238 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4239 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4240 {Chain, Op.getOperand(2),
4241 DAG.getConstant(Imm, DL, GRLenVT)});
4242 }
4243 case Intrinsic::loongarch_csrxchg_w:
4244 case Intrinsic::loongarch_csrxchg_d: {
4245 unsigned Imm = Op.getConstantOperandVal(4);
4246 return !isUInt<14>(Imm)
4247 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4248 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4249 {Chain, Op.getOperand(2), Op.getOperand(3),
4250 DAG.getConstant(Imm, DL, GRLenVT)});
4251 }
4252 case Intrinsic::loongarch_iocsrrd_d: {
4253 return DAG.getNode(
4254 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4255 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4256 }
4257#define IOCSRRD_CASE(NAME, NODE) \
4258 case Intrinsic::loongarch_##NAME: { \
4259 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4260 {Chain, Op.getOperand(2)}); \
4261 }
4262 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4263 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4264 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4265#undef IOCSRRD_CASE
4266 case Intrinsic::loongarch_cpucfg: {
4267 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4268 {Chain, Op.getOperand(2)});
4269 }
4270 case Intrinsic::loongarch_lddir_d: {
4271 unsigned Imm = Op.getConstantOperandVal(3);
4272 return !isUInt<8>(Imm)
4273 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4274 : Op;
4275 }
4276 case Intrinsic::loongarch_movfcsr2gr: {
4277 if (!Subtarget.hasBasicF())
4278 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4279 unsigned Imm = Op.getConstantOperandVal(2);
4280 return !isUInt<2>(Imm)
4281 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4282 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4283 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4284 }
4285 case Intrinsic::loongarch_lsx_vld:
4286 case Intrinsic::loongarch_lsx_vldrepl_b:
4287 case Intrinsic::loongarch_lasx_xvld:
4288 case Intrinsic::loongarch_lasx_xvldrepl_b:
4289 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4290 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4291 : SDValue();
4292 case Intrinsic::loongarch_lsx_vldrepl_h:
4293 case Intrinsic::loongarch_lasx_xvldrepl_h:
4294 return !isShiftedInt<11, 1>(
4295               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4296               ? emitIntrinsicWithChainErrorMessage(
4297 Op, "argument out of range or not a multiple of 2", DAG)
4298 : SDValue();
4299 case Intrinsic::loongarch_lsx_vldrepl_w:
4300 case Intrinsic::loongarch_lasx_xvldrepl_w:
4301 return !isShiftedInt<10, 2>(
4302               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4303               ? emitIntrinsicWithChainErrorMessage(
4304 Op, "argument out of range or not a multiple of 4", DAG)
4305 : SDValue();
4306 case Intrinsic::loongarch_lsx_vldrepl_d:
4307 case Intrinsic::loongarch_lasx_xvldrepl_d:
4308 return !isShiftedInt<9, 3>(
4309               cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4310               ? emitIntrinsicWithChainErrorMessage(
4311 Op, "argument out of range or not a multiple of 8", DAG)
4312 : SDValue();
4313 }
4314}
4315
4316// Helper function that emits an error message for intrinsics with a void
4317// return value and returns the chain.
4318static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
4319 SelectionDAG &DAG) {
4320
4321 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4322 return Op.getOperand(0);
4323}
4324
4325SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4326 SelectionDAG &DAG) const {
4327 SDLoc DL(Op);
4328 MVT GRLenVT = Subtarget.getGRLenVT();
4329 SDValue Chain = Op.getOperand(0);
4330 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4331 SDValue Op2 = Op.getOperand(2);
4332 const StringRef ErrorMsgOOR = "argument out of range";
4333 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4334 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4335 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4336
4337 switch (IntrinsicEnum) {
4338 default:
4339 // TODO: Add more Intrinsics.
4340 return SDValue();
4341 case Intrinsic::loongarch_cacop_d:
4342 case Intrinsic::loongarch_cacop_w: {
4343 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4344 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4345 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4346 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4347 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4348 unsigned Imm1 = Op2->getAsZExtVal();
4349 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4350 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4351 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4352 return Op;
4353 }
4354 case Intrinsic::loongarch_dbar: {
4355 unsigned Imm = Op2->getAsZExtVal();
4356 return !isUInt<15>(Imm)
4357 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4358 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4359 DAG.getConstant(Imm, DL, GRLenVT));
4360 }
4361 case Intrinsic::loongarch_ibar: {
4362 unsigned Imm = Op2->getAsZExtVal();
4363 return !isUInt<15>(Imm)
4364 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4365 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4366 DAG.getConstant(Imm, DL, GRLenVT));
4367 }
4368 case Intrinsic::loongarch_break: {
4369 unsigned Imm = Op2->getAsZExtVal();
4370 return !isUInt<15>(Imm)
4371 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4372 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4373 DAG.getConstant(Imm, DL, GRLenVT));
4374 }
4375 case Intrinsic::loongarch_movgr2fcsr: {
4376 if (!Subtarget.hasBasicF())
4377 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4378 unsigned Imm = Op2->getAsZExtVal();
4379 return !isUInt<2>(Imm)
4380 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4381 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4382 DAG.getConstant(Imm, DL, GRLenVT),
4383 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4384 Op.getOperand(3)));
4385 }
4386 case Intrinsic::loongarch_syscall: {
4387 unsigned Imm = Op2->getAsZExtVal();
4388 return !isUInt<15>(Imm)
4389 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4390 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4391 DAG.getConstant(Imm, DL, GRLenVT));
4392 }
4393#define IOCSRWR_CASE(NAME, NODE) \
4394 case Intrinsic::loongarch_##NAME: { \
4395 SDValue Op3 = Op.getOperand(3); \
4396 return Subtarget.is64Bit() \
4397 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4398 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4399 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4400 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4401 Op3); \
4402 }
4403 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4404 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4405 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4406#undef IOCSRWR_CASE
4407 case Intrinsic::loongarch_iocsrwr_d: {
4408 return !Subtarget.is64Bit()
4409 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4410 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4411 Op2,
4412 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4413 Op.getOperand(3)));
4414 }
4415#define ASRT_LE_GT_CASE(NAME) \
4416 case Intrinsic::loongarch_##NAME: { \
4417 return !Subtarget.is64Bit() \
4418 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4419 : Op; \
4420 }
4421 ASRT_LE_GT_CASE(asrtle_d)
4422 ASRT_LE_GT_CASE(asrtgt_d)
4423#undef ASRT_LE_GT_CASE
4424 case Intrinsic::loongarch_ldpte_d: {
4425 unsigned Imm = Op.getConstantOperandVal(3);
4426 return !Subtarget.is64Bit()
4427 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4428 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4429 : Op;
4430 }
4431 case Intrinsic::loongarch_lsx_vst:
4432 case Intrinsic::loongarch_lasx_xvst:
4433 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4434 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4435 : SDValue();
4436 case Intrinsic::loongarch_lasx_xvstelm_b:
4437 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4438 !isUInt<5>(Op.getConstantOperandVal(5)))
4439 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4440 : SDValue();
4441 case Intrinsic::loongarch_lsx_vstelm_b:
4442 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4443 !isUInt<4>(Op.getConstantOperandVal(5)))
4444 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4445 : SDValue();
4446 case Intrinsic::loongarch_lasx_xvstelm_h:
4447 return (!isShiftedInt<8, 1>(
4448 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4449            !isUInt<5>(Op.getConstantOperandVal(5)))
4450               ? emitIntrinsicErrorMessage(
4451 Op, "argument out of range or not a multiple of 2", DAG)
4452 : SDValue();
4453 case Intrinsic::loongarch_lsx_vstelm_h:
4454 return (!isShiftedInt<8, 1>(
4455 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4456            !isUInt<3>(Op.getConstantOperandVal(5)))
4457               ? emitIntrinsicErrorMessage(
4458 Op, "argument out of range or not a multiple of 2", DAG)
4459 : SDValue();
4460 case Intrinsic::loongarch_lasx_xvstelm_w:
4461 return (!isShiftedInt<8, 2>(
4462 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4463            !isUInt<3>(Op.getConstantOperandVal(5)))
4464               ? emitIntrinsicErrorMessage(
4465 Op, "argument out of range or not a multiple of 4", DAG)
4466 : SDValue();
4467 case Intrinsic::loongarch_lsx_vstelm_w:
4468 return (!isShiftedInt<8, 2>(
4469 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4470            !isUInt<2>(Op.getConstantOperandVal(5)))
4471               ? emitIntrinsicErrorMessage(
4472 Op, "argument out of range or not a multiple of 4", DAG)
4473 : SDValue();
4474 case Intrinsic::loongarch_lasx_xvstelm_d:
4475 return (!isShiftedInt<8, 3>(
4476 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4477            !isUInt<2>(Op.getConstantOperandVal(5)))
4478               ? emitIntrinsicErrorMessage(
4479 Op, "argument out of range or not a multiple of 8", DAG)
4480 : SDValue();
4481 case Intrinsic::loongarch_lsx_vstelm_d:
4482 return (!isShiftedInt<8, 3>(
4483 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4484            !isUInt<1>(Op.getConstantOperandVal(5)))
4485               ? emitIntrinsicErrorMessage(
4486 Op, "argument out of range or not a multiple of 8", DAG)
4487 : SDValue();
4488 }
4489}
4490
4491SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4492 SelectionDAG &DAG) const {
4493 SDLoc DL(Op);
4494 SDValue Lo = Op.getOperand(0);
4495 SDValue Hi = Op.getOperand(1);
4496 SDValue Shamt = Op.getOperand(2);
4497 EVT VT = Lo.getValueType();
4498
4499 // if Shamt-GRLen < 0: // Shamt < GRLen
4500 // Lo = Lo << Shamt
4501 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4502 // else:
4503 // Lo = 0
4504 // Hi = Lo << (Shamt-GRLen)
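  //
  // Illustrative example (not from the source), with GRLen = 32:
  // for (Hi:Lo) = 0x00000000:0x00000001 and Shamt = 40, Shamt-GRLen = 8 >= 0,
  // so Lo = 0 and Hi = 0x00000001 << 8 = 0x00000100, i.e. the 64-bit value 1
  // shifted left by 40, as expected. Note that HiFalse uses the original Lo.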
4505
4506 SDValue Zero = DAG.getConstant(0, DL, VT);
4507 SDValue One = DAG.getConstant(1, DL, VT);
4508 SDValue MinusGRLen =
4509 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4510 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4511 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4512 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4513
4514 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4515 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4516 SDValue ShiftRightLo =
4517 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4518 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4519 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4520 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4521
4522 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4523
4524 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4525 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4526
4527 SDValue Parts[2] = {Lo, Hi};
4528 return DAG.getMergeValues(Parts, DL);
4529}
4530
4531SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4532 SelectionDAG &DAG,
4533 bool IsSRA) const {
4534 SDLoc DL(Op);
4535 SDValue Lo = Op.getOperand(0);
4536 SDValue Hi = Op.getOperand(1);
4537 SDValue Shamt = Op.getOperand(2);
4538 EVT VT = Lo.getValueType();
4539
4540 // SRA expansion:
4541 // if Shamt-GRLen < 0: // Shamt < GRLen
4542 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4543 // Hi = Hi >>s Shamt
4544 // else:
4545 // Lo = Hi >>s (Shamt-GRLen);
4546 // Hi = Hi >>s (GRLen-1)
4547 //
4548 // SRL expansion:
4549 // if Shamt-GRLen < 0: // Shamt < GRLen
4550 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4551 // Hi = Hi >>u Shamt
4552 // else:
4553 // Lo = Hi >>u (Shamt-GRLen);
4554 // Hi = 0;
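  //
  // Illustrative SRL example (not from the source), with GRLen = 32:
  // for (Hi:Lo) = 0x00000012:0x34000000 and Shamt = 36, Shamt-GRLen = 4 >= 0,
  // so Lo = Hi >>u 4 = 0x00000001 and Hi = 0, which matches
  // 0x0000001234000000 >> 36.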
4555
4556 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4557
4558 SDValue Zero = DAG.getConstant(0, DL, VT);
4559 SDValue One = DAG.getConstant(1, DL, VT);
4560 SDValue MinusGRLen =
4561 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4562 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4563 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4564 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4565
4566 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4567 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4568 SDValue ShiftLeftHi =
4569 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4570 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4571 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4572 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4573 SDValue HiFalse =
4574 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4575
4576 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4577
4578 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4579 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4580
4581 SDValue Parts[2] = {Lo, Hi};
4582 return DAG.getMergeValues(Parts, DL);
4583}
4584
4585// Returns the opcode of the target-specific SDNode that implements the 32-bit
4586// form of the given Opcode.
4587static unsigned getLoongArchWOpcode(unsigned Opcode) {
4588 switch (Opcode) {
4589 default:
4590 llvm_unreachable("Unexpected opcode");
4591 case ISD::SDIV:
4592 return LoongArchISD::DIV_W;
4593 case ISD::UDIV:
4594 return LoongArchISD::DIV_WU;
4595 case ISD::SREM:
4596 return LoongArchISD::MOD_W;
4597 case ISD::UREM:
4598 return LoongArchISD::MOD_WU;
4599 case ISD::SHL:
4600 return LoongArchISD::SLL_W;
4601 case ISD::SRA:
4602 return LoongArchISD::SRA_W;
4603 case ISD::SRL:
4604 return LoongArchISD::SRL_W;
4605 case ISD::ROTL:
4606 case ISD::ROTR:
4607 return LoongArchISD::ROTR_W;
4608 case ISD::CTTZ:
4609 return LoongArchISD::CTZ_W;
4610 case ISD::CTLZ:
4611 return LoongArchISD::CLZ_W;
4612 }
4613}
4614
4615// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4616// node. Because i8/i16/i32 are not legal types for LA64, these operations
4617// would otherwise be promoted to i64, making it difficult to select the
4618// SLL_W/.../*_W nodes later, because the fact that the operation was
4619// originally of type i8/i16/i32 is lost.
4620static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4621 unsigned ExtOpc = ISD::ANY_EXTEND) {
4622 SDLoc DL(N);
4623 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4624 SDValue NewOp0, NewRes;
4625
4626 switch (NumOp) {
4627 default:
4628 llvm_unreachable("Unexpected NumOp");
4629 case 1: {
4630 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4631 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4632 break;
4633 }
4634 case 2: {
4635 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4636 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4637 if (N->getOpcode() == ISD::ROTL) {
4638 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4639 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4640 }
4641 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4642 break;
4643 }
4644 // TODO:Handle more NumOp.
4645 }
4646
4647 // ReplaceNodeResults requires we maintain the same type for the return
4648 // value.
4649 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4650}
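// For instance, an i32 (rotl x, y) reaching this path on LA64 is rewritten as
// ROTR_W on any-extended i64 operands with the amount replaced by (32 - y),
// since only a rotate-right word instruction exists; the i64 result is then
// truncated back to i32 for ReplaceNodeResults.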
4651
4652// Converts the given 32-bit operation to an i64 operation with sign-extension
4653// semantics, reducing the number of sign-extension instructions needed.
4654static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4655 SDLoc DL(N);
4656 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4657 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4658 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4659 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4660 DAG.getValueType(MVT::i32));
4661 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4662}
4663
4664// Helper function that emits an error message for intrinsics with or without
4665// chain and returns an UNDEF and (if present) the chain as the results.
4666static void emitErrorAndReplaceIntrinsicResults(
4667    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4668 StringRef ErrorMsg, bool WithChain = true) {
4669 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4670 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4671 if (!WithChain)
4672 return;
4673 Results.push_back(N->getOperand(0));
4674}
4675
4676template <unsigned N>
4677static void
4678replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4679 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4680 unsigned ResOp) {
4681 const StringRef ErrorMsgOOR = "argument out of range";
4682 unsigned Imm = Node->getConstantOperandVal(2);
4683  if (!isUInt<N>(Imm)) {
4684    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4685 /*WithChain=*/false);
4686 return;
4687 }
4688 SDLoc DL(Node);
4689 SDValue Vec = Node->getOperand(1);
4690
4691 SDValue PickElt =
4692 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4693                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4694                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
4695 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4696 PickElt.getValue(0)));
4697}
4698
4699static void
4700replaceVecCondBranchResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4701                            SelectionDAG &DAG,
4702 const LoongArchSubtarget &Subtarget,
4703 unsigned ResOp) {
4704 SDLoc DL(N);
4705 SDValue Vec = N->getOperand(1);
4706
4707 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4708 Results.push_back(
4709 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4710}
4711
4712static void
4713replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4714 SelectionDAG &DAG,
4715 const LoongArchSubtarget &Subtarget) {
4716 switch (N->getConstantOperandVal(0)) {
4717 default:
4718 llvm_unreachable("Unexpected Intrinsic.");
4719 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4720 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4721 LoongArchISD::VPICK_SEXT_ELT);
4722 break;
4723 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4724 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4725 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4726 LoongArchISD::VPICK_SEXT_ELT);
4727 break;
4728 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4729 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4730 LoongArchISD::VPICK_SEXT_ELT);
4731 break;
4732 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4733 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4734 LoongArchISD::VPICK_ZEXT_ELT);
4735 break;
4736 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4737 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4738 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4739 LoongArchISD::VPICK_ZEXT_ELT);
4740 break;
4741 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4742 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4743 LoongArchISD::VPICK_ZEXT_ELT);
4744 break;
4745 case Intrinsic::loongarch_lsx_bz_b:
4746 case Intrinsic::loongarch_lsx_bz_h:
4747 case Intrinsic::loongarch_lsx_bz_w:
4748 case Intrinsic::loongarch_lsx_bz_d:
4749 case Intrinsic::loongarch_lasx_xbz_b:
4750 case Intrinsic::loongarch_lasx_xbz_h:
4751 case Intrinsic::loongarch_lasx_xbz_w:
4752 case Intrinsic::loongarch_lasx_xbz_d:
4753 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4754 LoongArchISD::VALL_ZERO);
4755 break;
4756 case Intrinsic::loongarch_lsx_bz_v:
4757 case Intrinsic::loongarch_lasx_xbz_v:
4758 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4759 LoongArchISD::VANY_ZERO);
4760 break;
4761 case Intrinsic::loongarch_lsx_bnz_b:
4762 case Intrinsic::loongarch_lsx_bnz_h:
4763 case Intrinsic::loongarch_lsx_bnz_w:
4764 case Intrinsic::loongarch_lsx_bnz_d:
4765 case Intrinsic::loongarch_lasx_xbnz_b:
4766 case Intrinsic::loongarch_lasx_xbnz_h:
4767 case Intrinsic::loongarch_lasx_xbnz_w:
4768 case Intrinsic::loongarch_lasx_xbnz_d:
4769 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4770 LoongArchISD::VALL_NONZERO);
4771 break;
4772 case Intrinsic::loongarch_lsx_bnz_v:
4773 case Intrinsic::loongarch_lasx_xbnz_v:
4774 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4775 LoongArchISD::VANY_NONZERO);
4776 break;
4777 }
4778}
4779
4780static void replaceCMP_XCHG_128Results(SDNode *N,
4781                                       SmallVectorImpl<SDValue> &Results,
4782                                       SelectionDAG &DAG) {
4783 assert(N->getValueType(0) == MVT::i128 &&
4784 "AtomicCmpSwap on types less than 128 should be legal");
4785 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4786
4787 unsigned Opcode;
4788 switch (MemOp->getMergedOrdering()) {
4792 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4793 break;
4796 Opcode = LoongArch::PseudoCmpXchg128;
4797 break;
4798 default:
4799 llvm_unreachable("Unexpected ordering!");
4800 }
4801
4802 SDLoc DL(N);
4803 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4804 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4805 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4806 NewVal.first, NewVal.second, N->getOperand(0)};
4807
4808 SDNode *CmpSwap = DAG.getMachineNode(
4809 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4810 Ops);
4811 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4812 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4813 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4814 Results.push_back(SDValue(CmpSwap, 3));
4815}
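// In short: the i128 expected and new values are split into i64 lo/hi halves,
// a PseudoCmpXchg128{,Acquire} machine node is built from
// (ptr, cmp.lo, cmp.hi, new.lo, new.hi, chain), and its first two i64 results
// are reassembled into the i128 result with BUILD_PAIR.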
4816
4817void LoongArchTargetLowering::ReplaceNodeResults(
4818    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4819  SDLoc DL(N);
4820 EVT VT = N->getValueType(0);
4821 switch (N->getOpcode()) {
4822 default:
4823 llvm_unreachable("Don't know how to legalize this operation");
4824 case ISD::ADD:
4825 case ISD::SUB:
4826 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4827 "Unexpected custom legalisation");
4828 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4829 break;
4830 case ISD::SDIV:
4831 case ISD::UDIV:
4832 case ISD::SREM:
4833 case ISD::UREM:
4834 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4835 "Unexpected custom legalisation");
4836 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4837                                          Subtarget.hasDiv32() && VT == MVT::i32
4838                                              ? ISD::ANY_EXTEND
4839 : ISD::SIGN_EXTEND));
4840 break;
4841 case ISD::SHL:
4842 case ISD::SRA:
4843 case ISD::SRL:
4844 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4845 "Unexpected custom legalisation");
4846 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4847 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4848 break;
4849 }
4850 break;
4851 case ISD::ROTL:
4852 case ISD::ROTR:
4853 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4854 "Unexpected custom legalisation");
4855 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4856 break;
4857 case ISD::FP_TO_SINT: {
4858 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4859 "Unexpected custom legalisation");
4860 SDValue Src = N->getOperand(0);
4861 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4862    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4863        TargetLowering::TypeSoftenFloat) {
4864 if (!isTypeLegal(Src.getValueType()))
4865 return;
4866 if (Src.getValueType() == MVT::f16)
4867 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4868 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4869 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4870 return;
4871 }
4872 // If the FP type needs to be softened, emit a library call using the 'si'
4873 // version. If we left it to default legalization we'd end up with 'di'.
4874 RTLIB::Libcall LC;
4875 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4876 MakeLibCallOptions CallOptions;
4877 EVT OpVT = Src.getValueType();
4878 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4879 SDValue Chain = SDValue();
4880 SDValue Result;
4881 std::tie(Result, Chain) =
4882 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4883 Results.push_back(Result);
4884 break;
4885 }
4886 case ISD::BITCAST: {
4887 SDValue Src = N->getOperand(0);
4888 EVT SrcVT = Src.getValueType();
4889 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4890 Subtarget.hasBasicF()) {
4891 SDValue Dst =
4892 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4893 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4894 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4895 SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
4896 DAG.getVTList(MVT::i32, MVT::i32), Src);
4897 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4898 NewReg.getValue(0), NewReg.getValue(1));
4899 Results.push_back(RetReg);
4900 }
4901 break;
4902 }
4903 case ISD::FP_TO_UINT: {
4904 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4905 "Unexpected custom legalisation");
4906 auto &TLI = DAG.getTargetLoweringInfo();
4907 SDValue Tmp1, Tmp2;
4908 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4909 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4910 break;
4911 }
4912 case ISD::BSWAP: {
4913 SDValue Src = N->getOperand(0);
4914 assert((VT == MVT::i16 || VT == MVT::i32) &&
4915 "Unexpected custom legalization");
4916 MVT GRLenVT = Subtarget.getGRLenVT();
4917 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4918 SDValue Tmp;
4919 switch (VT.getSizeInBits()) {
4920 default:
4921 llvm_unreachable("Unexpected operand width");
4922 case 16:
4923 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4924 break;
4925 case 32:
4926      // Only LA64 will get here, due to the size mismatch between VT and
4927      // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4928 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4929 break;
4930 }
4931 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4932 break;
4933 }
4934 case ISD::BITREVERSE: {
4935 SDValue Src = N->getOperand(0);
4936 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4937 "Unexpected custom legalization");
4938 MVT GRLenVT = Subtarget.getGRLenVT();
4939 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4940 SDValue Tmp;
4941 switch (VT.getSizeInBits()) {
4942 default:
4943 llvm_unreachable("Unexpected operand width");
4944 case 8:
4945 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4946 break;
4947 case 32:
4948 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4949 break;
4950 }
4951 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4952 break;
4953 }
4954 case ISD::CTLZ:
4955 case ISD::CTTZ: {
4956 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4957 "Unexpected custom legalisation");
4958 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4959 break;
4960 }
4961  case ISD::INTRINSIC_W_CHAIN: {
4962    SDValue Chain = N->getOperand(0);
4963 SDValue Op2 = N->getOperand(2);
4964 MVT GRLenVT = Subtarget.getGRLenVT();
4965 const StringRef ErrorMsgOOR = "argument out of range";
4966 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4967 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4968
4969 switch (N->getConstantOperandVal(1)) {
4970 default:
4971 llvm_unreachable("Unexpected Intrinsic.");
4972 case Intrinsic::loongarch_movfcsr2gr: {
4973 if (!Subtarget.hasBasicF()) {
4974 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4975 return;
4976 }
4977 unsigned Imm = Op2->getAsZExtVal();
4978 if (!isUInt<2>(Imm)) {
4979 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4980 return;
4981 }
4982 SDValue MOVFCSR2GRResults = DAG.getNode(
4983 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4984 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4985 Results.push_back(
4986 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4987 Results.push_back(MOVFCSR2GRResults.getValue(1));
4988 break;
4989 }
4990#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4991 case Intrinsic::loongarch_##NAME: { \
4992 SDValue NODE = DAG.getNode( \
4993 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4994 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4995 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4996 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4997 Results.push_back(NODE.getValue(1)); \
4998 break; \
4999 }
5000 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
5001 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
5002 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
5003 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
5004 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
5005 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
5006#undef CRC_CASE_EXT_BINARYOP
5007
5008#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
5009 case Intrinsic::loongarch_##NAME: { \
5010 SDValue NODE = DAG.getNode( \
5011 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5012 {Chain, Op2, \
5013 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
5014 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
5015 Results.push_back(NODE.getValue(1)); \
5016 break; \
5017 }
5018 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
5019 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
5020#undef CRC_CASE_EXT_UNARYOP
5021#define CSR_CASE(ID) \
5022 case Intrinsic::loongarch_##ID: { \
5023 if (!Subtarget.is64Bit()) \
5024 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
5025 break; \
5026 }
5027 CSR_CASE(csrrd_d);
5028 CSR_CASE(csrwr_d);
5029 CSR_CASE(csrxchg_d);
5030 CSR_CASE(iocsrrd_d);
5031#undef CSR_CASE
5032 case Intrinsic::loongarch_csrrd_w: {
5033 unsigned Imm = Op2->getAsZExtVal();
5034 if (!isUInt<14>(Imm)) {
5035 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5036 return;
5037 }
5038 SDValue CSRRDResults =
5039 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
5040 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
5041 Results.push_back(
5042 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
5043 Results.push_back(CSRRDResults.getValue(1));
5044 break;
5045 }
5046 case Intrinsic::loongarch_csrwr_w: {
5047 unsigned Imm = N->getConstantOperandVal(3);
5048 if (!isUInt<14>(Imm)) {
5049 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5050 return;
5051 }
5052 SDValue CSRWRResults =
5053 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
5054 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5055 DAG.getConstant(Imm, DL, GRLenVT)});
5056 Results.push_back(
5057 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
5058 Results.push_back(CSRWRResults.getValue(1));
5059 break;
5060 }
5061 case Intrinsic::loongarch_csrxchg_w: {
5062 unsigned Imm = N->getConstantOperandVal(4);
5063 if (!isUInt<14>(Imm)) {
5064 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
5065 return;
5066 }
5067 SDValue CSRXCHGResults = DAG.getNode(
5068 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
5069 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
5070 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
5071 DAG.getConstant(Imm, DL, GRLenVT)});
5072 Results.push_back(
5073 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
5074 Results.push_back(CSRXCHGResults.getValue(1));
5075 break;
5076 }
5077#define IOCSRRD_CASE(NAME, NODE) \
5078 case Intrinsic::loongarch_##NAME: { \
5079 SDValue IOCSRRDResults = \
5080 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
5081 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
5082 Results.push_back( \
5083 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
5084 Results.push_back(IOCSRRDResults.getValue(1)); \
5085 break; \
5086 }
5087 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
5088 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
5089 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
5090#undef IOCSRRD_CASE
5091 case Intrinsic::loongarch_cpucfg: {
5092 SDValue CPUCFGResults =
5093 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
5094 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
5095 Results.push_back(
5096 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
5097 Results.push_back(CPUCFGResults.getValue(1));
5098 break;
5099 }
5100 case Intrinsic::loongarch_lddir_d: {
5101 if (!Subtarget.is64Bit()) {
5102 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
5103 return;
5104 }
5105 break;
5106 }
5107 }
5108 break;
5109 }
5110 case ISD::READ_REGISTER: {
5111 if (Subtarget.is64Bit())
5112 DAG.getContext()->emitError(
5113 "On LA64, only 64-bit registers can be read.");
5114 else
5115 DAG.getContext()->emitError(
5116 "On LA32, only 32-bit registers can be read.");
5117 Results.push_back(DAG.getUNDEF(VT));
5118 Results.push_back(N->getOperand(0));
5119 break;
5120 }
5121 case ISD::INTRINSIC_WO_CHAIN: {
5122 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
5123 break;
5124 }
5125 case ISD::LROUND: {
5126 SDValue Op0 = N->getOperand(0);
5127 EVT OpVT = Op0.getValueType();
5128 RTLIB::Libcall LC =
5129 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
5130 MakeLibCallOptions CallOptions;
5131 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
5132 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
5133 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
5134 Results.push_back(Result);
5135 break;
5136 }
5137 case ISD::ATOMIC_CMP_SWAP: {
5139 break;
5140 }
5141 case ISD::TRUNCATE: {
5142 MVT VT = N->getSimpleValueType(0);
5143 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
5144 return;
5145
5146 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
5147 SDValue In = N->getOperand(0);
5148 EVT InVT = In.getValueType();
5149 EVT InEltVT = InVT.getVectorElementType();
5150 EVT EltVT = VT.getVectorElementType();
5151 unsigned MinElts = VT.getVectorNumElements();
5152 unsigned WidenNumElts = WidenVT.getVectorNumElements();
5153 unsigned InBits = InVT.getSizeInBits();
5154
5155 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
5156 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
5157 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
5158 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
5159 for (unsigned I = 0; I < MinElts; ++I)
5160 TruncMask[I] = Scale * I;
5161
5162 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
5163 MVT SVT = In.getSimpleValueType().getScalarType();
5164 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
5165 SDValue WidenIn =
5166 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
5167 DAG.getVectorIdxConstant(0, DL));
5168 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
5169 "Illegal vector type in truncation");
5170 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
5171 Results.push_back(
5172 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
5173 return;
5174 }
5175 }
5176
5177 break;
5178 }
5179 }
5180}
5181
5182/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5183 static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL,
5184 SelectionDAG &DAG) {
5185 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5186
5187 MVT VT = N->getSimpleValueType(0);
5188 if (!VT.is128BitVector() && !VT.is256BitVector())
5189 return SDValue();
5190
5191 SDValue X, Y;
5192 SDValue N0 = N->getOperand(0);
5193 SDValue N1 = N->getOperand(1);
5194
5195 if (SDValue Not = isNOT(N0, DAG)) {
5196 X = Not;
5197 Y = N1;
5198 } else if (SDValue Not = isNOT(N1, DAG)) {
5199 X = Not;
5200 Y = N0;
5201 } else
5202 return SDValue();
5203
5204 X = DAG.getBitcast(VT, X);
5205 Y = DAG.getBitcast(VT, Y);
5206 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5207}
5208
5209 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
5210 TargetLowering::DAGCombinerInfo &DCI,
5211 const LoongArchSubtarget &Subtarget) {
5212 if (DCI.isBeforeLegalizeOps())
5213 return SDValue();
5214
5215 SDValue FirstOperand = N->getOperand(0);
5216 SDValue SecondOperand = N->getOperand(1);
5217 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5218 EVT ValTy = N->getValueType(0);
5219 SDLoc DL(N);
5220 uint64_t lsb, msb;
5221 unsigned SMIdx, SMLen;
5222 ConstantSDNode *CN;
5223 SDValue NewOperand;
5224 MVT GRLenVT = Subtarget.getGRLenVT();
5225
5226 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5227 return R;
5228
5229 // BSTRPICK requires the 32S feature.
5230 if (!Subtarget.has32S())
5231 return SDValue();
5232
5233 // Op's second operand must be a shifted mask.
5234 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5235 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5236 return SDValue();
5237
5238 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5239 // Pattern match BSTRPICK.
5240 // $dst = and ((sra or srl) $src, lsb), (2**len - 1)
5241 // => BSTRPICK $dst, $src, msb, lsb
5242 // where msb = lsb + len - 1
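// For example, with lsb = 8 and len = 8:
//   $dst = and (srl $src, 8), 0xff
//   => BSTRPICK $dst, $src, 15, 8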
5243
5244 // The second operand of the shift must be an immediate.
5245 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5246 return SDValue();
5247
5248 lsb = CN->getZExtValue();
5249
5250 // Return if the shifted mask does not start at bit 0 or the sum of its
5251 // length and lsb exceeds the word's size.
5252 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5253 return SDValue();
5254
5255 NewOperand = FirstOperand.getOperand(0);
5256 } else {
5257 // Pattern match BSTRPICK.
5258 // $dst = and $src, (2**len - 1), if len > 12
5259 // => BSTRPICK $dst, $src, msb, lsb
5260 // where lsb = 0 and msb = len - 1
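// For example, with len = 20 (mask 0xfffff, too wide for a single andi):
//   $dst = and $src, 0xfffff
//   => BSTRPICK $dst, $src, 19, 0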
5261
5262 // If the mask is <= 0xfff, andi can be used instead.
5263 if (CN->getZExtValue() <= 0xfff)
5264 return SDValue();
5265
5266 // Return if the MSB would exceed the value's bit width.
5267 if (SMIdx + SMLen > ValTy.getSizeInBits())
5268 return SDValue();
5269
5270 if (SMIdx > 0) {
5271 // Omit if the constant has more than 2 uses. This is a conservative
5272 // decision. Whether it is a win depends on the HW microarchitecture.
5273 // However, it should always be better for 1 and 2 uses.
5274 if (CN->use_size() > 2)
5275 return SDValue();
5276 // Return if the constant can be composed by a single LU12I.W.
5277 if ((CN->getZExtValue() & 0xfff) == 0)
5278 return SDValue();
5279 // Return if the constant can be composed by a single ADDI with
5280 // the zero register.
5281 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5282 return SDValue();
5283 }
5284
5285 lsb = SMIdx;
5286 NewOperand = FirstOperand;
5287 }
5288
5289 msb = lsb + SMLen - 1;
5290 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5291 DAG.getConstant(msb, DL, GRLenVT),
5292 DAG.getConstant(lsb, DL, GRLenVT));
5293 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5294 return NR0;
5295 // Try to optimize to
5296 // bstrpick $Rd, $Rs, msb, lsb
5297 // slli $Rd, $Rd, lsb
5298 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5299 DAG.getConstant(lsb, DL, GRLenVT));
5300}
5301
5302 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
5303 TargetLowering::DAGCombinerInfo &DCI,
5304 const LoongArchSubtarget &Subtarget) {
5305 // BSTRPICK requires the 32S feature.
5306 if (!Subtarget.has32S())
5307 return SDValue();
5308
5309 if (DCI.isBeforeLegalizeOps())
5310 return SDValue();
5311
5312 // $dst = srl (and $src, Mask), Shamt
5313 // =>
5314 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5315 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5316 //
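// For example, with Mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and Shamt = 10:
//   $dst = srl (and $src, 0xff00), 10
//   => BSTRPICK $dst, $src, 15, 10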
5317
5318 SDValue FirstOperand = N->getOperand(0);
5319 ConstantSDNode *CN;
5320 EVT ValTy = N->getValueType(0);
5321 SDLoc DL(N);
5322 MVT GRLenVT = Subtarget.getGRLenVT();
5323 unsigned MaskIdx, MaskLen;
5324 uint64_t Shamt;
5325
5326 // The first operand must be an AND and the second operand of the AND must be
5327 // a shifted mask.
5328 if (FirstOperand.getOpcode() != ISD::AND ||
5329 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5330 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5331 return SDValue();
5332
5333 // The second operand (shift amount) must be an immediate.
5334 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5335 return SDValue();
5336
5337 Shamt = CN->getZExtValue();
5338 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5339 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5340 FirstOperand->getOperand(0),
5341 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5342 DAG.getConstant(Shamt, DL, GRLenVT));
5343
5344 return SDValue();
5345}
5346
5347// Helper to peek through bitops/trunc/setcc to determine size of source vector.
5348// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
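// For example, a v8i1 value built as
//   (and (setcc v8i32 ...), (xor (setcc v8i32 ...), all-ones))
// is reported as coming from a 256-bit source vector.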
5349static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
5350 unsigned Depth) {
5351 // Limit recursion.
5352 if (Depth >= SelectionDAG::MaxRecursionDepth)
5353 return false;
5354 switch (Src.getOpcode()) {
5355 case ISD::SETCC:
5356 case ISD::TRUNCATE:
5357 return Src.getOperand(0).getValueSizeInBits() == Size;
5358 case ISD::FREEZE:
5359 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
5360 case ISD::AND:
5361 case ISD::XOR:
5362 case ISD::OR:
5363 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
5364 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
5365 case ISD::SELECT:
5366 case ISD::VSELECT:
5367 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
5368 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
5369 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
5370 case ISD::BUILD_VECTOR:
5371 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
5372 ISD::isBuildVectorAllOnes(Src.getNode());
5373 }
5374 return false;
5375}
5376
5377// Helper to push sign extension of vXi1 SETCC result through bitops.
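// For example, sign-extending (and (setcc ...), (setcc ...)) to v8i32 yields
// (and (sign_extend (setcc ...)), (sign_extend (setcc ...))) instead of
// sign-extending the i1 result of the AND directly.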
5378 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
5379 SDValue Src, const SDLoc &DL) {
5380 switch (Src.getOpcode()) {
5381 case ISD::SETCC:
5382 case ISD::FREEZE:
5383 case ISD::TRUNCATE:
5384 case ISD::BUILD_VECTOR:
5385 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5386 case ISD::AND:
5387 case ISD::XOR:
5388 case ISD::OR:
5389 return DAG.getNode(
5390 Src.getOpcode(), DL, SExtVT,
5391 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5392 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5393 case ISD::SELECT:
5394 case ISD::VSELECT:
5395 return DAG.getSelect(
5396 DL, SExtVT, Src.getOperand(0),
5397 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5398 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5399 }
5400 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5401}
5402
5403static SDValue
5404 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
5405 TargetLowering::DAGCombinerInfo &DCI,
5406 const LoongArchSubtarget &Subtarget) {
5407 SDLoc DL(N);
5408 EVT VT = N->getValueType(0);
5409 SDValue Src = N->getOperand(0);
5410 EVT SrcVT = Src.getValueType();
5411
5412 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
5413 return SDValue();
5414
5415 bool UseLASX;
5416 unsigned Opc = ISD::DELETED_NODE;
5417 EVT CmpVT = Src.getOperand(0).getValueType();
5418 EVT EltVT = CmpVT.getVectorElementType();
5419
5420 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
5421 UseLASX = false;
5422 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
5423 CmpVT.getSizeInBits() == 256)
5424 UseLASX = true;
5425 else
5426 return SDValue();
5427
5428 SDValue SrcN1 = Src.getOperand(1);
5429 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
5430 default:
5431 break;
5432 case ISD::SETEQ:
5433 // x == 0 => not (vmsknez.b x)
5434 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5435 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
5436 break;
5437 case ISD::SETGT:
5438 // x > -1 => vmskgez.b x
5439 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
5440 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5441 break;
5442 case ISD::SETGE:
5443 // x >= 0 => vmskgez.b x
5444 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5445 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
5446 break;
5447 case ISD::SETLT:
5448 // x < 0 => vmskltz.{b,h,w,d} x
5449 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
5450 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5451 EltVT == MVT::i64))
5452 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5453 break;
5454 case ISD::SETLE:
5455 // x <= -1 => vmskltz.{b,h,w,d} x
5456 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
5457 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
5458 EltVT == MVT::i64))
5459 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5460 break;
5461 case ISD::SETNE:
5462 // x != 0 => vmsknez.b x
5463 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
5464 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
5465 break;
5466 }
5467
5468 if (Opc == ISD::DELETED_NODE)
5469 return SDValue();
5470
5471 SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
5472 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5473 V = DAG.getZExtOrTrunc(V, DL, T);
5474 return DAG.getBitcast(VT, V);
5475}
5476
5477 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
5478 TargetLowering::DAGCombinerInfo &DCI,
5479 const LoongArchSubtarget &Subtarget) {
5480 SDLoc DL(N);
5481 EVT VT = N->getValueType(0);
5482 SDValue Src = N->getOperand(0);
5483 EVT SrcVT = Src.getValueType();
5484 MVT GRLenVT = Subtarget.getGRLenVT();
5485
5486 if (!DCI.isBeforeLegalizeOps())
5487 return SDValue();
5488
5489 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5490 return SDValue();
5491
5492 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5493 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5494 if (Res)
5495 return Res;
5496
5497 // Generate vXi1 using [X]VMSKLTZ
5498 MVT SExtVT;
5499 unsigned Opc;
5500 bool UseLASX = false;
5501 bool PropagateSExt = false;
5502
5503 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5504 EVT CmpVT = Src.getOperand(0).getValueType();
5505 if (CmpVT.getSizeInBits() > 256)
5506 return SDValue();
5507 }
5508
5509 switch (SrcVT.getSimpleVT().SimpleTy) {
5510 default:
5511 return SDValue();
5512 case MVT::v2i1:
5513 SExtVT = MVT::v2i64;
5514 break;
5515 case MVT::v4i1:
5516 SExtVT = MVT::v4i32;
5517 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5518 SExtVT = MVT::v4i64;
5519 UseLASX = true;
5520 PropagateSExt = true;
5521 }
5522 break;
5523 case MVT::v8i1:
5524 SExtVT = MVT::v8i16;
5525 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5526 SExtVT = MVT::v8i32;
5527 UseLASX = true;
5528 PropagateSExt = true;
5529 }
5530 break;
5531 case MVT::v16i1:
5532 SExtVT = MVT::v16i8;
5533 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5534 SExtVT = MVT::v16i16;
5535 UseLASX = true;
5536 PropagateSExt = true;
5537 }
5538 break;
5539 case MVT::v32i1:
5540 SExtVT = MVT::v32i8;
5541 UseLASX = true;
5542 break;
5543 };
5544 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5545 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5546
5547 SDValue V;
5548 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5549 if (Src.getSimpleValueType() == MVT::v32i8) {
5550 SDValue Lo, Hi;
5551 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5552 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5553 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5554 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5555 DAG.getShiftAmountConstant(16, GRLenVT, DL));
5556 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5557 } else if (UseLASX) {
5558 return SDValue();
5559 }
5560 }
5561
5562 if (!V) {
5563 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5564 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5565 }
5566
5567 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5568 V = DAG.getZExtOrTrunc(V, DL, T);
5569 return DAG.getBitcast(VT, V);
5570}
5571
5572 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5573 TargetLowering::DAGCombinerInfo &DCI,
5574 const LoongArchSubtarget &Subtarget) {
5575 MVT GRLenVT = Subtarget.getGRLenVT();
5576 EVT ValTy = N->getValueType(0);
5577 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5578 ConstantSDNode *CN0, *CN1;
5579 SDLoc DL(N);
5580 unsigned ValBits = ValTy.getSizeInBits();
5581 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5582 unsigned Shamt;
5583 bool SwapAndRetried = false;
5584
5585 // BSTRPICK requires the 32S feature.
5586 if (!Subtarget.has32S())
5587 return SDValue();
5588
5589 if (DCI.isBeforeLegalizeOps())
5590 return SDValue();
5591
5592 if (ValBits != 32 && ValBits != 64)
5593 return SDValue();
5594
5595Retry:
5596 // 1st pattern to match BSTRINS:
5597 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5598 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5599 // =>
5600 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
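// For example, with lsb = 8 and size = 8 (mask1 = 0xff00, mask0 = ~0xff00):
//   R = or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
//   => R = BSTRINS X, Y, 15, 8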
5601 if (N0.getOpcode() == ISD::AND &&
5602 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5603 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5604 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5605 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5606 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5607 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5608 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5609 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5610 (MaskIdx0 + MaskLen0 <= ValBits)) {
5611 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5612 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5613 N1.getOperand(0).getOperand(0),
5614 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5615 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5616 }
5617
5618 // 2nd pattern to match BSTRINS:
5619 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5620 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5621 // =>
5622 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
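// For example, with lsb = 8 and size = 8 (mask1 = 0xff, mask0 = ~0xff00):
//   R = or (and X, ~0xff00), (shl (and Y, 0xff), 8)
//   => R = BSTRINS X, Y, 15, 8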
5623 if (N0.getOpcode() == ISD::AND &&
5624 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5625 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5626 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5627 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5628 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5629 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5630 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5631 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5632 (MaskIdx0 + MaskLen0 <= ValBits)) {
5633 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5634 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5635 N1.getOperand(0).getOperand(0),
5636 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5637 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5638 }
5639
5640 // 3rd pattern to match BSTRINS:
5641 // R = or (and X, mask0), (and Y, mask1)
5642 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5643 // =>
5644 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5645 // where msb = lsb + size - 1
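// For example, with ~mask0 = 0xff00 (lsb = 8, size = 8) and mask1 = 0xff00:
//   R = or (and X, ~0xff00), (and Y, 0xff00)
//   => R = BSTRINS X, (shr (and Y, 0xff00), 8), 15, 8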
5646 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5647 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5648 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5649 (MaskIdx0 + MaskLen0 <= 64) &&
5650 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5651 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5652 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5653 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5654 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5655 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5656 DAG.getConstant(ValBits == 32
5657 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5658 : (MaskIdx0 + MaskLen0 - 1),
5659 DL, GRLenVT),
5660 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5661 }
5662
5663 // 4th pattern to match BSTRINS:
5664 // R = or (and X, mask), (shl Y, shamt)
5665 // where mask = (2**shamt - 1)
5666 // =>
5667 // R = BSTRINS X, Y, ValBits - 1, shamt
5668 // where ValBits = 32 or 64
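// For example, with shamt = 8 on an i32 value (mask = 0xff):
//   R = or (and X, 0xff), (shl Y, 8)
//   => R = BSTRINS X, Y, 31, 8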
5669 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5670 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5671 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5672 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5673 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5674 (MaskIdx0 + MaskLen0 <= ValBits)) {
5675 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5676 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5677 N1.getOperand(0),
5678 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5679 DAG.getConstant(Shamt, DL, GRLenVT));
5680 }
5681
5682 // 5th pattern to match BSTRINS:
5683 // R = or (and X, mask), const
5684 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5685 // =>
5686 // R = BSTRINS X, (const >> lsb), msb, lsb
5687 // where msb = lsb + size - 1
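// For example, with ~mask = 0xff00 and const = 0x2a00:
//   R = or (and X, ~0xff00), 0x2a00
//   => R = BSTRINS X, 0x2a, 15, 8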
5688 if (N0.getOpcode() == ISD::AND &&
5689 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5690 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5691 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5692 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5693 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5694 return DAG.getNode(
5695 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5696 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5697 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5698 : (MaskIdx0 + MaskLen0 - 1),
5699 DL, GRLenVT),
5700 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5701 }
5702
5703 // 6th pattern.
5704 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5705 // by the incoming bits are known to be zero.
5706 // =>
5707 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5708 //
5709 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5710 // pattern is more common than the 1st. So we put the 1st before the 6th in
5711 // order to match as many nodes as possible.
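// For example, with mask = 0xff and shamt = 8, where bits 8..15 of b are zero:
//   a = or b, (shl (and c, 0xff), 8)
//   => a = BSTRINS b, c, 15, 8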
5712 ConstantSDNode *CNMask, *CNShamt;
5713 unsigned MaskIdx, MaskLen;
5714 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5715 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5716 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5717 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5718 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5719 Shamt = CNShamt->getZExtValue();
5720 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5721 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5722 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5723 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5724 N1.getOperand(0).getOperand(0),
5725 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5726 DAG.getConstant(Shamt, DL, GRLenVT));
5727 }
5728 }
5729
5730 // 7th pattern.
5731 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5732 // overwritten by the incoming bits are known to be zero.
5733 // =>
5734 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5735 //
5736 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5737 // before the 7th in order to match as many nodes as possible.
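// For example, with shifted_mask = 0xff00 and shamt = 8, where bits 8..15 of b
// are zero:
//   a = or b, (and (shl c, 8), 0xff00)
//   => a = BSTRINS b, c, 15, 8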
5738 if (N1.getOpcode() == ISD::AND &&
5739 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5740 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5741 N1.getOperand(0).getOpcode() == ISD::SHL &&
5742 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5743 CNShamt->getZExtValue() == MaskIdx) {
5744 APInt ShMask(ValBits, CNMask->getZExtValue());
5745 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5746 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5747 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5748 N1.getOperand(0).getOperand(0),
5749 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5750 DAG.getConstant(MaskIdx, DL, GRLenVT));
5751 }
5752 }
5753
5754 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5755 if (!SwapAndRetried) {
5756 std::swap(N0, N1);
5757 SwapAndRetried = true;
5758 goto Retry;
5759 }
5760
5761 SwapAndRetried = false;
5762Retry2:
5763 // 8th pattern.
5764 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5765 // the incoming bits are known to be zero.
5766 // =>
5767 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5768 //
5769 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5770 // we put it here in order to match as many nodes as possible or generate fewer
5771 // instructions.
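// For example, with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8), where
// bits 8..15 of b are zero:
//   a = or b, (and c, 0xff00)
//   => a = BSTRINS b, c >> 8, 15, 8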
5772 if (N1.getOpcode() == ISD::AND &&
5773 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5774 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5775 APInt ShMask(ValBits, CNMask->getZExtValue());
5776 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5777 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5778 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5779 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5780 N1->getOperand(0),
5781 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5782 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5783 DAG.getConstant(MaskIdx, DL, GRLenVT));
5784 }
5785 }
5786 // Swap N0/N1 and retry.
5787 if (!SwapAndRetried) {
5788 std::swap(N0, N1);
5789 SwapAndRetried = true;
5790 goto Retry2;
5791 }
5792
5793 return SDValue();
5794}
5795
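// Return true if V is known to be an i8/i16 value: a load whose memory type is
// i8/i16, or an AssertSext/AssertZext to i8/i16. ExtType reports how the
// narrow value was extended.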
5796static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5797 ExtType = ISD::NON_EXTLOAD;
5798
5799 switch (V.getNode()->getOpcode()) {
5800 case ISD::LOAD: {
5801 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5802 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5803 (LoadNode->getMemoryVT() == MVT::i16)) {
5804 ExtType = LoadNode->getExtensionType();
5805 return true;
5806 }
5807 return false;
5808 }
5809 case ISD::AssertSext: {
5810 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5811 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5812 ExtType = ISD::SEXTLOAD;
5813 return true;
5814 }
5815 return false;
5816 }
5817 case ISD::AssertZext: {
5818 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5819 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5820 ExtType = ISD::ZEXTLOAD;
5821 return true;
5822 }
5823 return false;
5824 }
5825 default:
5826 return false;
5827 }
5828
5829 return false;
5830}
5831
5832// Eliminate redundant truncation and zero-extension nodes.
5833// * Case 1:
5834// +------------+ +------------+ +------------+
5835// | Input1 | | Input2 | | CC |
5836// +------------+ +------------+ +------------+
5837// | | |
5838// V V +----+
5839// +------------+ +------------+ |
5840// | TRUNCATE | | TRUNCATE | |
5841// +------------+ +------------+ |
5842// | | |
5843// V V |
5844// +------------+ +------------+ |
5845// | ZERO_EXT | | ZERO_EXT | |
5846// +------------+ +------------+ |
5847// | | |
5848// | +-------------+ |
5849// V V | |
5850// +----------------+ | |
5851// | AND | | |
5852// +----------------+ | |
5853// | | |
5854// +---------------+ | |
5855// | | |
5856// V V V
5857// +-------------+
5858// | CMP |
5859// +-------------+
5860// * Case 2:
5861// +------------+ +------------+ +-------------+ +------------+ +------------+
5862// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5863// +------------+ +------------+ +-------------+ +------------+ +------------+
5864// | | | | |
5865// V | | | |
5866// +------------+ | | | |
5867// | XOR |<---------------------+ | |
5868// +------------+ | | |
5869// | | | |
5870// V V +---------------+ |
5871// +------------+ +------------+ | |
5872// | TRUNCATE | | TRUNCATE | | +-------------------------+
5873// +------------+ +------------+ | |
5874// | | | |
5875// V V | |
5876// +------------+ +------------+ | |
5877// | ZERO_EXT | | ZERO_EXT | | |
5878// +------------+ +------------+ | |
5879// | | | |
5880// V V | |
5881// +----------------+ | |
5882// | AND | | |
5883// +----------------+ | |
5884// | | |
5885// +---------------+ | |
5886// | | |
5887// V V V
5888// +-------------+
5889// | CMP |
5890// +-------------+
5891 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5892 TargetLowering::DAGCombinerInfo &DCI,
5893 const LoongArchSubtarget &Subtarget) {
5894 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5895
5896 SDNode *AndNode = N->getOperand(0).getNode();
5897 if (AndNode->getOpcode() != ISD::AND)
5898 return SDValue();
5899
5900 SDValue AndInputValue2 = AndNode->getOperand(1);
5901 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5902 return SDValue();
5903
5904 SDValue CmpInputValue = N->getOperand(1);
5905 SDValue AndInputValue1 = AndNode->getOperand(0);
5906 if (AndInputValue1.getOpcode() == ISD::XOR) {
5907 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5908 return SDValue();
5909 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5910 if (!CN || !CN->isAllOnes())
5911 return SDValue();
5912 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5913 if (!CN || !CN->isZero())
5914 return SDValue();
5915 AndInputValue1 = AndInputValue1.getOperand(0);
5916 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5917 return SDValue();
5918 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5919 if (AndInputValue2 != CmpInputValue)
5920 return SDValue();
5921 } else {
5922 return SDValue();
5923 }
5924
5925 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5926 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5927 return SDValue();
5928
5929 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5930 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5931 return SDValue();
5932
5933 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5934 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5935 ISD::LoadExtType ExtType1;
5936 ISD::LoadExtType ExtType2;
5937
5938 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5939 !checkValueWidth(TruncInputValue2, ExtType2))
5940 return SDValue();
5941
5942 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5943 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5944 return SDValue();
5945
5946 if ((ExtType2 != ISD::ZEXTLOAD) &&
5947 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5948 return SDValue();
5949
5950 // These truncation and zero-extension nodes are unnecessary; remove them.
5951 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5952 TruncInputValue1, TruncInputValue2);
5953 SDValue NewSetCC =
5954 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5955 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5956 return SDValue(N, 0);
5957}
5958
5959// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
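// (revb.2w swaps the bytes within each 32-bit word and bitrev.w then reverses
// all 32 bits, so the composition reverses the bits of each byte in place,
// which is exactly what bitrev.4b does.)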
5960 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5961 TargetLowering::DAGCombinerInfo &DCI,
5962 const LoongArchSubtarget &Subtarget) {
5963 if (DCI.isBeforeLegalizeOps())
5964 return SDValue();
5965
5966 SDValue Src = N->getOperand(0);
5967 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5968 return SDValue();
5969
5970 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5971 Src.getOperand(0));
5972}
5973
5974// Perform common combines for BR_CC and SELECT_CC conditions.
5975static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5976 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5977 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5978
5979 // Since an arithmetic right shift always preserves the sign bit, the
5980 // shift can be omitted.
5981 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5982 // setge (sra X, N), 0 -> setge X, 0
5983 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5984 LHS.getOpcode() == ISD::SRA) {
5985 LHS = LHS.getOperand(0);
5986 return true;
5987 }
5988
5989 if (!ISD::isIntEqualitySetCC(CCVal))
5990 return false;
5991
5992 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5993 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5994 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5995 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5996 // If we're looking for eq 0 instead of ne 0, we need to invert the
5997 // condition.
5998 bool Invert = CCVal == ISD::SETEQ;
5999 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6000 if (Invert)
6001 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6002
6003 RHS = LHS.getOperand(1);
6004 LHS = LHS.getOperand(0);
6005 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
6006
6007 CC = DAG.getCondCode(CCVal);
6008 return true;
6009 }
6010
6011 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
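// For example, on LA64 with C = 3:
//   ((srl (and X, 8), 3), 0, ne) -> ((shl X, 60), 0, lt)
// since shifting bit 3 of X into the sign bit lets the comparison test it
// directly.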
6012 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6013 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6014 SDValue LHS0 = LHS.getOperand(0);
6015 if (LHS0.getOpcode() == ISD::AND &&
6016 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6017 uint64_t Mask = LHS0.getConstantOperandVal(1);
6018 uint64_t ShAmt = LHS.getConstantOperandVal(1);
6019 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6020 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6021 CC = DAG.getCondCode(CCVal);
6022
6023 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6024 LHS = LHS0.getOperand(0);
6025 if (ShAmt != 0)
6026 LHS =
6027 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6028 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6029 return true;
6030 }
6031 }
6032 }
6033
6034 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6035 // This can occur when legalizing some floating point comparisons.
6036 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6037 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6038 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6039 CC = DAG.getCondCode(CCVal);
6040 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6041 return true;
6042 }
6043
6044 return false;
6045}
6046
6047 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
6048 TargetLowering::DAGCombinerInfo &DCI,
6049 const LoongArchSubtarget &Subtarget) {
6050 SDValue LHS = N->getOperand(1);
6051 SDValue RHS = N->getOperand(2);
6052 SDValue CC = N->getOperand(3);
6053 SDLoc DL(N);
6054
6055 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6056 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6057 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6058
6059 return SDValue();
6060}
6061
6062 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
6063 TargetLowering::DAGCombinerInfo &DCI,
6064 const LoongArchSubtarget &Subtarget) {
6065 // Transform
6066 SDValue LHS = N->getOperand(0);
6067 SDValue RHS = N->getOperand(1);
6068 SDValue CC = N->getOperand(2);
6069 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6070 SDValue TrueV = N->getOperand(3);
6071 SDValue FalseV = N->getOperand(4);
6072 SDLoc DL(N);
6073 EVT VT = N->getValueType(0);
6074
6075 // If the True and False values are the same, we don't need a select_cc.
6076 if (TrueV == FalseV)
6077 return TrueV;
6078
6079 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6080 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
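// For example, with y = 5 and z = 2 on LA64:
//   (select (x < 0), 5, 2) -> (x >> 63) & (5 - 2) + 2
// which evaluates to 5 when x is negative (the shift yields all-ones) and to 2
// otherwise (the shift yields zero).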
6081 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
6082 isNullConstant(RHS) &&
6083 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
6084 if (CCVal == ISD::CondCode::SETGE)
6085 std::swap(TrueV, FalseV);
6086
6087 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6088 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6089 // Only handle simm12; if the value is not in this range, it can be treated
6090 // as a register operand.
6091 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6092 isInt<12>(TrueSImm - FalseSImm)) {
6093 SDValue SRA =
6094 DAG.getNode(ISD::SRA, DL, VT, LHS,
6095 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6096 SDValue AND =
6097 DAG.getNode(ISD::AND, DL, VT, SRA,
6098 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6099 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6100 }
6101
6102 if (CCVal == ISD::CondCode::SETGE)
6103 std::swap(TrueV, FalseV);
6104 }
6105
6106 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6107 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6108 {LHS, RHS, CC, TrueV, FalseV});
6109
6110 return SDValue();
6111}
6112
6113template <unsigned N>
6115 SelectionDAG &DAG,
6116 const LoongArchSubtarget &Subtarget,
6117 bool IsSigned = false) {
6118 SDLoc DL(Node);
6119 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6120 // Check the ImmArg.
6121 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6122 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6123 DAG.getContext()->emitError(Node->getOperationName(0) +
6124 ": argument out of range.");
6125 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6126 }
6127 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6128}
6129
6130template <unsigned N>
6131static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6132 SelectionDAG &DAG, bool IsSigned = false) {
6133 SDLoc DL(Node);
6134 EVT ResTy = Node->getValueType(0);
6135 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6136
6137 // Check the ImmArg.
6138 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6139 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6140 DAG.getContext()->emitError(Node->getOperationName(0) +
6141 ": argument out of range.");
6142 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6143 }
6144 return DAG.getConstant(
6145 APInt(ResTy.getScalarType().getSizeInBits(),
6146 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6147 DL, ResTy);
6148}
6149
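// Mask each element of the shift-amount/bit-index operand (operand 2) with
// (element bit width - 1), keeping only the bits that can address a bit within
// an element.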
6150 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
6151 SDLoc DL(Node);
6152 EVT ResTy = Node->getValueType(0);
6153 SDValue Vec = Node->getOperand(2);
6154 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6155 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6156}
6157
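// Lower a [x]vbitclr intrinsic to generic nodes: clear bit (Elt mod EltBits)
// of each element, i.e. Op1 & ~(1 << truncated shift amount).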
6158 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
6159 SDLoc DL(Node);
6160 EVT ResTy = Node->getValueType(0);
6161 SDValue One = DAG.getConstant(1, DL, ResTy);
6162 SDValue Bit =
6163 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6164
6165 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6166 DAG.getNOT(DL, Bit, ResTy));
6167}
6168
6169template <unsigned N>
6170 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
6171 SDLoc DL(Node);
6172 EVT ResTy = Node->getValueType(0);
6173 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6174 // Check the unsigned ImmArg.
6175 if (!isUInt<N>(CImm->getZExtValue())) {
6176 DAG.getContext()->emitError(Node->getOperationName(0) +
6177 ": argument out of range.");
6178 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6179 }
6180
6181 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6182 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6183
6184 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6185}
6186
6187template <unsigned N>
6188 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
6189 SDLoc DL(Node);
6190 EVT ResTy = Node->getValueType(0);
6191 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6192 // Check the unsigned ImmArg.
6193 if (!isUInt<N>(CImm->getZExtValue())) {
6194 DAG.getContext()->emitError(Node->getOperationName(0) +
6195 ": argument out of range.");
6196 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6197 }
6198
6199 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6200 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6201 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6202}
6203
6204template <unsigned N>
6205 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
6206 SDLoc DL(Node);
6207 EVT ResTy = Node->getValueType(0);
6208 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6209 // Check the unsigned ImmArg.
6210 if (!isUInt<N>(CImm->getZExtValue())) {
6211 DAG.getContext()->emitError(Node->getOperationName(0) +
6212 ": argument out of range.");
6213 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6214 }
6215
6216 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6217 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6218 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6219}
6220
6221template <unsigned W>
6223 unsigned ResOp) {
6224 unsigned Imm = N->getConstantOperandVal(2);
6225 if (!isUInt<W>(Imm)) {
6226 const StringRef ErrorMsg = "argument out of range";
6227 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6228 return DAG.getUNDEF(N->getValueType(0));
6229 }
6230 SDLoc DL(N);
6231 SDValue Vec = N->getOperand(1);
6232 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6233 SDValue EltVT = DAG.getValueType(Vec.getValueType().getVectorElementType());
6234 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6235}
6236
6237static SDValue
6238 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
6239 TargetLowering::DAGCombinerInfo &DCI,
6240 const LoongArchSubtarget &Subtarget) {
6241 SDLoc DL(N);
6242 switch (N->getConstantOperandVal(0)) {
6243 default:
6244 break;
6245 case Intrinsic::loongarch_lsx_vadd_b:
6246 case Intrinsic::loongarch_lsx_vadd_h:
6247 case Intrinsic::loongarch_lsx_vadd_w:
6248 case Intrinsic::loongarch_lsx_vadd_d:
6249 case Intrinsic::loongarch_lasx_xvadd_b:
6250 case Intrinsic::loongarch_lasx_xvadd_h:
6251 case Intrinsic::loongarch_lasx_xvadd_w:
6252 case Intrinsic::loongarch_lasx_xvadd_d:
6253 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6254 N->getOperand(2));
6255 case Intrinsic::loongarch_lsx_vaddi_bu:
6256 case Intrinsic::loongarch_lsx_vaddi_hu:
6257 case Intrinsic::loongarch_lsx_vaddi_wu:
6258 case Intrinsic::loongarch_lsx_vaddi_du:
6259 case Intrinsic::loongarch_lasx_xvaddi_bu:
6260 case Intrinsic::loongarch_lasx_xvaddi_hu:
6261 case Intrinsic::loongarch_lasx_xvaddi_wu:
6262 case Intrinsic::loongarch_lasx_xvaddi_du:
6263 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6264 lowerVectorSplatImm<5>(N, 2, DAG));
6265 case Intrinsic::loongarch_lsx_vsub_b:
6266 case Intrinsic::loongarch_lsx_vsub_h:
6267 case Intrinsic::loongarch_lsx_vsub_w:
6268 case Intrinsic::loongarch_lsx_vsub_d:
6269 case Intrinsic::loongarch_lasx_xvsub_b:
6270 case Intrinsic::loongarch_lasx_xvsub_h:
6271 case Intrinsic::loongarch_lasx_xvsub_w:
6272 case Intrinsic::loongarch_lasx_xvsub_d:
6273 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6274 N->getOperand(2));
6275 case Intrinsic::loongarch_lsx_vsubi_bu:
6276 case Intrinsic::loongarch_lsx_vsubi_hu:
6277 case Intrinsic::loongarch_lsx_vsubi_wu:
6278 case Intrinsic::loongarch_lsx_vsubi_du:
6279 case Intrinsic::loongarch_lasx_xvsubi_bu:
6280 case Intrinsic::loongarch_lasx_xvsubi_hu:
6281 case Intrinsic::loongarch_lasx_xvsubi_wu:
6282 case Intrinsic::loongarch_lasx_xvsubi_du:
6283 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6284 lowerVectorSplatImm<5>(N, 2, DAG));
6285 case Intrinsic::loongarch_lsx_vneg_b:
6286 case Intrinsic::loongarch_lsx_vneg_h:
6287 case Intrinsic::loongarch_lsx_vneg_w:
6288 case Intrinsic::loongarch_lsx_vneg_d:
6289 case Intrinsic::loongarch_lasx_xvneg_b:
6290 case Intrinsic::loongarch_lasx_xvneg_h:
6291 case Intrinsic::loongarch_lasx_xvneg_w:
6292 case Intrinsic::loongarch_lasx_xvneg_d:
6293 return DAG.getNode(
6294 ISD::SUB, DL, N->getValueType(0),
6295 DAG.getConstant(
6296 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6297 /*isSigned=*/true),
6298 SDLoc(N), N->getValueType(0)),
6299 N->getOperand(1));
6300 case Intrinsic::loongarch_lsx_vmax_b:
6301 case Intrinsic::loongarch_lsx_vmax_h:
6302 case Intrinsic::loongarch_lsx_vmax_w:
6303 case Intrinsic::loongarch_lsx_vmax_d:
6304 case Intrinsic::loongarch_lasx_xvmax_b:
6305 case Intrinsic::loongarch_lasx_xvmax_h:
6306 case Intrinsic::loongarch_lasx_xvmax_w:
6307 case Intrinsic::loongarch_lasx_xvmax_d:
6308 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6309 N->getOperand(2));
6310 case Intrinsic::loongarch_lsx_vmax_bu:
6311 case Intrinsic::loongarch_lsx_vmax_hu:
6312 case Intrinsic::loongarch_lsx_vmax_wu:
6313 case Intrinsic::loongarch_lsx_vmax_du:
6314 case Intrinsic::loongarch_lasx_xvmax_bu:
6315 case Intrinsic::loongarch_lasx_xvmax_hu:
6316 case Intrinsic::loongarch_lasx_xvmax_wu:
6317 case Intrinsic::loongarch_lasx_xvmax_du:
6318 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6319 N->getOperand(2));
6320 case Intrinsic::loongarch_lsx_vmaxi_b:
6321 case Intrinsic::loongarch_lsx_vmaxi_h:
6322 case Intrinsic::loongarch_lsx_vmaxi_w:
6323 case Intrinsic::loongarch_lsx_vmaxi_d:
6324 case Intrinsic::loongarch_lasx_xvmaxi_b:
6325 case Intrinsic::loongarch_lasx_xvmaxi_h:
6326 case Intrinsic::loongarch_lasx_xvmaxi_w:
6327 case Intrinsic::loongarch_lasx_xvmaxi_d:
6328 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6329 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6330 case Intrinsic::loongarch_lsx_vmaxi_bu:
6331 case Intrinsic::loongarch_lsx_vmaxi_hu:
6332 case Intrinsic::loongarch_lsx_vmaxi_wu:
6333 case Intrinsic::loongarch_lsx_vmaxi_du:
6334 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6335 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6336 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6337 case Intrinsic::loongarch_lasx_xvmaxi_du:
6338 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6339 lowerVectorSplatImm<5>(N, 2, DAG));
6340 case Intrinsic::loongarch_lsx_vmin_b:
6341 case Intrinsic::loongarch_lsx_vmin_h:
6342 case Intrinsic::loongarch_lsx_vmin_w:
6343 case Intrinsic::loongarch_lsx_vmin_d:
6344 case Intrinsic::loongarch_lasx_xvmin_b:
6345 case Intrinsic::loongarch_lasx_xvmin_h:
6346 case Intrinsic::loongarch_lasx_xvmin_w:
6347 case Intrinsic::loongarch_lasx_xvmin_d:
6348 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6349 N->getOperand(2));
6350 case Intrinsic::loongarch_lsx_vmin_bu:
6351 case Intrinsic::loongarch_lsx_vmin_hu:
6352 case Intrinsic::loongarch_lsx_vmin_wu:
6353 case Intrinsic::loongarch_lsx_vmin_du:
6354 case Intrinsic::loongarch_lasx_xvmin_bu:
6355 case Intrinsic::loongarch_lasx_xvmin_hu:
6356 case Intrinsic::loongarch_lasx_xvmin_wu:
6357 case Intrinsic::loongarch_lasx_xvmin_du:
6358 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6359 N->getOperand(2));
6360 case Intrinsic::loongarch_lsx_vmini_b:
6361 case Intrinsic::loongarch_lsx_vmini_h:
6362 case Intrinsic::loongarch_lsx_vmini_w:
6363 case Intrinsic::loongarch_lsx_vmini_d:
6364 case Intrinsic::loongarch_lasx_xvmini_b:
6365 case Intrinsic::loongarch_lasx_xvmini_h:
6366 case Intrinsic::loongarch_lasx_xvmini_w:
6367 case Intrinsic::loongarch_lasx_xvmini_d:
6368 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6369 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6370 case Intrinsic::loongarch_lsx_vmini_bu:
6371 case Intrinsic::loongarch_lsx_vmini_hu:
6372 case Intrinsic::loongarch_lsx_vmini_wu:
6373 case Intrinsic::loongarch_lsx_vmini_du:
6374 case Intrinsic::loongarch_lasx_xvmini_bu:
6375 case Intrinsic::loongarch_lasx_xvmini_hu:
6376 case Intrinsic::loongarch_lasx_xvmini_wu:
6377 case Intrinsic::loongarch_lasx_xvmini_du:
6378 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6379 lowerVectorSplatImm<5>(N, 2, DAG));
6380 case Intrinsic::loongarch_lsx_vmul_b:
6381 case Intrinsic::loongarch_lsx_vmul_h:
6382 case Intrinsic::loongarch_lsx_vmul_w:
6383 case Intrinsic::loongarch_lsx_vmul_d:
6384 case Intrinsic::loongarch_lasx_xvmul_b:
6385 case Intrinsic::loongarch_lasx_xvmul_h:
6386 case Intrinsic::loongarch_lasx_xvmul_w:
6387 case Intrinsic::loongarch_lasx_xvmul_d:
6388 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6389 N->getOperand(2));
6390 case Intrinsic::loongarch_lsx_vmadd_b:
6391 case Intrinsic::loongarch_lsx_vmadd_h:
6392 case Intrinsic::loongarch_lsx_vmadd_w:
6393 case Intrinsic::loongarch_lsx_vmadd_d:
6394 case Intrinsic::loongarch_lasx_xvmadd_b:
6395 case Intrinsic::loongarch_lasx_xvmadd_h:
6396 case Intrinsic::loongarch_lasx_xvmadd_w:
6397 case Intrinsic::loongarch_lasx_xvmadd_d: {
6398 EVT ResTy = N->getValueType(0);
6399 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6400 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6401 N->getOperand(3)));
6402 }
6403 case Intrinsic::loongarch_lsx_vmsub_b:
6404 case Intrinsic::loongarch_lsx_vmsub_h:
6405 case Intrinsic::loongarch_lsx_vmsub_w:
6406 case Intrinsic::loongarch_lsx_vmsub_d:
6407 case Intrinsic::loongarch_lasx_xvmsub_b:
6408 case Intrinsic::loongarch_lasx_xvmsub_h:
6409 case Intrinsic::loongarch_lasx_xvmsub_w:
6410 case Intrinsic::loongarch_lasx_xvmsub_d: {
6411 EVT ResTy = N->getValueType(0);
6412 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6413 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6414 N->getOperand(3)));
6415 }
6416 case Intrinsic::loongarch_lsx_vdiv_b:
6417 case Intrinsic::loongarch_lsx_vdiv_h:
6418 case Intrinsic::loongarch_lsx_vdiv_w:
6419 case Intrinsic::loongarch_lsx_vdiv_d:
6420 case Intrinsic::loongarch_lasx_xvdiv_b:
6421 case Intrinsic::loongarch_lasx_xvdiv_h:
6422 case Intrinsic::loongarch_lasx_xvdiv_w:
6423 case Intrinsic::loongarch_lasx_xvdiv_d:
6424 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6425 N->getOperand(2));
6426 case Intrinsic::loongarch_lsx_vdiv_bu:
6427 case Intrinsic::loongarch_lsx_vdiv_hu:
6428 case Intrinsic::loongarch_lsx_vdiv_wu:
6429 case Intrinsic::loongarch_lsx_vdiv_du:
6430 case Intrinsic::loongarch_lasx_xvdiv_bu:
6431 case Intrinsic::loongarch_lasx_xvdiv_hu:
6432 case Intrinsic::loongarch_lasx_xvdiv_wu:
6433 case Intrinsic::loongarch_lasx_xvdiv_du:
6434 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6435 N->getOperand(2));
6436 case Intrinsic::loongarch_lsx_vmod_b:
6437 case Intrinsic::loongarch_lsx_vmod_h:
6438 case Intrinsic::loongarch_lsx_vmod_w:
6439 case Intrinsic::loongarch_lsx_vmod_d:
6440 case Intrinsic::loongarch_lasx_xvmod_b:
6441 case Intrinsic::loongarch_lasx_xvmod_h:
6442 case Intrinsic::loongarch_lasx_xvmod_w:
6443 case Intrinsic::loongarch_lasx_xvmod_d:
6444 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6445 N->getOperand(2));
6446 case Intrinsic::loongarch_lsx_vmod_bu:
6447 case Intrinsic::loongarch_lsx_vmod_hu:
6448 case Intrinsic::loongarch_lsx_vmod_wu:
6449 case Intrinsic::loongarch_lsx_vmod_du:
6450 case Intrinsic::loongarch_lasx_xvmod_bu:
6451 case Intrinsic::loongarch_lasx_xvmod_hu:
6452 case Intrinsic::loongarch_lasx_xvmod_wu:
6453 case Intrinsic::loongarch_lasx_xvmod_du:
6454 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6455 N->getOperand(2));
6456 case Intrinsic::loongarch_lsx_vand_v:
6457 case Intrinsic::loongarch_lasx_xvand_v:
6458 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6459 N->getOperand(2));
6460 case Intrinsic::loongarch_lsx_vor_v:
6461 case Intrinsic::loongarch_lasx_xvor_v:
6462 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6463 N->getOperand(2));
6464 case Intrinsic::loongarch_lsx_vxor_v:
6465 case Intrinsic::loongarch_lasx_xvxor_v:
6466 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6467 N->getOperand(2));
6468 case Intrinsic::loongarch_lsx_vnor_v:
6469 case Intrinsic::loongarch_lasx_xvnor_v: {
6470 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6471 N->getOperand(2));
6472 return DAG.getNOT(DL, Res, Res->getValueType(0));
6473 }
6474 case Intrinsic::loongarch_lsx_vandi_b:
6475 case Intrinsic::loongarch_lasx_xvandi_b:
6476 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6477 lowerVectorSplatImm<8>(N, 2, DAG));
6478 case Intrinsic::loongarch_lsx_vori_b:
6479 case Intrinsic::loongarch_lasx_xvori_b:
6480 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6481 lowerVectorSplatImm<8>(N, 2, DAG));
6482 case Intrinsic::loongarch_lsx_vxori_b:
6483 case Intrinsic::loongarch_lasx_xvxori_b:
6484 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6485 lowerVectorSplatImm<8>(N, 2, DAG));
6486 case Intrinsic::loongarch_lsx_vsll_b:
6487 case Intrinsic::loongarch_lsx_vsll_h:
6488 case Intrinsic::loongarch_lsx_vsll_w:
6489 case Intrinsic::loongarch_lsx_vsll_d:
6490 case Intrinsic::loongarch_lasx_xvsll_b:
6491 case Intrinsic::loongarch_lasx_xvsll_h:
6492 case Intrinsic::loongarch_lasx_xvsll_w:
6493 case Intrinsic::loongarch_lasx_xvsll_d:
6494 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6495 truncateVecElts(N, DAG));
6496 case Intrinsic::loongarch_lsx_vslli_b:
6497 case Intrinsic::loongarch_lasx_xvslli_b:
6498 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6499 lowerVectorSplatImm<3>(N, 2, DAG));
6500 case Intrinsic::loongarch_lsx_vslli_h:
6501 case Intrinsic::loongarch_lasx_xvslli_h:
6502 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6503 lowerVectorSplatImm<4>(N, 2, DAG));
6504 case Intrinsic::loongarch_lsx_vslli_w:
6505 case Intrinsic::loongarch_lasx_xvslli_w:
6506 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6507 lowerVectorSplatImm<5>(N, 2, DAG));
6508 case Intrinsic::loongarch_lsx_vslli_d:
6509 case Intrinsic::loongarch_lasx_xvslli_d:
6510 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6511 lowerVectorSplatImm<6>(N, 2, DAG));
6512 case Intrinsic::loongarch_lsx_vsrl_b:
6513 case Intrinsic::loongarch_lsx_vsrl_h:
6514 case Intrinsic::loongarch_lsx_vsrl_w:
6515 case Intrinsic::loongarch_lsx_vsrl_d:
6516 case Intrinsic::loongarch_lasx_xvsrl_b:
6517 case Intrinsic::loongarch_lasx_xvsrl_h:
6518 case Intrinsic::loongarch_lasx_xvsrl_w:
6519 case Intrinsic::loongarch_lasx_xvsrl_d:
6520 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6521 truncateVecElts(N, DAG));
6522 case Intrinsic::loongarch_lsx_vsrli_b:
6523 case Intrinsic::loongarch_lasx_xvsrli_b:
6524 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6525 lowerVectorSplatImm<3>(N, 2, DAG));
6526 case Intrinsic::loongarch_lsx_vsrli_h:
6527 case Intrinsic::loongarch_lasx_xvsrli_h:
6528 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6529 lowerVectorSplatImm<4>(N, 2, DAG));
6530 case Intrinsic::loongarch_lsx_vsrli_w:
6531 case Intrinsic::loongarch_lasx_xvsrli_w:
6532 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6533 lowerVectorSplatImm<5>(N, 2, DAG));
6534 case Intrinsic::loongarch_lsx_vsrli_d:
6535 case Intrinsic::loongarch_lasx_xvsrli_d:
6536 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6537 lowerVectorSplatImm<6>(N, 2, DAG));
6538 case Intrinsic::loongarch_lsx_vsra_b:
6539 case Intrinsic::loongarch_lsx_vsra_h:
6540 case Intrinsic::loongarch_lsx_vsra_w:
6541 case Intrinsic::loongarch_lsx_vsra_d:
6542 case Intrinsic::loongarch_lasx_xvsra_b:
6543 case Intrinsic::loongarch_lasx_xvsra_h:
6544 case Intrinsic::loongarch_lasx_xvsra_w:
6545 case Intrinsic::loongarch_lasx_xvsra_d:
6546 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6547 truncateVecElts(N, DAG));
6548 case Intrinsic::loongarch_lsx_vsrai_b:
6549 case Intrinsic::loongarch_lasx_xvsrai_b:
6550 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6551 lowerVectorSplatImm<3>(N, 2, DAG));
6552 case Intrinsic::loongarch_lsx_vsrai_h:
6553 case Intrinsic::loongarch_lasx_xvsrai_h:
6554 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6555 lowerVectorSplatImm<4>(N, 2, DAG));
6556 case Intrinsic::loongarch_lsx_vsrai_w:
6557 case Intrinsic::loongarch_lasx_xvsrai_w:
6558 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6559 lowerVectorSplatImm<5>(N, 2, DAG));
6560 case Intrinsic::loongarch_lsx_vsrai_d:
6561 case Intrinsic::loongarch_lasx_xvsrai_d:
6562 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6563 lowerVectorSplatImm<6>(N, 2, DAG));
6564 case Intrinsic::loongarch_lsx_vclz_b:
6565 case Intrinsic::loongarch_lsx_vclz_h:
6566 case Intrinsic::loongarch_lsx_vclz_w:
6567 case Intrinsic::loongarch_lsx_vclz_d:
6568 case Intrinsic::loongarch_lasx_xvclz_b:
6569 case Intrinsic::loongarch_lasx_xvclz_h:
6570 case Intrinsic::loongarch_lasx_xvclz_w:
6571 case Intrinsic::loongarch_lasx_xvclz_d:
6572 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6573 case Intrinsic::loongarch_lsx_vpcnt_b:
6574 case Intrinsic::loongarch_lsx_vpcnt_h:
6575 case Intrinsic::loongarch_lsx_vpcnt_w:
6576 case Intrinsic::loongarch_lsx_vpcnt_d:
6577 case Intrinsic::loongarch_lasx_xvpcnt_b:
6578 case Intrinsic::loongarch_lasx_xvpcnt_h:
6579 case Intrinsic::loongarch_lasx_xvpcnt_w:
6580 case Intrinsic::loongarch_lasx_xvpcnt_d:
6581 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6582 case Intrinsic::loongarch_lsx_vbitclr_b:
6583 case Intrinsic::loongarch_lsx_vbitclr_h:
6584 case Intrinsic::loongarch_lsx_vbitclr_w:
6585 case Intrinsic::loongarch_lsx_vbitclr_d:
6586 case Intrinsic::loongarch_lasx_xvbitclr_b:
6587 case Intrinsic::loongarch_lasx_xvbitclr_h:
6588 case Intrinsic::loongarch_lasx_xvbitclr_w:
6589 case Intrinsic::loongarch_lasx_xvbitclr_d:
6590 return lowerVectorBitClear(N, DAG);
6591 case Intrinsic::loongarch_lsx_vbitclri_b:
6592 case Intrinsic::loongarch_lasx_xvbitclri_b:
6593 return lowerVectorBitClearImm<3>(N, DAG);
6594 case Intrinsic::loongarch_lsx_vbitclri_h:
6595 case Intrinsic::loongarch_lasx_xvbitclri_h:
6596 return lowerVectorBitClearImm<4>(N, DAG);
6597 case Intrinsic::loongarch_lsx_vbitclri_w:
6598 case Intrinsic::loongarch_lasx_xvbitclri_w:
6599 return lowerVectorBitClearImm<5>(N, DAG);
6600 case Intrinsic::loongarch_lsx_vbitclri_d:
6601 case Intrinsic::loongarch_lasx_xvbitclri_d:
6602 return lowerVectorBitClearImm<6>(N, DAG);
6603 case Intrinsic::loongarch_lsx_vbitset_b:
6604 case Intrinsic::loongarch_lsx_vbitset_h:
6605 case Intrinsic::loongarch_lsx_vbitset_w:
6606 case Intrinsic::loongarch_lsx_vbitset_d:
6607 case Intrinsic::loongarch_lasx_xvbitset_b:
6608 case Intrinsic::loongarch_lasx_xvbitset_h:
6609 case Intrinsic::loongarch_lasx_xvbitset_w:
6610 case Intrinsic::loongarch_lasx_xvbitset_d: {
6611 EVT VecTy = N->getValueType(0);
6612 SDValue One = DAG.getConstant(1, DL, VecTy);
6613 return DAG.getNode(
6614 ISD::OR, DL, VecTy, N->getOperand(1),
6615 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6616 }
6617 case Intrinsic::loongarch_lsx_vbitseti_b:
6618 case Intrinsic::loongarch_lasx_xvbitseti_b:
6619 return lowerVectorBitSetImm<3>(N, DAG);
6620 case Intrinsic::loongarch_lsx_vbitseti_h:
6621 case Intrinsic::loongarch_lasx_xvbitseti_h:
6622 return lowerVectorBitSetImm<4>(N, DAG);
6623 case Intrinsic::loongarch_lsx_vbitseti_w:
6624 case Intrinsic::loongarch_lasx_xvbitseti_w:
6625 return lowerVectorBitSetImm<5>(N, DAG);
6626 case Intrinsic::loongarch_lsx_vbitseti_d:
6627 case Intrinsic::loongarch_lasx_xvbitseti_d:
6628 return lowerVectorBitSetImm<6>(N, DAG);
6629 case Intrinsic::loongarch_lsx_vbitrev_b:
6630 case Intrinsic::loongarch_lsx_vbitrev_h:
6631 case Intrinsic::loongarch_lsx_vbitrev_w:
6632 case Intrinsic::loongarch_lsx_vbitrev_d:
6633 case Intrinsic::loongarch_lasx_xvbitrev_b:
6634 case Intrinsic::loongarch_lasx_xvbitrev_h:
6635 case Intrinsic::loongarch_lasx_xvbitrev_w:
6636 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6637 EVT VecTy = N->getValueType(0);
6638 SDValue One = DAG.getConstant(1, DL, VecTy);
6639 return DAG.getNode(
6640 ISD::XOR, DL, VecTy, N->getOperand(1),
6641 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6642 }
6643 case Intrinsic::loongarch_lsx_vbitrevi_b:
6644 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6645 return lowerVectorBitRevImm<3>(N, DAG);
6646 case Intrinsic::loongarch_lsx_vbitrevi_h:
6647 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6648 return lowerVectorBitRevImm<4>(N, DAG);
6649 case Intrinsic::loongarch_lsx_vbitrevi_w:
6650 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6651 return lowerVectorBitRevImm<5>(N, DAG);
6652 case Intrinsic::loongarch_lsx_vbitrevi_d:
6653 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6654 return lowerVectorBitRevImm<6>(N, DAG);
6655 case Intrinsic::loongarch_lsx_vfadd_s:
6656 case Intrinsic::loongarch_lsx_vfadd_d:
6657 case Intrinsic::loongarch_lasx_xvfadd_s:
6658 case Intrinsic::loongarch_lasx_xvfadd_d:
6659 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6660 N->getOperand(2));
6661 case Intrinsic::loongarch_lsx_vfsub_s:
6662 case Intrinsic::loongarch_lsx_vfsub_d:
6663 case Intrinsic::loongarch_lasx_xvfsub_s:
6664 case Intrinsic::loongarch_lasx_xvfsub_d:
6665 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6666 N->getOperand(2));
6667 case Intrinsic::loongarch_lsx_vfmul_s:
6668 case Intrinsic::loongarch_lsx_vfmul_d:
6669 case Intrinsic::loongarch_lasx_xvfmul_s:
6670 case Intrinsic::loongarch_lasx_xvfmul_d:
6671 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6672 N->getOperand(2));
6673 case Intrinsic::loongarch_lsx_vfdiv_s:
6674 case Intrinsic::loongarch_lsx_vfdiv_d:
6675 case Intrinsic::loongarch_lasx_xvfdiv_s:
6676 case Intrinsic::loongarch_lasx_xvfdiv_d:
6677 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6678 N->getOperand(2));
6679 case Intrinsic::loongarch_lsx_vfmadd_s:
6680 case Intrinsic::loongarch_lsx_vfmadd_d:
6681 case Intrinsic::loongarch_lasx_xvfmadd_s:
6682 case Intrinsic::loongarch_lasx_xvfmadd_d:
6683 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6684 N->getOperand(2), N->getOperand(3));
6685 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6686 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6687 N->getOperand(1), N->getOperand(2),
6688 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6689 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6690 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6691 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6692 N->getOperand(1), N->getOperand(2),
6693 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6694 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6695 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6696 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6697 N->getOperand(1), N->getOperand(2),
6698 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6699 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6700 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6701 N->getOperand(1), N->getOperand(2),
6702 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6703 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6704 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6705 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6706 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6707 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6708 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6709 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6710 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6711 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6712 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6713 N->getOperand(1)));
6714 case Intrinsic::loongarch_lsx_vreplve_b:
6715 case Intrinsic::loongarch_lsx_vreplve_h:
6716 case Intrinsic::loongarch_lsx_vreplve_w:
6717 case Intrinsic::loongarch_lsx_vreplve_d:
6718 case Intrinsic::loongarch_lasx_xvreplve_b:
6719 case Intrinsic::loongarch_lasx_xvreplve_h:
6720 case Intrinsic::loongarch_lasx_xvreplve_w:
6721 case Intrinsic::loongarch_lasx_xvreplve_d:
6722 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6723 N->getOperand(1),
6724 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6725 N->getOperand(2)));
6726 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6727 if (!Subtarget.is64Bit())
6728 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6729 break;
6730 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6731 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6732 if (!Subtarget.is64Bit())
6733 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6734 break;
6735 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6736 if (!Subtarget.is64Bit())
6737 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6738 break;
6739 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6740 if (!Subtarget.is64Bit())
6741 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6742 break;
6743 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6744 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6745 if (!Subtarget.is64Bit())
6746 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6747 break;
6748 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6749 if (!Subtarget.is64Bit())
6750 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6751 break;
6752 case Intrinsic::loongarch_lsx_bz_b:
6753 case Intrinsic::loongarch_lsx_bz_h:
6754 case Intrinsic::loongarch_lsx_bz_w:
6755 case Intrinsic::loongarch_lsx_bz_d:
6756 case Intrinsic::loongarch_lasx_xbz_b:
6757 case Intrinsic::loongarch_lasx_xbz_h:
6758 case Intrinsic::loongarch_lasx_xbz_w:
6759 case Intrinsic::loongarch_lasx_xbz_d:
6760 if (!Subtarget.is64Bit())
6761 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6762 N->getOperand(1));
6763 break;
6764 case Intrinsic::loongarch_lsx_bz_v:
6765 case Intrinsic::loongarch_lasx_xbz_v:
6766 if (!Subtarget.is64Bit())
6767 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6768 N->getOperand(1));
6769 break;
6770 case Intrinsic::loongarch_lsx_bnz_b:
6771 case Intrinsic::loongarch_lsx_bnz_h:
6772 case Intrinsic::loongarch_lsx_bnz_w:
6773 case Intrinsic::loongarch_lsx_bnz_d:
6774 case Intrinsic::loongarch_lasx_xbnz_b:
6775 case Intrinsic::loongarch_lasx_xbnz_h:
6776 case Intrinsic::loongarch_lasx_xbnz_w:
6777 case Intrinsic::loongarch_lasx_xbnz_d:
6778 if (!Subtarget.is64Bit())
6779 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6780 N->getOperand(1));
6781 break;
6782 case Intrinsic::loongarch_lsx_bnz_v:
6783 case Intrinsic::loongarch_lasx_xbnz_v:
6784 if (!Subtarget.is64Bit())
6785 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6786 N->getOperand(1));
6787 break;
6788 case Intrinsic::loongarch_lasx_concat_128_s:
6789 case Intrinsic::loongarch_lasx_concat_128_d:
6790 case Intrinsic::loongarch_lasx_concat_128:
6791 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6792 N->getOperand(1), N->getOperand(2));
6793 }
6794 return SDValue();
6795}
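// For illustration, an IR call such as
//   %r = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %v, i32 2)
// is rewritten by the combine above into a generic ISD::SHL whose shift
// amount is a splatted constant vector, so the usual target-independent
// folds and selection patterns apply. The template argument passed to
// lowerVectorSplatImm (3/4/5/6) presumably reflects the number of
// meaningful shift-amount bits for i8/i16/i32/i64 elements.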
6796
6799 const LoongArchSubtarget &Subtarget) {
6800 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6801 // conversion is unnecessary and can be replaced with the
6802 // MOVFR2GR_S_LA64 operand.
6803 SDValue Op0 = N->getOperand(0);
6804 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6805 return Op0.getOperand(0);
6806 return SDValue();
6807}
6808
6811 const LoongArchSubtarget &Subtarget) {
6812 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6813 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6814 // operand.
6815 SDValue Op0 = N->getOperand(0);
6816 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6817 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6818 "Unexpected value type!");
6819 return Op0.getOperand(0);
6820 }
6821 return SDValue();
6822}
6823
6826 const LoongArchSubtarget &Subtarget) {
6827 MVT VT = N->getSimpleValueType(0);
6828 unsigned NumBits = VT.getScalarSizeInBits();
6829
6830 // Simplify the inputs.
6831 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6832 APInt DemandedMask(APInt::getAllOnes(NumBits));
6833 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6834 return SDValue(N, 0);
6835
6836 return SDValue();
6837}
6838
6839static SDValue
6842 const LoongArchSubtarget &Subtarget) {
6843 SDValue Op0 = N->getOperand(0);
6844 SDLoc DL(N);
6845
6846 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6847 // redundant. Instead, use BuildPairF64's operands directly.
6848 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6849 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6850
6851 if (Op0->isUndef()) {
6852 SDValue Lo = DAG.getUNDEF(MVT::i32);
6853 SDValue Hi = DAG.getUNDEF(MVT::i32);
6854 return DCI.CombineTo(N, Lo, Hi);
6855 }
6856
6857 // It's cheaper to materialise two 32-bit integers than to load a double
6858 // from the constant pool and transfer it to integer registers through the
6859 // stack.
6861 APInt V = C->getValueAPF().bitcastToAPInt();
6862 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6863 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6864 return DCI.CombineTo(N, Lo, Hi);
6865 }
6866
6867 return SDValue();
6868}
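// A worked example of the constant case above: splitting the f64 constant
// 1.0 (bit pattern 0x3FF0000000000000) yields Lo = 0x00000000 and
// Hi = 0x3FF00000, which LA32 can materialise directly into two GPRs instead
// of loading the double from the constant pool and moving it via the stack.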
6869
6870static SDValue
6873 const LoongArchSubtarget &Subtarget) {
6874 if (!DCI.isBeforeLegalize())
6875 return SDValue();
6876
6877 MVT EltVT = N->getSimpleValueType(0);
6878 SDValue Vec = N->getOperand(0);
6879 EVT VecTy = Vec->getValueType(0);
6880 SDValue Idx = N->getOperand(1);
6881 unsigned IdxOp = Idx.getOpcode();
6882 SDLoc DL(N);
6883
6884 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6885 return SDValue();
6886
6887 // Combine:
6888 // t2 = truncate t1
6889 // t3 = {zero/sign/any}_extend t2
6890 // t4 = extract_vector_elt t0, t3
6891 // to:
6892 // t4 = extract_vector_elt t0, t1
6893 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6894 IdxOp == ISD::ANY_EXTEND) {
6895 SDValue IdxOrig = Idx.getOperand(0);
6896 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6897 return SDValue();
6898
6899 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6900 IdxOrig.getOperand(0));
6901 }
6902
6903 return SDValue();
6904}
6905
6906/// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6909 const LoongArchSubtarget &Subtarget) {
6910 SDValue N0 = N->getOperand(0);
6911 SDValue N1 = N->getOperand(1);
6912 MVT VT = N->getSimpleValueType(0);
6913 SDLoc DL(N);
6914
6915 // VANDN(undef, x) -> 0
6916 // VANDN(x, undef) -> 0
6917 if (N0.isUndef() || N1.isUndef())
6918 return DAG.getConstant(0, DL, VT);
6919
6920 // VANDN(0, x) -> x
6922 return N1;
6923
6924 // VANDN(x, 0) -> 0
6926 return DAG.getConstant(0, DL, VT);
6927
6928 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6930 return DAG.getNOT(DL, N0, VT);
6931
6932 // Turn VANDN back to AND if input is inverted.
6933 if (SDValue Not = isNOT(N0, DAG))
6934 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6935
6936 // Folds for better commutativity:
6937 if (N1->hasOneUse()) {
6938 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(x,y)).
6939 if (SDValue Not = isNOT(N1, DAG))
6940 return DAG.getNOT(
6941 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6942
6943 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6944 // -> NOT(OR(x, SplatVector(~Imm)))
6945 // This combine is only performed when VT is v16i8/v32i8, where the
6946 // result can be selected as a single `vnori.b`.
6947 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6948 N1.getOpcode() == ISD::BUILD_VECTOR) {
6949 if (SDValue SplatValue =
6950 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
6951 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6952 return SDValue();
6953
6954 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
6955 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6956 SDValue Not =
6957 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6958 return DAG.getNOT(
6959 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6960 VT);
6961 }
6962 }
6963 }
6964 }
6965
6966 return SDValue();
6967}
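// For instance, with v16i8 operands the splat fold above rewrites
//   VANDN(x, splat(0x0F))  ->  NOT(OR(x, splat(0xF0)))
// since ~0x0F == 0xF0 for an i8 element, and the NOT(OR(...)) form can then
// be selected as a single vnori.b.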
6968
6971 const LoongArchSubtarget &Subtarget) {
6972 SDLoc DL(N);
6973 EVT VT = N->getValueType(0);
6974
6975 if (VT != MVT::f32 && VT != MVT::f64)
6976 return SDValue();
6977 if (VT == MVT::f32 && !Subtarget.hasBasicF())
6978 return SDValue();
6979 if (VT == MVT::f64 && !Subtarget.hasBasicD())
6980 return SDValue();
6981
6982 // Only optimize when the source and destination types have the same width.
6983 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
6984 return SDValue();
6985
6986 SDValue Src = N->getOperand(0);
6987 // If the result of an integer load is only used by an integer-to-float
6988 // conversion, use an FP load instead. This eliminates an integer-to-FP
6989 // move (movgr2fr) instruction.
6990 if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
6991 // Do not change the width of a volatile load. This condition check is
6992 // inspired by AArch64.
6993 !cast<LoadSDNode>(Src)->isVolatile()) {
6994 LoadSDNode *LN0 = cast<LoadSDNode>(Src);
6995 SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
6996 LN0->getPointerInfo(), LN0->getAlign(),
6997 LN0->getMemOperand()->getFlags());
6998
6999 // Make sure nodes chained after the original load stay after it by
7000 // updating them to use the new load's chain.
7001 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
7002 return DAG.getNode(LoongArchISD::SITOF, SDLoc(N), VT, Load);
7003 }
7004
7005 return SDValue();
7006}
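// In effect, a pattern such as
//   (f64 (sint_to_fp (i64 (load p))))   ; load only used by the conversion
// becomes an f64 load feeding LoongArchISD::SITOF, so the bits arrive in an
// FPR directly and the separate movgr2fr transfer is avoided.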
7007
7009 DAGCombinerInfo &DCI) const {
7010 SelectionDAG &DAG = DCI.DAG;
7011 switch (N->getOpcode()) {
7012 default:
7013 break;
7014 case ISD::AND:
7015 return performANDCombine(N, DAG, DCI, Subtarget);
7016 case ISD::OR:
7017 return performORCombine(N, DAG, DCI, Subtarget);
7018 case ISD::SETCC:
7019 return performSETCCCombine(N, DAG, DCI, Subtarget);
7020 case ISD::SRL:
7021 return performSRLCombine(N, DAG, DCI, Subtarget);
7022 case ISD::BITCAST:
7023 return performBITCASTCombine(N, DAG, DCI, Subtarget);
7024 case ISD::SINT_TO_FP:
7025 return performSINT_TO_FPCombine(N, DAG, DCI, Subtarget);
7026 case LoongArchISD::BITREV_W:
7027 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
7028 case LoongArchISD::BR_CC:
7029 return performBR_CCCombine(N, DAG, DCI, Subtarget);
7030 case LoongArchISD::SELECT_CC:
7031 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
7033 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
7034 case LoongArchISD::MOVGR2FR_W_LA64:
7035 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
7036 case LoongArchISD::MOVFR2GR_S_LA64:
7037 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
7038 case LoongArchISD::VMSKLTZ:
7039 case LoongArchISD::XVMSKLTZ:
7040 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
7041 case LoongArchISD::SPLIT_PAIR_F64:
7042 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
7044 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
7045 case LoongArchISD::VANDN:
7046 return performVANDNCombine(N, DAG, DCI, Subtarget);
7047 }
7048 return SDValue();
7049}
7050
7053 if (!ZeroDivCheck)
7054 return MBB;
7055
7056 // Build instructions:
7057 // MBB:
7058 // div(or mod) $dst, $dividend, $divisor
7059 // bne $divisor, $zero, SinkMBB
7060 // BreakMBB:
7061 // break 7 // BRK_DIVZERO
7062 // SinkMBB:
7063 // fallthrough
7064 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7065 MachineFunction::iterator It = ++MBB->getIterator();
7066 MachineFunction *MF = MBB->getParent();
7067 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7068 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7069 MF->insert(It, BreakMBB);
7070 MF->insert(It, SinkMBB);
7071
7072 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7073 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7074 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7075
7076 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7077 DebugLoc DL = MI.getDebugLoc();
7078 MachineOperand &Divisor = MI.getOperand(2);
7079 Register DivisorReg = Divisor.getReg();
7080
7081 // MBB:
7082 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7083 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7084 .addReg(LoongArch::R0)
7085 .addMBB(SinkMBB);
7086 MBB->addSuccessor(BreakMBB);
7087 MBB->addSuccessor(SinkMBB);
7088
7089 // BreakMBB:
7090 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7091 // definition of BRK_DIVZERO.
7092 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7093 BreakMBB->addSuccessor(SinkMBB);
7094
7095 // Clear Divisor's kill flag.
7096 Divisor.setIsKill(false);
7097
7098 return SinkMBB;
7099}
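// With -loongarch-check-zero-division enabled, a 32-bit division therefore
// expands to roughly:
//   div.w $dst, $dividend, $divisor
//   bne   $divisor, $zero, .Lsink
//   break 7                           # BRK_DIVZERO
// .Lsink:
//   ...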
7100
7101static MachineBasicBlock *
7103 const LoongArchSubtarget &Subtarget) {
7104 unsigned CondOpc;
7105 switch (MI.getOpcode()) {
7106 default:
7107 llvm_unreachable("Unexpected opcode");
7108 case LoongArch::PseudoVBZ:
7109 CondOpc = LoongArch::VSETEQZ_V;
7110 break;
7111 case LoongArch::PseudoVBZ_B:
7112 CondOpc = LoongArch::VSETANYEQZ_B;
7113 break;
7114 case LoongArch::PseudoVBZ_H:
7115 CondOpc = LoongArch::VSETANYEQZ_H;
7116 break;
7117 case LoongArch::PseudoVBZ_W:
7118 CondOpc = LoongArch::VSETANYEQZ_W;
7119 break;
7120 case LoongArch::PseudoVBZ_D:
7121 CondOpc = LoongArch::VSETANYEQZ_D;
7122 break;
7123 case LoongArch::PseudoVBNZ:
7124 CondOpc = LoongArch::VSETNEZ_V;
7125 break;
7126 case LoongArch::PseudoVBNZ_B:
7127 CondOpc = LoongArch::VSETALLNEZ_B;
7128 break;
7129 case LoongArch::PseudoVBNZ_H:
7130 CondOpc = LoongArch::VSETALLNEZ_H;
7131 break;
7132 case LoongArch::PseudoVBNZ_W:
7133 CondOpc = LoongArch::VSETALLNEZ_W;
7134 break;
7135 case LoongArch::PseudoVBNZ_D:
7136 CondOpc = LoongArch::VSETALLNEZ_D;
7137 break;
7138 case LoongArch::PseudoXVBZ:
7139 CondOpc = LoongArch::XVSETEQZ_V;
7140 break;
7141 case LoongArch::PseudoXVBZ_B:
7142 CondOpc = LoongArch::XVSETANYEQZ_B;
7143 break;
7144 case LoongArch::PseudoXVBZ_H:
7145 CondOpc = LoongArch::XVSETANYEQZ_H;
7146 break;
7147 case LoongArch::PseudoXVBZ_W:
7148 CondOpc = LoongArch::XVSETANYEQZ_W;
7149 break;
7150 case LoongArch::PseudoXVBZ_D:
7151 CondOpc = LoongArch::XVSETANYEQZ_D;
7152 break;
7153 case LoongArch::PseudoXVBNZ:
7154 CondOpc = LoongArch::XVSETNEZ_V;
7155 break;
7156 case LoongArch::PseudoXVBNZ_B:
7157 CondOpc = LoongArch::XVSETALLNEZ_B;
7158 break;
7159 case LoongArch::PseudoXVBNZ_H:
7160 CondOpc = LoongArch::XVSETALLNEZ_H;
7161 break;
7162 case LoongArch::PseudoXVBNZ_W:
7163 CondOpc = LoongArch::XVSETALLNEZ_W;
7164 break;
7165 case LoongArch::PseudoXVBNZ_D:
7166 CondOpc = LoongArch::XVSETALLNEZ_D;
7167 break;
7168 }
7169
7170 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7171 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7172 DebugLoc DL = MI.getDebugLoc();
7175
7176 MachineFunction *F = BB->getParent();
7177 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7178 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7179 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7180
7181 F->insert(It, FalseBB);
7182 F->insert(It, TrueBB);
7183 F->insert(It, SinkBB);
7184
7185 // Transfer the remainder of MBB and its successor edges to Sink.
7186 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7188
7189 // Insert the real instruction to BB.
7190 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7191 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7192
7193 // Insert branch.
7194 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7195 BB->addSuccessor(FalseBB);
7196 BB->addSuccessor(TrueBB);
7197
7198 // FalseBB.
7199 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7200 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7201 .addReg(LoongArch::R0)
7202 .addImm(0);
7203 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7204 FalseBB->addSuccessor(SinkBB);
7205
7206 // TrueBB.
7207 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7208 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7209 .addReg(LoongArch::R0)
7210 .addImm(1);
7211 TrueBB->addSuccessor(SinkBB);
7212
7213 // SinkBB: merge the results.
7214 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7215 MI.getOperand(0).getReg())
7216 .addReg(RD1)
7217 .addMBB(FalseBB)
7218 .addReg(RD2)
7219 .addMBB(TrueBB);
7220
7221 // The pseudo instruction is gone now.
7222 MI.eraseFromParent();
7223 return SinkBB;
7224}
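// The expansion is a small diamond: VSET*/XVSET* writes a condition flag
// register, BCNEZ branches on it, the two arms materialise 0 (FalseBB) or 1
// (TrueBB) into a GPR, and the PHI in SinkBB merges them into the result.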
7225
7226static MachineBasicBlock *
7228 const LoongArchSubtarget &Subtarget) {
7229 unsigned InsOp;
7230 unsigned BroadcastOp;
7231 unsigned HalfSize;
7232 switch (MI.getOpcode()) {
7233 default:
7234 llvm_unreachable("Unexpected opcode");
7235 case LoongArch::PseudoXVINSGR2VR_B:
7236 HalfSize = 16;
7237 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7238 InsOp = LoongArch::XVEXTRINS_B;
7239 break;
7240 case LoongArch::PseudoXVINSGR2VR_H:
7241 HalfSize = 8;
7242 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7243 InsOp = LoongArch::XVEXTRINS_H;
7244 break;
7245 }
7246 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7247 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7248 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7249 DebugLoc DL = MI.getDebugLoc();
7251 // XDst = vector_insert XSrc, Elt, Idx
7252 Register XDst = MI.getOperand(0).getReg();
7253 Register XSrc = MI.getOperand(1).getReg();
7254 Register Elt = MI.getOperand(2).getReg();
7255 unsigned Idx = MI.getOperand(3).getImm();
7256
7257 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7258 Idx < HalfSize) {
7259 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7260 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7261
7262 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7263 .addReg(XSrc, 0, LoongArch::sub_128);
7264 BuildMI(*BB, MI, DL,
7265 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7266 : LoongArch::VINSGR2VR_B),
7267 ScratchSubReg2)
7268 .addReg(ScratchSubReg1)
7269 .addReg(Elt)
7270 .addImm(Idx);
7271
7272 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7273 .addImm(0)
7274 .addReg(ScratchSubReg2)
7275 .addImm(LoongArch::sub_128);
7276 } else {
7277 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7278 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7279
7280 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7281
7282 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7283 .addReg(ScratchReg1)
7284 .addReg(XSrc)
7285 .addImm(Idx >= HalfSize ? 48 : 18);
7286
7287 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7288 .addReg(XSrc)
7289 .addReg(ScratchReg2)
7290 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7291 }
7292
7293 MI.eraseFromParent();
7294 return BB;
7295}
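// Note on the immediates used above: the XVEXTRINS ui8 operand appears to
// pack a destination lane index and a source lane index into its two
// nibbles, so multiplying by 17 (16 + 1) selects the same lane in both, and
// the XVPERMI_Q immediate (48 vs 18) routes the broadcast value into the
// 128-bit half that actually contains element Idx.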
7296
7299 const LoongArchSubtarget &Subtarget) {
7300 assert(Subtarget.hasExtLSX());
7301 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7302 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7303 DebugLoc DL = MI.getDebugLoc();
7305 Register Dst = MI.getOperand(0).getReg();
7306 Register Src = MI.getOperand(1).getReg();
7307 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7308 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7309 Register ScratchReg3 = MRI.createVirtualRegister(RC);
7310
7311 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
7312 BuildMI(*BB, MI, DL,
7313 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
7314 : LoongArch::VINSGR2VR_W),
7315 ScratchReg2)
7316 .addReg(ScratchReg1)
7317 .addReg(Src)
7318 .addImm(0);
7319 BuildMI(
7320 *BB, MI, DL,
7321 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
7322 ScratchReg3)
7323 .addReg(ScratchReg2);
7324 BuildMI(*BB, MI, DL,
7325 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
7326 : LoongArch::VPICKVE2GR_W),
7327 Dst)
7328 .addReg(ScratchReg3)
7329 .addImm(0);
7330
7331 MI.eraseFromParent();
7332 return BB;
7333}
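// In other words, the scalar PseudoCTPOP is lowered through the vector unit:
// zero an LSX register with vldi 0, insert the GPR into element 0, run the
// element-wide vpcnt, and move element 0 back to a GPR, instead of expanding
// CTPOP into a long bit-twiddling sequence.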
7334
7335static MachineBasicBlock *
7337 const LoongArchSubtarget &Subtarget) {
7338 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7339 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
7340 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7342 Register Dst = MI.getOperand(0).getReg();
7343 Register Src = MI.getOperand(1).getReg();
7344 DebugLoc DL = MI.getDebugLoc();
7345 unsigned EleBits = 8;
7346 unsigned NotOpc = 0;
7347 unsigned MskOpc;
7348
7349 switch (MI.getOpcode()) {
7350 default:
7351 llvm_unreachable("Unexpected opcode");
7352 case LoongArch::PseudoVMSKLTZ_B:
7353 MskOpc = LoongArch::VMSKLTZ_B;
7354 break;
7355 case LoongArch::PseudoVMSKLTZ_H:
7356 MskOpc = LoongArch::VMSKLTZ_H;
7357 EleBits = 16;
7358 break;
7359 case LoongArch::PseudoVMSKLTZ_W:
7360 MskOpc = LoongArch::VMSKLTZ_W;
7361 EleBits = 32;
7362 break;
7363 case LoongArch::PseudoVMSKLTZ_D:
7364 MskOpc = LoongArch::VMSKLTZ_D;
7365 EleBits = 64;
7366 break;
7367 case LoongArch::PseudoVMSKGEZ_B:
7368 MskOpc = LoongArch::VMSKGEZ_B;
7369 break;
7370 case LoongArch::PseudoVMSKEQZ_B:
7371 MskOpc = LoongArch::VMSKNZ_B;
7372 NotOpc = LoongArch::VNOR_V;
7373 break;
7374 case LoongArch::PseudoVMSKNEZ_B:
7375 MskOpc = LoongArch::VMSKNZ_B;
7376 break;
7377 case LoongArch::PseudoXVMSKLTZ_B:
7378 MskOpc = LoongArch::XVMSKLTZ_B;
7379 RC = &LoongArch::LASX256RegClass;
7380 break;
7381 case LoongArch::PseudoXVMSKLTZ_H:
7382 MskOpc = LoongArch::XVMSKLTZ_H;
7383 RC = &LoongArch::LASX256RegClass;
7384 EleBits = 16;
7385 break;
7386 case LoongArch::PseudoXVMSKLTZ_W:
7387 MskOpc = LoongArch::XVMSKLTZ_W;
7388 RC = &LoongArch::LASX256RegClass;
7389 EleBits = 32;
7390 break;
7391 case LoongArch::PseudoXVMSKLTZ_D:
7392 MskOpc = LoongArch::XVMSKLTZ_D;
7393 RC = &LoongArch::LASX256RegClass;
7394 EleBits = 64;
7395 break;
7396 case LoongArch::PseudoXVMSKGEZ_B:
7397 MskOpc = LoongArch::XVMSKGEZ_B;
7398 RC = &LoongArch::LASX256RegClass;
7399 break;
7400 case LoongArch::PseudoXVMSKEQZ_B:
7401 MskOpc = LoongArch::XVMSKNZ_B;
7402 NotOpc = LoongArch::XVNOR_V;
7403 RC = &LoongArch::LASX256RegClass;
7404 break;
7405 case LoongArch::PseudoXVMSKNEZ_B:
7406 MskOpc = LoongArch::XVMSKNZ_B;
7407 RC = &LoongArch::LASX256RegClass;
7408 break;
7409 }
7410
7411 Register Msk = MRI.createVirtualRegister(RC);
7412 if (NotOpc) {
7413 Register Tmp = MRI.createVirtualRegister(RC);
7414 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
7415 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
7416 .addReg(Tmp, RegState::Kill)
7417 .addReg(Tmp, RegState::Kill);
7418 } else {
7419 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
7420 }
7421
7422 if (TRI->getRegSizeInBits(*RC) > 128) {
7423 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7424 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7425 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
7426 .addReg(Msk)
7427 .addImm(0);
7428 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
7429 .addReg(Msk, RegState::Kill)
7430 .addImm(4);
7431 BuildMI(*BB, MI, DL,
7432 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
7433 : LoongArch::BSTRINS_W),
7434 Dst)
7437 .addImm(256 / EleBits - 1)
7438 .addImm(128 / EleBits);
7439 } else {
7440 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
7441 .addReg(Msk, RegState::Kill)
7442 .addImm(0);
7443 }
7444
7445 MI.eraseFromParent();
7446 return BB;
7447}
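// For the 256-bit LASX cases the mask can be up to 32 bits wide, so the two
// 128-bit halves are read out separately with XVPICKVE2GR.WU and stitched
// together with BSTRINS; the 128-bit LSX masks fit in a single
// VPICKVE2GR.HU extraction.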
7448
7449static MachineBasicBlock *
7451 const LoongArchSubtarget &Subtarget) {
7452 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
7453 "Unexpected instruction");
7454
7455 MachineFunction &MF = *BB->getParent();
7456 DebugLoc DL = MI.getDebugLoc();
7458 Register LoReg = MI.getOperand(0).getReg();
7459 Register HiReg = MI.getOperand(1).getReg();
7460 Register SrcReg = MI.getOperand(2).getReg();
7461
7462 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
7463 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
7464 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
7465 MI.eraseFromParent(); // The pseudo instruction is gone now.
7466 return BB;
7467}
7468
7469static MachineBasicBlock *
7471 const LoongArchSubtarget &Subtarget) {
7472 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
7473 "Unexpected instruction");
7474
7475 MachineFunction &MF = *BB->getParent();
7476 DebugLoc DL = MI.getDebugLoc();
7479 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
7480 Register DstReg = MI.getOperand(0).getReg();
7481 Register LoReg = MI.getOperand(1).getReg();
7482 Register HiReg = MI.getOperand(2).getReg();
7483
7484 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
7485 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
7486 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
7487 .addReg(TmpReg, RegState::Kill)
7488 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
7489 MI.eraseFromParent(); // The pseudo instruction is gone now.
7490 return BB;
7491}
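// BuildPairF64 / SplitPairF64 are used on LA32 targets with 64-bit FPRs,
// where an f64 spans two GPRs: the two i32 halves are moved through
// movgr2fr.w/movgr2frh.w (and back with movfr2gr.s/movfrh2gr.s) rather than
// through a stack slot.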
7492
7494 switch (MI.getOpcode()) {
7495 default:
7496 return false;
7497 case LoongArch::Select_GPR_Using_CC_GPR:
7498 return true;
7499 }
7500}
7501
7502static MachineBasicBlock *
7504 const LoongArchSubtarget &Subtarget) {
7505 // To "insert" Select_* instructions, we actually have to insert the triangle
7506 // control-flow pattern. The incoming instructions know the destination vreg
7507 // to set, the condition code register to branch on, the true/false values to
7508 // select between, and the condcode to use to select the appropriate branch.
7509 //
7510 // We produce the following control flow:
7511 // HeadMBB
7512 // | \
7513 // | IfFalseMBB
7514 // | /
7515 // TailMBB
7516 //
7517 // When we find a sequence of selects we attempt to optimize their emission
7518 // by sharing the control flow. Currently we only handle cases where we have
7519 // multiple selects with the exact same condition (same LHS, RHS and CC).
7520 // The selects may be interleaved with other instructions if the other
7521 // instructions meet some requirements we deem safe:
7522 // - They are not pseudo instructions.
7523 // - They are debug instructions, or otherwise
7524 // - They do not have side-effects, do not access memory, and their inputs
7525 // do not depend on the results of the select pseudo-instructions.
7526 // The TrueV/FalseV operands of the selects cannot depend on the result of
7527 // previous selects in the sequence.
7528 // These conditions could be further relaxed. See the X86 target for a
7529 // related approach and more information.
7530
7531 Register LHS = MI.getOperand(1).getReg();
7532 Register RHS;
7533 if (MI.getOperand(2).isReg())
7534 RHS = MI.getOperand(2).getReg();
7535 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
7536
7537 SmallVector<MachineInstr *, 4> SelectDebugValues;
7538 SmallSet<Register, 4> SelectDests;
7539 SelectDests.insert(MI.getOperand(0).getReg());
7540
7541 MachineInstr *LastSelectPseudo = &MI;
7542 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
7543 SequenceMBBI != E; ++SequenceMBBI) {
7544 if (SequenceMBBI->isDebugInstr())
7545 continue;
7546 if (isSelectPseudo(*SequenceMBBI)) {
7547 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
7548 !SequenceMBBI->getOperand(2).isReg() ||
7549 SequenceMBBI->getOperand(2).getReg() != RHS ||
7550 SequenceMBBI->getOperand(3).getImm() != CC ||
7551 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
7552 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
7553 break;
7554 LastSelectPseudo = &*SequenceMBBI;
7555 SequenceMBBI->collectDebugValues(SelectDebugValues);
7556 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
7557 continue;
7558 }
7559 if (SequenceMBBI->hasUnmodeledSideEffects() ||
7560 SequenceMBBI->mayLoadOrStore() ||
7561 SequenceMBBI->usesCustomInsertionHook())
7562 break;
7563 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
7564 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
7565 }))
7566 break;
7567 }
7568
7569 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
7570 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7571 DebugLoc DL = MI.getDebugLoc();
7573
7574 MachineBasicBlock *HeadMBB = BB;
7575 MachineFunction *F = BB->getParent();
7576 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
7577 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
7578
7579 F->insert(I, IfFalseMBB);
7580 F->insert(I, TailMBB);
7581
7582 // Set the call frame size on entry to the new basic blocks.
7583 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
7584 IfFalseMBB->setCallFrameSize(CallFrameSize);
7585 TailMBB->setCallFrameSize(CallFrameSize);
7586
7587 // Transfer debug instructions associated with the selects to TailMBB.
7588 for (MachineInstr *DebugInstr : SelectDebugValues) {
7589 TailMBB->push_back(DebugInstr->removeFromParent());
7590 }
7591
7592 // Move all instructions after the sequence to TailMBB.
7593 TailMBB->splice(TailMBB->end(), HeadMBB,
7594 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
7595 // Update machine-CFG edges by transferring all successors of the current
7596 // block to the new block which will contain the Phi nodes for the selects.
7597 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
7598 // Set the successors for HeadMBB.
7599 HeadMBB->addSuccessor(IfFalseMBB);
7600 HeadMBB->addSuccessor(TailMBB);
7601
7602 // Insert appropriate branch.
7603 if (MI.getOperand(2).isImm())
7604 BuildMI(HeadMBB, DL, TII.get(CC))
7605 .addReg(LHS)
7606 .addImm(MI.getOperand(2).getImm())
7607 .addMBB(TailMBB);
7608 else
7609 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
7610
7611 // IfFalseMBB just falls through to TailMBB.
7612 IfFalseMBB->addSuccessor(TailMBB);
7613
7614 // Create PHIs for all of the select pseudo-instructions.
7615 auto SelectMBBI = MI.getIterator();
7616 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
7617 auto InsertionPoint = TailMBB->begin();
7618 while (SelectMBBI != SelectEnd) {
7619 auto Next = std::next(SelectMBBI);
7620 if (isSelectPseudo(*SelectMBBI)) {
7621 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
7622 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
7623 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
7624 .addReg(SelectMBBI->getOperand(4).getReg())
7625 .addMBB(HeadMBB)
7626 .addReg(SelectMBBI->getOperand(5).getReg())
7627 .addMBB(IfFalseMBB);
7628 SelectMBBI->eraseFromParent();
7629 }
7630 SelectMBBI = Next;
7631 }
7632
7633 F->getProperties().resetNoPHIs();
7634 return TailMBB;
7635}
7636
7637MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
7638 MachineInstr &MI, MachineBasicBlock *BB) const {
7639 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7640 DebugLoc DL = MI.getDebugLoc();
7641
7642 switch (MI.getOpcode()) {
7643 default:
7644 llvm_unreachable("Unexpected instr type to insert");
7645 case LoongArch::DIV_W:
7646 case LoongArch::DIV_WU:
7647 case LoongArch::MOD_W:
7648 case LoongArch::MOD_WU:
7649 case LoongArch::DIV_D:
7650 case LoongArch::DIV_DU:
7651 case LoongArch::MOD_D:
7652 case LoongArch::MOD_DU:
7653 return insertDivByZeroTrap(MI, BB);
7654 break;
7655 case LoongArch::WRFCSR: {
7656 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
7657 LoongArch::FCSR0 + MI.getOperand(0).getImm())
7658 .addReg(MI.getOperand(1).getReg());
7659 MI.eraseFromParent();
7660 return BB;
7661 }
7662 case LoongArch::RDFCSR: {
7663 MachineInstr *ReadFCSR =
7664 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
7665 MI.getOperand(0).getReg())
7666 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
7667 ReadFCSR->getOperand(1).setIsUndef();
7668 MI.eraseFromParent();
7669 return BB;
7670 }
7671 case LoongArch::Select_GPR_Using_CC_GPR:
7672 return emitSelectPseudo(MI, BB, Subtarget);
7673 case LoongArch::BuildPairF64Pseudo:
7674 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
7675 case LoongArch::SplitPairF64Pseudo:
7676 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
7677 case LoongArch::PseudoVBZ:
7678 case LoongArch::PseudoVBZ_B:
7679 case LoongArch::PseudoVBZ_H:
7680 case LoongArch::PseudoVBZ_W:
7681 case LoongArch::PseudoVBZ_D:
7682 case LoongArch::PseudoVBNZ:
7683 case LoongArch::PseudoVBNZ_B:
7684 case LoongArch::PseudoVBNZ_H:
7685 case LoongArch::PseudoVBNZ_W:
7686 case LoongArch::PseudoVBNZ_D:
7687 case LoongArch::PseudoXVBZ:
7688 case LoongArch::PseudoXVBZ_B:
7689 case LoongArch::PseudoXVBZ_H:
7690 case LoongArch::PseudoXVBZ_W:
7691 case LoongArch::PseudoXVBZ_D:
7692 case LoongArch::PseudoXVBNZ:
7693 case LoongArch::PseudoXVBNZ_B:
7694 case LoongArch::PseudoXVBNZ_H:
7695 case LoongArch::PseudoXVBNZ_W:
7696 case LoongArch::PseudoXVBNZ_D:
7697 return emitVecCondBranchPseudo(MI, BB, Subtarget);
7698 case LoongArch::PseudoXVINSGR2VR_B:
7699 case LoongArch::PseudoXVINSGR2VR_H:
7700 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
7701 case LoongArch::PseudoCTPOP:
7702 return emitPseudoCTPOP(MI, BB, Subtarget);
7703 case LoongArch::PseudoVMSKLTZ_B:
7704 case LoongArch::PseudoVMSKLTZ_H:
7705 case LoongArch::PseudoVMSKLTZ_W:
7706 case LoongArch::PseudoVMSKLTZ_D:
7707 case LoongArch::PseudoVMSKGEZ_B:
7708 case LoongArch::PseudoVMSKEQZ_B:
7709 case LoongArch::PseudoVMSKNEZ_B:
7710 case LoongArch::PseudoXVMSKLTZ_B:
7711 case LoongArch::PseudoXVMSKLTZ_H:
7712 case LoongArch::PseudoXVMSKLTZ_W:
7713 case LoongArch::PseudoXVMSKLTZ_D:
7714 case LoongArch::PseudoXVMSKGEZ_B:
7715 case LoongArch::PseudoXVMSKEQZ_B:
7716 case LoongArch::PseudoXVMSKNEZ_B:
7717 return emitPseudoVMSKCOND(MI, BB, Subtarget);
7718 case TargetOpcode::STATEPOINT:
7719 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
7720 // while the bl call instruction (to which the statepoint is lowered in
7721 // the end) has an implicit def. This def is early-clobber as it is set at
7722 // the moment of the call, earlier than any use is read.
7723 // Add this implicit dead def here as a workaround.
7724 MI.addOperand(*MI.getMF(),
7726 LoongArch::R1, /*isDef*/ true,
7727 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7728 /*isUndef*/ false, /*isEarlyClobber*/ true));
7729 if (!Subtarget.is64Bit())
7730 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7731 return emitPatchPoint(MI, BB);
7732 }
7733}
7734
7736 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7737 unsigned *Fast) const {
7738 if (!Subtarget.hasUAL())
7739 return false;
7740
7741 // TODO: set reasonable speed number.
7742 if (Fast)
7743 *Fast = 1;
7744 return true;
7745}
7746
7747//===----------------------------------------------------------------------===//
7748// Calling Convention Implementation
7749//===----------------------------------------------------------------------===//
7750
7751 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7752// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7753// fixed-point arguments, and floating-point arguments when no FPR is available
7754// or with soft float ABI.
7755const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7756 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7757 LoongArch::R10, LoongArch::R11};
7758 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7759// arguments, and fa0-fa1 are also used to return values.
7760const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7761 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7762 LoongArch::F6, LoongArch::F7};
7763// FPR32 and FPR64 alias each other.
7765 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7766 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7767
7768const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7769 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7770 LoongArch::VR6, LoongArch::VR7};
7771
7772const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7773 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7774 LoongArch::XR6, LoongArch::XR7};
7775
7776// Pass a 2*GRLen argument that has been split into two GRLen values through
7777// registers or the stack as necessary.
7778static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7779 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7780 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7781 ISD::ArgFlagsTy ArgFlags2) {
7782 unsigned GRLenInBytes = GRLen / 8;
7783 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7784 // At least one half can be passed via register.
7785 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7786 VA1.getLocVT(), CCValAssign::Full));
7787 } else {
7788 // Both halves must be passed on the stack, with proper alignment.
7789 Align StackAlign =
7790 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7791 State.addLoc(
7793 State.AllocateStack(GRLenInBytes, StackAlign),
7794 VA1.getLocVT(), CCValAssign::Full));
7795 State.addLoc(CCValAssign::getMem(
7796 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7797 LocVT2, CCValAssign::Full));
7798 return false;
7799 }
7800 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7801 // The second half can also be passed via register.
7802 State.addLoc(
7803 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7804 } else {
7805 // The second half is passed via the stack, without additional alignment.
7806 State.addLoc(CCValAssign::getMem(
7807 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7808 LocVT2, CCValAssign::Full));
7809 }
7810 return false;
7811}
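// For example, an i128 argument on LA64 (GRLen == 64) reaches this helper as
// two i64 halves: if two argument GPRs remain, both halves go in registers;
// if only one remains, the first half takes it and the second goes on the
// stack; if none remain, both halves go on the stack, with the first slot
// aligned to the larger of GRLen and the argument's original alignment.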
7812
7813// Implements the LoongArch calling convention. Returns true upon failure.
7815 unsigned ValNo, MVT ValVT,
7816 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7817 CCState &State, bool IsRet, Type *OrigTy) {
7818 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7819 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7820 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7821 MVT LocVT = ValVT;
7822
7823 // Any return value split into more than two values can't be returned
7824 // directly.
7825 if (IsRet && ValNo > 1)
7826 return true;
7827
7828 // Use a GPR when passing a variadic argument or when no FPR is available.
7829 bool UseGPRForFloat = true;
7830
7831 switch (ABI) {
7832 default:
7833 llvm_unreachable("Unexpected ABI");
7834 break;
7839 UseGPRForFloat = ArgFlags.isVarArg();
7840 break;
7843 break;
7844 }
7845
7846 // If this is a variadic argument, the LoongArch calling convention requires
7847 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7848 // byte alignment. An aligned register should be used regardless of whether
7849 // the original argument was split during legalisation or not. The argument
7850 // will not be passed by registers if the original type is larger than
7851 // 2*GRLen, so the register alignment rule does not apply.
7852 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7853 if (ArgFlags.isVarArg() &&
7854 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7855 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7856 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7857 // Skip 'odd' register if necessary.
7858 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7859 State.AllocateReg(ArgGPRs);
7860 }
7861
7862 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7863 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7864 State.getPendingArgFlags();
7865
7866 assert(PendingLocs.size() == PendingArgFlags.size() &&
7867 "PendingLocs and PendingArgFlags out of sync");
7868
7869 // FPR32 and FPR64 alias each other.
7870 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7871 UseGPRForFloat = true;
7872
7873 if (UseGPRForFloat && ValVT == MVT::f32) {
7874 LocVT = GRLenVT;
7875 LocInfo = CCValAssign::BCvt;
7876 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7877 LocVT = MVT::i64;
7878 LocInfo = CCValAssign::BCvt;
7879 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7880 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7881 // registers are exhausted.
7882 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7883 // Depending on available argument GPRs, f64 may be passed in a pair of
7884 // GPRs, split between a GPR and the stack, or passed completely on the
7885 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7886 // cases.
7887 MCRegister Reg = State.AllocateReg(ArgGPRs);
7888 if (!Reg) {
7889 int64_t StackOffset = State.AllocateStack(8, Align(8));
7890 State.addLoc(
7891 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7892 return false;
7893 }
7894 LocVT = MVT::i32;
7895 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7896 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7897 if (HiReg) {
7898 State.addLoc(
7899 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7900 } else {
7901 int64_t StackOffset = State.AllocateStack(4, Align(4));
7902 State.addLoc(
7903 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7904 }
7905 return false;
7906 }
7907
7908 // Split arguments might be passed indirectly, so keep track of the pending
7909 // values.
7910 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7911 LocVT = GRLenVT;
7912 LocInfo = CCValAssign::Indirect;
7913 PendingLocs.push_back(
7914 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7915 PendingArgFlags.push_back(ArgFlags);
7916 if (!ArgFlags.isSplitEnd()) {
7917 return false;
7918 }
7919 }
7920
7921 // If the split argument only had two elements, it should be passed directly
7922 // in registers or on the stack.
7923 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7924 PendingLocs.size() <= 2) {
7925 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7926 // Apply the normal calling convention rules to the first half of the
7927 // split argument.
7928 CCValAssign VA = PendingLocs[0];
7929 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7930 PendingLocs.clear();
7931 PendingArgFlags.clear();
7932 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7933 ArgFlags);
7934 }
7935
7936 // Allocate to a register if possible, or else a stack slot.
7937 Register Reg;
7938 unsigned StoreSizeBytes = GRLen / 8;
7939 Align StackAlign = Align(GRLen / 8);
7940
7941 if (ValVT == MVT::f32 && !UseGPRForFloat) {
7942 Reg = State.AllocateReg(ArgFPR32s);
7943 } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
7944 Reg = State.AllocateReg(ArgFPR64s);
7945 } else if (ValVT.is128BitVector()) {
7946 Reg = State.AllocateReg(ArgVRs);
7947 UseGPRForFloat = false;
7948 StoreSizeBytes = 16;
7949 StackAlign = Align(16);
7950 } else if (ValVT.is256BitVector()) {
7951 Reg = State.AllocateReg(ArgXRs);
7952 UseGPRForFloat = false;
7953 StoreSizeBytes = 32;
7954 StackAlign = Align(32);
7955 } else {
7956 Reg = State.AllocateReg(ArgGPRs);
7957 }
7958
7959 unsigned StackOffset =
7960 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7961
7962 // If we reach this point and PendingLocs is non-empty, we must be at the
7963 // end of a split argument that must be passed indirectly.
7964 if (!PendingLocs.empty()) {
7965 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7966 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7967 for (auto &It : PendingLocs) {
7968 if (Reg)
7969 It.convertToReg(Reg);
7970 else
7971 It.convertToMem(StackOffset);
7972 State.addLoc(It);
7973 }
7974 PendingLocs.clear();
7975 PendingArgFlags.clear();
7976 return false;
7977 }
7978 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7979 "Expected an GRLenVT at this stage");
7980
7981 if (Reg) {
7982 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7983 return false;
7984 }
7985
7986 // When a floating-point value is passed on the stack, no bit-cast is needed.
7987 if (ValVT.isFloatingPoint()) {
7988 LocVT = ValVT;
7989 LocInfo = CCValAssign::Full;
7990 }
7991
7992 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7993 return false;
7994}
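// As a rough illustration for the LP64D ABI: floating-point scalars are
// assigned to fa0-fa7 via ArgFPR32s/ArgFPR64s while any remain; once they
// run out, UseGPRForFloat forces an f32/f64 into a GPR with a BCvt location,
// and only when the GPRs are also exhausted does the value go to a stack
// slot in its original FP type.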
7995
7996void LoongArchTargetLowering::analyzeInputArgs(
7997 MachineFunction &MF, CCState &CCInfo,
7998 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7999 LoongArchCCAssignFn Fn) const {
8000 FunctionType *FType = MF.getFunction().getFunctionType();
8001 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
8002 MVT ArgVT = Ins[i].VT;
8003 Type *ArgTy = nullptr;
8004 if (IsRet)
8005 ArgTy = FType->getReturnType();
8006 else if (Ins[i].isOrigArg())
8007 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
8009 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8010 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
8011 CCInfo, IsRet, ArgTy)) {
8012 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
8013 << '\n');
8014 llvm_unreachable("");
8015 }
8016 }
8017}
8018
8019void LoongArchTargetLowering::analyzeOutputArgs(
8020 MachineFunction &MF, CCState &CCInfo,
8021 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
8022 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
8023 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8024 MVT ArgVT = Outs[i].VT;
8025 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
8027 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8028 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
8029 CCInfo, IsRet, OrigTy)) {
8030 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
8031 << "\n");
8032 llvm_unreachable("");
8033 }
8034 }
8035}
8036
8037// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
8038// values.
8040 const CCValAssign &VA, const SDLoc &DL) {
8041 switch (VA.getLocInfo()) {
8042 default:
8043 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8044 case CCValAssign::Full:
8046 break;
8047 case CCValAssign::BCvt:
8048 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8049 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
8050 else
8051 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
8052 break;
8053 }
8054 return Val;
8055}
8056
8058 const CCValAssign &VA, const SDLoc &DL,
8059 const ISD::InputArg &In,
8060 const LoongArchTargetLowering &TLI) {
8063 EVT LocVT = VA.getLocVT();
8064 SDValue Val;
8065 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
8066 Register VReg = RegInfo.createVirtualRegister(RC);
8067 RegInfo.addLiveIn(VA.getLocReg(), VReg);
8068 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
8069
8070 // If input is sign extended from 32 bits, note it for the OptW pass.
8071 if (In.isOrigArg()) {
8072 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
8073 if (OrigArg->getType()->isIntegerTy()) {
8074 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
8075 // An input zero extended from i31 can also be considered sign extended.
8076 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
8077 (BitWidth < 32 && In.Flags.isZExt())) {
8080 LAFI->addSExt32Register(VReg);
8081 }
8082 }
8083 }
8084
8085 return convertLocVTToValVT(DAG, Val, VA, DL);
8086}
8087
8088// The caller is responsible for loading the full value if the argument is
8089// passed with CCValAssign::Indirect.
8091 const CCValAssign &VA, const SDLoc &DL) {
8093 MachineFrameInfo &MFI = MF.getFrameInfo();
8094 EVT ValVT = VA.getValVT();
8095 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
8096 /*IsImmutable=*/true);
8097 SDValue FIN = DAG.getFrameIndex(
8099
8100 ISD::LoadExtType ExtType;
8101 switch (VA.getLocInfo()) {
8102 default:
8103 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8104 case CCValAssign::Full:
8106 case CCValAssign::BCvt:
8107 ExtType = ISD::NON_EXTLOAD;
8108 break;
8109 }
8110 return DAG.getExtLoad(
8111 ExtType, DL, VA.getLocVT(), Chain, FIN,
8113}
8114
8116 const CCValAssign &VA,
8117 const CCValAssign &HiVA,
8118 const SDLoc &DL) {
8119 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
8120 "Unexpected VA");
8122 MachineFrameInfo &MFI = MF.getFrameInfo();
8124
8125 assert(VA.isRegLoc() && "Expected register VA assignment");
8126
8127 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8128 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
8129 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
8130 SDValue Hi;
8131 if (HiVA.isMemLoc()) {
8132 // Second half of f64 is passed on the stack.
8133 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
8134 /*IsImmutable=*/true);
8135 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
8136 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
8138 } else {
8139 // Second half of f64 is passed in another GPR.
8140 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
8141 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
8142 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
8143 }
8144 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
8145}
8146
8148 const CCValAssign &VA, const SDLoc &DL) {
8149 EVT LocVT = VA.getLocVT();
8150
8151 switch (VA.getLocInfo()) {
8152 default:
8153 llvm_unreachable("Unexpected CCValAssign::LocInfo");
8154 case CCValAssign::Full:
8155 break;
8156 case CCValAssign::BCvt:
8157 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
8158 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
8159 else
8160 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
8161 break;
8162 }
8163 return Val;
8164}
8165
8166static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8167 CCValAssign::LocInfo LocInfo,
8168 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8169 CCState &State) {
8170 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8171 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8172 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8173 static const MCPhysReg GPRList[] = {
8174 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8175 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8176 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8177 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8178 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8179 return false;
8180 }
8181 }
8182
8183 if (LocVT == MVT::f32) {
8184 // Pass in STG registers: F1, F2, F3, F4
8185 // fs0,fs1,fs2,fs3
8186 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8187 LoongArch::F26, LoongArch::F27};
8188 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8189 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8190 return false;
8191 }
8192 }
8193
8194 if (LocVT == MVT::f64) {
8195 // Pass in STG registers: D1, D2, D3, D4
8196 // fs4,fs5,fs6,fs7
8197 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8198 LoongArch::F30_64, LoongArch::F31_64};
8199 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8200 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8201 return false;
8202 }
8203 }
8204
8205 report_fatal_error("No registers left in GHC calling convention");
8206 return true;
8207}
8208
8209// Transform physical registers into virtual registers.
8210SDValue LoongArchTargetLowering::LowerFormalArguments(
8211    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8212 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8213 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8214
8216 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8217
8218 switch (CallConv) {
8219 default:
8220 llvm_unreachable("Unsupported calling convention");
8221 case CallingConv::C:
8222 case CallingConv::Fast:
8224 break;
8225 case CallingConv::GHC:
8226 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8227 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8229 "GHC calling convention requires the F and D extensions");
8230 }
8231
8232 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8233 MVT GRLenVT = Subtarget.getGRLenVT();
8234 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8235  // Used with varargs to accumulate store chains.
8236 std::vector<SDValue> OutChains;
8237
8238 // Assign locations to all of the incoming arguments.
8240 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8241
8242 if (CallConv == CallingConv::GHC)
8244 else
8245 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8246
8247 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8248 CCValAssign &VA = ArgLocs[i];
8249 SDValue ArgValue;
8250 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8251 // case.
8252 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8253 assert(VA.needsCustom());
8254 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8255 } else if (VA.isRegLoc())
8256 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8257 else
8258 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8259 if (VA.getLocInfo() == CCValAssign::Indirect) {
8260 // If the original argument was split and passed by reference, we need to
8261 // load all parts of it here (using the same address).
8262 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8264 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8265 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8266 assert(ArgPartOffset == 0);
8267 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8268 CCValAssign &PartVA = ArgLocs[i + 1];
8269 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8270 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8271 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8272 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8274 ++i;
8275 ++InsIdx;
8276 }
8277 continue;
8278 }
8279 InVals.push_back(ArgValue);
8280 if (Ins[InsIdx].Flags.isByVal())
8281 LoongArchFI->addIncomingByValArgs(ArgValue);
8282 }
8283
8284 if (IsVarArg) {
8286 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8287 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8288 MachineFrameInfo &MFI = MF.getFrameInfo();
8289 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8290
8291 // Offset of the first variable argument from stack pointer, and size of
8292 // the vararg save area. For now, the varargs save area is either zero or
8293 // large enough to hold a0-a7.
8294 int VaArgOffset, VarArgsSaveSize;
8295
8296 // If all registers are allocated, then all varargs must be passed on the
8297 // stack and we don't need to save any argregs.
8298 if (ArgRegs.size() == Idx) {
8299 VaArgOffset = CCInfo.getStackSize();
8300 VarArgsSaveSize = 0;
8301 } else {
8302 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8303 VaArgOffset = -VarArgsSaveSize;
8304 }
8305
8306    // Record the frame index of the first variable argument,
8307    // which is needed by VASTART.
8308 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8309 LoongArchFI->setVarArgsFrameIndex(FI);
8310
8311    // If saving an odd number of registers, create an extra stack slot to
8312    // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8313    // offsets to even-numbered registers remain 2*GRLen-aligned.
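    // Illustrative example (not from the original source): on LA64
    // (GRLen = 64) with a0-a2 taken by named arguments, a3-a7 are saved
    // (5 * 8 = 40 bytes); saving an odd count adds one extra 8-byte slot,
    // growing the save area to 48 bytes, a multiple of 2*GRLen/8 = 16.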
8314 if (Idx % 2) {
8315 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8316 true);
8317 VarArgsSaveSize += GRLenInBytes;
8318 }
8319
8320 // Copy the integer registers that may have been used for passing varargs
8321 // to the vararg save area.
8322 for (unsigned I = Idx; I < ArgRegs.size();
8323 ++I, VaArgOffset += GRLenInBytes) {
8324 const Register Reg = RegInfo.createVirtualRegister(RC);
8325 RegInfo.addLiveIn(ArgRegs[I], Reg);
8326 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8327 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8328 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8329 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8331 cast<StoreSDNode>(Store.getNode())
8332 ->getMemOperand()
8333 ->setValue((Value *)nullptr);
8334 OutChains.push_back(Store);
8335 }
8336 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8337 }
8338
8339 LoongArchFI->setArgumentStackSize(CCInfo.getStackSize());
8340
8341 // All stores are grouped in one node to allow the matching between
8342 // the size of Ins and InVals. This only happens for vararg functions.
8343 if (!OutChains.empty()) {
8344 OutChains.push_back(Chain);
8345 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8346 }
8347
8348 return Chain;
8349}
8350
8352 return CI->isTailCall();
8353}
8354
8355// Check that the return value is used only as a return value, as otherwise
8356// we can't perform a tail call.
8358 SDValue &Chain) const {
8359 if (N->getNumValues() != 1)
8360 return false;
8361 if (!N->hasNUsesOfValue(1, 0))
8362 return false;
8363
8364 SDNode *Copy = *N->user_begin();
8365 if (Copy->getOpcode() != ISD::CopyToReg)
8366 return false;
8367
8368 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8369 // isn't safe to perform a tail call.
8370 if (Copy->getGluedNode())
8371 return false;
8372
8373 // The copy must be used by a LoongArchISD::RET, and nothing else.
8374 bool HasRet = false;
8375 for (SDNode *Node : Copy->users()) {
8376 if (Node->getOpcode() != LoongArchISD::RET)
8377 return false;
8378 HasRet = true;
8379 }
8380
8381 if (!HasRet)
8382 return false;
8383
8384 Chain = Copy->getOperand(0);
8385 return true;
8386}
8387
8388// Check whether the call is eligible for tail call optimization.
8389bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8390 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8391 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8392
8393 auto CalleeCC = CLI.CallConv;
8394 auto &Outs = CLI.Outs;
8395 auto &Caller = MF.getFunction();
8396 auto CallerCC = Caller.getCallingConv();
8397 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8398
8399  // If the stack arguments for this call do not fit into our own save area,
8400  // then the call cannot be made a tail call.
8401 if (CCInfo.getStackSize() > LoongArchFI->getArgumentStackSize())
8402 return false;
8403
8404 // Do not tail call opt if any parameters need to be passed indirectly.
8405 for (auto &VA : ArgLocs)
8406 if (VA.getLocInfo() == CCValAssign::Indirect)
8407 return false;
8408
8409 // Do not tail call opt if either caller or callee uses struct return
8410 // semantics.
8411 auto IsCallerStructRet = Caller.hasStructRetAttr();
8412 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8413 if (IsCallerStructRet != IsCalleeStructRet)
8414 return false;
8415
8416 // Do not tail call opt if caller's and callee's byval arguments do not match.
8417 for (unsigned i = 0, j = 0; i < Outs.size(); i++) {
8418 if (!Outs[i].Flags.isByVal())
8419 continue;
8420 if (j++ >= LoongArchFI->getIncomingByValArgsSize())
8421 return false;
8422 if (LoongArchFI->getIncomingByValArgs(i).getValueType() != Outs[i].ArgVT)
8423 return false;
8424 }
8425
8426 // The callee has to preserve all registers the caller needs to preserve.
8427 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8428 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8429 if (CalleeCC != CallerCC) {
8430 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8431 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8432 return false;
8433 }
8434
8435 // If the callee takes no arguments then go on to check the results of the
8436 // call.
8437 const MachineRegisterInfo &MRI = MF.getRegInfo();
8438 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8439 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
8440 return false;
8441
8442 return true;
8443}
8444
8446 return DAG.getDataLayout().getPrefTypeAlign(
8447 VT.getTypeForEVT(*DAG.getContext()));
8448}
8449
8450// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8451// and output parameter nodes.
8452SDValue
8453LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
8454                                   SmallVectorImpl<SDValue> &InVals) const {
8455 SelectionDAG &DAG = CLI.DAG;
8456 SDLoc &DL = CLI.DL;
8458 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8460 SDValue Chain = CLI.Chain;
8461 SDValue Callee = CLI.Callee;
8462 CallingConv::ID CallConv = CLI.CallConv;
8463 bool IsVarArg = CLI.IsVarArg;
8464 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8465 MVT GRLenVT = Subtarget.getGRLenVT();
8466 bool &IsTailCall = CLI.IsTailCall;
8467
8469 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8470
8471 // Analyze the operands of the call, assigning locations to each operand.
8473 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8474
8475 if (CallConv == CallingConv::GHC)
8476 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8477 else
8478 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8479
8480 // Check if it's really possible to do a tail call.
8481 if (IsTailCall)
8482 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8483
8484 if (IsTailCall)
8485 ++NumTailCalls;
8486 else if (CLI.CB && CLI.CB->isMustTailCall())
8487 report_fatal_error("failed to perform tail call elimination on a call "
8488 "site marked musttail");
8489
8490 // Get a count of how many bytes are to be pushed on the stack.
8491 unsigned NumBytes = ArgCCInfo.getStackSize();
8492
8493 // Create local copies for byval args.
8494 SmallVector<SDValue> ByValArgs;
8495 for (unsigned i = 0, j = 0, e = Outs.size(); i != e; ++i) {
8496 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8497 if (!Flags.isByVal())
8498 continue;
8499
8500 SDValue Arg = OutVals[i];
8501 unsigned Size = Flags.getByValSize();
8502 Align Alignment = Flags.getNonZeroByValAlign();
8503 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8504 SDValue Dst;
8505
8506 if (IsTailCall) {
8507 SDValue CallerArg = LoongArchFI->getIncomingByValArgs(j++);
8510 Dst = CallerArg;
8511 } else {
8512 int FI =
8513 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8514 Dst = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8515 }
8516 if (Dst) {
8517 Chain =
8518 DAG.getMemcpy(Chain, DL, Dst, Arg, SizeNode, Alignment,
8519 /*IsVolatile=*/false,
8520 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8522 ByValArgs.push_back(Dst);
8523 }
8524 }
8525
8526 if (!IsTailCall)
8527 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8528
8529 // During a tail call, stores to the argument area must happen after all of
8530 // the function's incoming arguments have been loaded because they may alias.
8531 // This is done by folding in a TokenFactor from LowerFormalArguments, but
8532 // there's no point in doing so repeatedly so this tracks whether that's
8533 // happened yet.
8534 bool AfterFormalArgLoads = false;
8535
8536 // Copy argument values to their designated locations.
8538 SmallVector<SDValue> MemOpChains;
8539 SDValue StackPtr;
8540 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8541 ++i, ++OutIdx) {
8542 CCValAssign &VA = ArgLocs[i];
8543 SDValue ArgValue = OutVals[OutIdx];
8544 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8545
8546 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8547 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8548 assert(VA.isRegLoc() && "Expected register VA assignment");
8549 assert(VA.needsCustom());
8550 SDValue SplitF64 =
8551 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8552 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8553 SDValue Lo = SplitF64.getValue(0);
8554 SDValue Hi = SplitF64.getValue(1);
8555
8556 Register RegLo = VA.getLocReg();
8557 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8558
8559 // Get the CCValAssign for the Hi part.
8560 CCValAssign &HiVA = ArgLocs[++i];
8561
8562 if (HiVA.isMemLoc()) {
8563 // Second half of f64 is passed on the stack.
8564 if (!StackPtr.getNode())
8565 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8567 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8568 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8569 // Emit the store.
8570 MemOpChains.push_back(DAG.getStore(
8571 Chain, DL, Hi, Address,
8573 } else {
8574 // Second half of f64 is passed in another GPR.
8575 Register RegHigh = HiVA.getLocReg();
8576 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8577 }
8578 continue;
8579 }
8580
8581 // Promote the value if needed.
8582 // For now, only handle fully promoted and indirect arguments.
8583 if (VA.getLocInfo() == CCValAssign::Indirect) {
8584 // Store the argument in a stack slot and pass its address.
8585 Align StackAlign =
8586 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8587 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8588 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8589 // If the original argument was split and passed by reference, we need to
8590 // store the required parts of it here (and pass just one address).
8591 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8592 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8593 assert(ArgPartOffset == 0);
8594 // Calculate the total size to store. We don't have access to what we're
8595 // actually storing other than performing the loop and collecting the
8596 // info.
8598 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8599 SDValue PartValue = OutVals[OutIdx + 1];
8600 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8601 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8602 EVT PartVT = PartValue.getValueType();
8603
8604 StoredSize += PartVT.getStoreSize();
8605 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8606 Parts.push_back(std::make_pair(PartValue, Offset));
8607 ++i;
8608 ++OutIdx;
8609 }
8610 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8611 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8612 MemOpChains.push_back(
8613 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8615 for (const auto &Part : Parts) {
8616 SDValue PartValue = Part.first;
8617 SDValue PartOffset = Part.second;
8619 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8620 MemOpChains.push_back(
8621 DAG.getStore(Chain, DL, PartValue, Address,
8623 }
8624 ArgValue = SpillSlot;
8625 } else {
8626 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8627 }
8628
8629 // Use local copy if it is a byval arg.
8630 if (Flags.isByVal()) {
8631 if (!IsTailCall || (isa<GlobalAddressSDNode>(ArgValue) ||
8632 isa<ExternalSymbolSDNode>(ArgValue) ||
8633 isa<FrameIndexSDNode>(ArgValue)))
8634 ArgValue = ByValArgs[j++];
8635 }
8636
8637 if (VA.isRegLoc()) {
8638 // Queue up the argument copies and emit them at the end.
8639 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8640 } else {
8641 assert(VA.isMemLoc() && "Argument not register or memory");
8642 SDValue DstAddr;
8643 MachinePointerInfo DstInfo;
8644 int32_t Offset = VA.getLocMemOffset();
8645
8646 // Work out the address of the stack slot.
8647 if (!StackPtr.getNode())
8648 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8649
8650 if (IsTailCall) {
8651 unsigned OpSize = divideCeil(VA.getValVT().getSizeInBits(), 8);
8652 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
8653 DstAddr = DAG.getFrameIndex(FI, PtrVT);
8654 DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
8655 if (!AfterFormalArgLoads) {
8656 Chain = DAG.getStackArgumentTokenFactor(Chain);
8657 AfterFormalArgLoads = true;
8658 }
8659 } else {
8660 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
8661 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
8662 DstInfo = MachinePointerInfo::getStack(MF, Offset);
8663 }
8664
8665 // Emit the store.
8666 MemOpChains.push_back(
8667 DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
8668 }
8669 }
8670
8671 // Join the stores, which are independent of one another.
8672 if (!MemOpChains.empty())
8673 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8674
8675 SDValue Glue;
8676
8677 // Build a sequence of copy-to-reg nodes, chained and glued together.
8678 for (auto &Reg : RegsToPass) {
8679 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8680 Glue = Chain.getValue(1);
8681 }
8682
8683  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8684  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8685  // split it, and the direct call can then be matched by PseudoCALL_SMALL.
8687 const GlobalValue *GV = S->getGlobal();
8688 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8691 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8692 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8693 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8696 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8697 }
8698
8699 // The first call operand is the chain and the second is the target address.
8701 Ops.push_back(Chain);
8702 Ops.push_back(Callee);
8703
8704 // Add argument registers to the end of the list so that they are
8705 // known live into the call.
8706 for (auto &Reg : RegsToPass)
8707 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8708
8709 if (!IsTailCall) {
8710 // Add a register mask operand representing the call-preserved registers.
8711 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8712 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8713 assert(Mask && "Missing call preserved mask for calling convention");
8714 Ops.push_back(DAG.getRegisterMask(Mask));
8715 }
8716
8717 // Glue the call to the argument copies, if any.
8718 if (Glue.getNode())
8719 Ops.push_back(Glue);
8720
8721 // Emit the call.
8722 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8723 unsigned Op;
8724 switch (DAG.getTarget().getCodeModel()) {
8725 default:
8726 report_fatal_error("Unsupported code model");
8727 case CodeModel::Small:
8728 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8729 break;
8730 case CodeModel::Medium:
8731 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8732 break;
8733 case CodeModel::Large:
8734 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8735 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8736 break;
8737 }
8738
8739 if (IsTailCall) {
8741 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8742 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8743 return Ret;
8744 }
8745
8746 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8747 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8748 Glue = Chain.getValue(1);
8749
8750 // Mark the end of the call, which is glued to the call itself.
8751 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8752 Glue = Chain.getValue(1);
8753
8754 // Assign locations to each value returned by this call.
8756 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8757 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8758
8759 // Copy all of the result registers out of their specified physreg.
8760 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8761 auto &VA = RVLocs[i];
8762 // Copy the value out.
8763 SDValue RetValue =
8764 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8765 // Glue the RetValue to the end of the call sequence.
8766 Chain = RetValue.getValue(1);
8767 Glue = RetValue.getValue(2);
8768
8769 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8770 assert(VA.needsCustom());
8771 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8772 MVT::i32, Glue);
8773 Chain = RetValue2.getValue(1);
8774 Glue = RetValue2.getValue(2);
8775 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8776 RetValue, RetValue2);
8777 } else
8778 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8779
8780 InVals.push_back(RetValue);
8781 }
8782
8783 return Chain;
8784}
8785
8786bool LoongArchTargetLowering::CanLowerReturn(
8787    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8788 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8789 const Type *RetTy) const {
8791 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8792
8793 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8794 LoongArchABI::ABI ABI =
8795 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8796 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8797 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8798 return false;
8799 }
8800 return true;
8801}
8802
8803SDValue LoongArchTargetLowering::LowerReturn(
8804    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8806 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8807 SelectionDAG &DAG) const {
8808 // Stores the assignment of the return value to a location.
8810
8811 // Info about the registers and stack slot.
8812 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8813 *DAG.getContext());
8814
8815 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8816 nullptr, CC_LoongArch);
8817 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8818 report_fatal_error("GHC functions return void only");
8819 SDValue Glue;
8820 SmallVector<SDValue, 4> RetOps(1, Chain);
8821
8822 // Copy the result values into the output registers.
8823 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8824 SDValue Val = OutVals[OutIdx];
8825 CCValAssign &VA = RVLocs[i];
8826 assert(VA.isRegLoc() && "Can only return in registers!");
8827
8828 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8829 // Handle returning f64 on LA32D with a soft float ABI.
8830 assert(VA.isRegLoc() && "Expected return via registers");
8831 assert(VA.needsCustom());
8832 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8833 DAG.getVTList(MVT::i32, MVT::i32), Val);
8834 SDValue Lo = SplitF64.getValue(0);
8835 SDValue Hi = SplitF64.getValue(1);
8836 Register RegLo = VA.getLocReg();
8837 Register RegHi = RVLocs[++i].getLocReg();
8838
8839 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8840 Glue = Chain.getValue(1);
8841 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8842 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8843 Glue = Chain.getValue(1);
8844 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8845 } else {
8846 // Handle a 'normal' return.
8847 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8848 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8849
8850 // Guarantee that all emitted copies are stuck together.
8851 Glue = Chain.getValue(1);
8852 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8853 }
8854 }
8855
8856 RetOps[0] = Chain; // Update chain.
8857
8858 // Add the glue node if we have it.
8859 if (Glue.getNode())
8860 RetOps.push_back(Glue);
8861
8862 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8863}
8864
8865// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8866// Note: The following prefixes are excluded:
8867// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8868// as they can be represented using [x]vrepli.[whb]
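// Illustrative example (not from the original source): for a v4i32 splat of
// 0x00002A00, the 4'b0001 pattern below applies, giving
// RequiredImm = (0b10001 << 8) | 0x2A = 0x112A, i.e. [x]vldi with imm[12] = 1,
// imm[11:8] = 0b0001 and imm[7:0] = 0x2A.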
8870 const APInt &SplatValue, const unsigned SplatBitSize) const {
8871 uint64_t RequiredImm = 0;
8872 uint64_t V = SplatValue.getZExtValue();
8873 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8874 // 4'b0101
8875 RequiredImm = (0b10101 << 8) | (V >> 8);
8876 return {true, RequiredImm};
8877 } else if (SplatBitSize == 32) {
8878 // 4'b0001
8879 if (!(V & 0xFFFF00FF)) {
8880 RequiredImm = (0b10001 << 8) | (V >> 8);
8881 return {true, RequiredImm};
8882 }
8883 // 4'b0010
8884 if (!(V & 0xFF00FFFF)) {
8885 RequiredImm = (0b10010 << 8) | (V >> 16);
8886 return {true, RequiredImm};
8887 }
8888 // 4'b0011
8889 if (!(V & 0x00FFFFFF)) {
8890 RequiredImm = (0b10011 << 8) | (V >> 24);
8891 return {true, RequiredImm};
8892 }
8893 // 4'b0110
8894 if ((V & 0xFFFF00FF) == 0xFF) {
8895 RequiredImm = (0b10110 << 8) | (V >> 8);
8896 return {true, RequiredImm};
8897 }
8898 // 4'b0111
8899 if ((V & 0xFF00FFFF) == 0xFFFF) {
8900 RequiredImm = (0b10111 << 8) | (V >> 16);
8901 return {true, RequiredImm};
8902 }
8903 // 4'b1010
8904 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8905 RequiredImm =
8906 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8907 return {true, RequiredImm};
8908 }
8909 } else if (SplatBitSize == 64) {
8910 // 4'b1011
8911 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8912 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8913 RequiredImm =
8914 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8915 return {true, RequiredImm};
8916 }
8917 // 4'b1100
8918 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8919 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8920 RequiredImm =
8921 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8922 return {true, RequiredImm};
8923 }
8924 // 4'b1001
8925 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8926 uint8_t res = 0;
8927 for (int i = 0; i < 8; ++i) {
8928 uint8_t byte = x & 0xFF;
8929 if (byte == 0 || byte == 0xFF)
8930 res |= ((byte & 1) << i);
8931 else
8932 return {false, 0};
8933 x >>= 8;
8934 }
8935 return {true, res};
8936 };
8937 auto [IsSame, Suffix] = sameBitsPreByte(V);
8938 if (IsSame) {
8939 RequiredImm = (0b11001 << 8) | Suffix;
8940 return {true, RequiredImm};
8941 }
8942 }
8943 return {false, RequiredImm};
8944}
8945
8946bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8947                                               EVT VT) const {
8948 if (!Subtarget.hasExtLSX())
8949 return false;
8950
8951 if (VT == MVT::f32) {
8952 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8953 return (masked == 0x3e000000 || masked == 0x40000000);
8954 }
8955
8956 if (VT == MVT::f64) {
8957 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8958 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8959 }
8960
8961 return false;
8962}
8963
8964bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8965 bool ForCodeSize) const {
8966 // TODO: Maybe need more checks here after vector extension is supported.
8967 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8968 return false;
8969 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8970 return false;
8971 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8972}
8973
8975 return true;
8976}
8977
8979 return true;
8980}
8981
8982bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8983 const Instruction *I) const {
8984 if (!Subtarget.is64Bit())
8985 return isa<LoadInst>(I) || isa<StoreInst>(I);
8986
8987 if (isa<LoadInst>(I))
8988 return true;
8989
8990 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8991  // require fences because we can use amswap_db.[w/d].
8992 Type *Ty = I->getOperand(0)->getType();
8993 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8994 unsigned Size = Ty->getIntegerBitWidth();
8995 return (Size == 8 || Size == 16);
8996 }
8997
8998 return false;
8999}
9000
9002 LLVMContext &Context,
9003 EVT VT) const {
9004 if (!VT.isVector())
9005 return getPointerTy(DL);
9007}
9008
9010 EVT VT = Y.getValueType();
9011
9012 if (VT.isVector())
9013 return Subtarget.hasExtLSX() && VT.isInteger();
9014
9015 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
9016}
9017
9019 const CallBase &I,
9020 MachineFunction &MF,
9021 unsigned Intrinsic) const {
9022 switch (Intrinsic) {
9023 default:
9024 return false;
9025 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
9026 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
9027 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
9028 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
9029 Info.opc = ISD::INTRINSIC_W_CHAIN;
9030 Info.memVT = MVT::i32;
9031 Info.ptrVal = I.getArgOperand(0);
9032 Info.offset = 0;
9033 Info.align = Align(4);
9036 return true;
9037 // TODO: Add more Intrinsics later.
9038 }
9039}
9040
9041// When -mlamcas is enabled, MinCmpXchgSizeInBits is set to 8, so atomicrmw
9042// and/or/xor operations with operands narrower than 32 bits cannot be
9043// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
9044// regression, we implement the expansion manually here.
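// Illustrative sketch (not from the original source): for `atomicrmw and i8`
// at byte offset 1, the helper below aligns the pointer down to the 4-byte
// word, builds Mask = 0xFF << 8, shifts the operand into bits [15:8], and for
// And also ORs in ~Mask so the untouched bytes of the word are preserved; the
// 32-bit atomic result is then shifted right and truncated back to i8.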
9047
9049 Op == AtomicRMWInst::And) &&
9050 "Unable to expand");
9051 unsigned MinWordSize = 4;
9052
9053 IRBuilder<> Builder(AI);
9054 LLVMContext &Ctx = Builder.getContext();
9055 const DataLayout &DL = AI->getDataLayout();
9056 Type *ValueType = AI->getType();
9057 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
9058
9059 Value *Addr = AI->getPointerOperand();
9060 PointerType *PtrTy = cast<PointerType>(Addr->getType());
9061 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
9062
9063 Value *AlignedAddr = Builder.CreateIntrinsic(
9064 Intrinsic::ptrmask, {PtrTy, IntTy},
9065 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
9066 "AlignedAddr");
9067
9068 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
9069 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
9070 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
9071 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
9072 Value *Mask = Builder.CreateShl(
9073 ConstantInt::get(WordType,
9074 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
9075 ShiftAmt, "Mask");
9076 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
9077 Value *ValOperand_Shifted =
9078 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
9079 ShiftAmt, "ValOperand_Shifted");
9080 Value *NewOperand;
9081 if (Op == AtomicRMWInst::And)
9082 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
9083 else
9084 NewOperand = ValOperand_Shifted;
9085
9086 AtomicRMWInst *NewAI =
9087 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
9088 AI->getOrdering(), AI->getSyncScopeID());
9089
9090 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
9091 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
9092 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
9093 AI->replaceAllUsesWith(FinalOldResult);
9094 AI->eraseFromParent();
9095}
9096
9099 const AtomicRMWInst *AI) const {
9100 // TODO: Add more AtomicRMWInst that needs to be extended.
9101
9102 // Since floating-point operation requires a non-trivial set of data
9103 // operations, use CmpXChg to expand.
9104 if (AI->isFloatingPointOperation() ||
9110
9111 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9114 AI->getOperation() == AtomicRMWInst::Sub)) {
9116 }
9117
9118 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9119 if (Subtarget.hasLAMCAS()) {
9120 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9124 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9126 }
9127
9128 if (Size == 8 || Size == 16)
9131}
9132
9133static Intrinsic::ID
9135 AtomicRMWInst::BinOp BinOp) {
9136 if (GRLen == 64) {
9137 switch (BinOp) {
9138 default:
9139 llvm_unreachable("Unexpected AtomicRMW BinOp");
9141 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9142 case AtomicRMWInst::Add:
9143 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9144 case AtomicRMWInst::Sub:
9145 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9147 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9149 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9151 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9152 case AtomicRMWInst::Max:
9153 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9154 case AtomicRMWInst::Min:
9155 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9156 // TODO: support other AtomicRMWInst.
9157 }
9158 }
9159
9160 if (GRLen == 32) {
9161 switch (BinOp) {
9162 default:
9163 llvm_unreachable("Unexpected AtomicRMW BinOp");
9165 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9166 case AtomicRMWInst::Add:
9167 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9168 case AtomicRMWInst::Sub:
9169 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9171 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9173 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9175 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9176 case AtomicRMWInst::Max:
9177 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9178 case AtomicRMWInst::Min:
9179 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9180 // TODO: support other AtomicRMWInst.
9181 }
9182 }
9183
9184 llvm_unreachable("Unexpected GRLen\n");
9185}
9186
9189 const AtomicCmpXchgInst *CI) const {
9190
9191 if (Subtarget.hasLAMCAS())
9193
9195 if (Size == 8 || Size == 16)
9198}
9199
9201 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9202 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9203 unsigned GRLen = Subtarget.getGRLen();
9204 AtomicOrdering FailOrd = CI->getFailureOrdering();
9205 Value *FailureOrdering =
9206 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9207 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9208 if (GRLen == 64) {
9209 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9210 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9211 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9212 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9213 }
9214 Type *Tys[] = {AlignedAddr->getType()};
9215 Value *Result = Builder.CreateIntrinsic(
9216 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9217 if (GRLen == 64)
9218 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9219 return Result;
9220}
9221
9223 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9224 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9225 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9226 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9227 // mask, as this produces better code than the LL/SC loop emitted by
9228 // int_loongarch_masked_atomicrmw_xchg.
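  // Illustrative example (not from the original source): exchanging in 0 for a
  // masked i8 field becomes `atomicrmw and` with ~Mask (clearing just that
  // byte), and exchanging in -1 becomes `atomicrmw or` with Mask (setting it),
  // so no LL/SC loop is needed for these two special values.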
9229 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9232 if (CVal->isZero())
9233 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9234 Builder.CreateNot(Mask, "Inv_Mask"),
9235 AI->getAlign(), Ord);
9236 if (CVal->isMinusOne())
9237 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9238 AI->getAlign(), Ord);
9239 }
9240
9241 unsigned GRLen = Subtarget.getGRLen();
9242 Value *Ordering =
9243 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9244 Type *Tys[] = {AlignedAddr->getType()};
9246 AI->getModule(),
9248
9249 if (GRLen == 64) {
9250 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9251 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9252 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9253 }
9254
9255 Value *Result;
9256
9257 // Must pass the shift amount needed to sign extend the loaded value prior
9258 // to performing a signed comparison for min/max. ShiftAmt is the number of
9259 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9260 // is the number of bits to left+right shift the value in order to
9261 // sign-extend.
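  // Illustrative example (not from the original source): with GRLen = 64 and
  // an i8 operand shifted into position by ShiftAmt = 8, SextShamt =
  // 64 - 8 - 8 = 48; shifting left and then arithmetically right by 48
  // sign-extends the field before the signed min/max comparison.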
9262 if (AI->getOperation() == AtomicRMWInst::Min ||
9264 const DataLayout &DL = AI->getDataLayout();
9265 unsigned ValWidth =
9266 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9267 Value *SextShamt =
9268 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9269 Result = Builder.CreateCall(LlwOpScwLoop,
9270 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9271 } else {
9272 Result =
9273 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9274 }
9275
9276 if (GRLen == 64)
9277 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9278 return Result;
9279}
9280
9282 const MachineFunction &MF, EVT VT) const {
9283 VT = VT.getScalarType();
9284
9285 if (!VT.isSimple())
9286 return false;
9287
9288 switch (VT.getSimpleVT().SimpleTy) {
9289 case MVT::f32:
9290 case MVT::f64:
9291 return true;
9292 default:
9293 break;
9294 }
9295
9296 return false;
9297}
9298
9300 const Constant *PersonalityFn) const {
9301 return LoongArch::R4;
9302}
9303
9305 const Constant *PersonalityFn) const {
9306 return LoongArch::R5;
9307}
9308
9309//===----------------------------------------------------------------------===//
9310// Target Optimization Hooks
9311//===----------------------------------------------------------------------===//
9312
9313static int getEstimateRefinementSteps(EVT VT,
9314                                      const LoongArchSubtarget &Subtarget) {
9315  // The FRECIPE feature's instructions have a relative accuracy of 2^-14.
9316  // IEEE single precision has 23 significand bits and double precision has 52.
9317 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9318 return RefinementSteps;
9319}
9320
9322 SelectionDAG &DAG, int Enabled,
9323 int &RefinementSteps,
9324 bool &UseOneConstNR,
9325 bool Reciprocal) const {
9326 if (Subtarget.hasFrecipe()) {
9327 SDLoc DL(Operand);
9328 EVT VT = Operand.getValueType();
9329
9330 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9331 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9332 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9333 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9334 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9335
9336 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9337 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9338
9339 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9340 if (Reciprocal)
9341 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9342
9343 return Estimate;
9344 }
9345 }
9346
9347 return SDValue();
9348}
9349
9351 SelectionDAG &DAG,
9352 int Enabled,
9353 int &RefinementSteps) const {
9354 if (Subtarget.hasFrecipe()) {
9355 SDLoc DL(Operand);
9356 EVT VT = Operand.getValueType();
9357
9358 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9359 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9360 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9361 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9362 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9363
9364 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9365 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9366
9367 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9368 }
9369 }
9370
9371 return SDValue();
9372}
9373
9374//===----------------------------------------------------------------------===//
9375// LoongArch Inline Assembly Support
9376//===----------------------------------------------------------------------===//
9377
9379LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9380 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9381 //
9382 // 'f': A floating-point register (if available).
9383 // 'k': A memory operand whose address is formed by a base register and
9384 // (optionally scaled) index register.
9385 // 'l': A signed 16-bit constant.
9386 // 'm': A memory operand whose address is formed by a base register and
9387 // offset that is suitable for use in instructions with the same
9388 // addressing mode as st.w and ld.w.
9389 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9390 // instruction)
9391 // 'I': A signed 12-bit constant (for arithmetic instructions).
9392 // 'J': Integer zero.
9393 // 'K': An unsigned 12-bit constant (for logic instructions).
9394 // "ZB": An address that is held in a general-purpose register. The offset is
9395 // zero.
9396 // "ZC": A memory operand whose address is formed by a base register and
9397 // offset that is suitable for use in instructions with the same
9398 // addressing mode as ll.w and sc.w.
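  // Illustrative usage (not from the original source):
  //   int r; asm("addi.w %0, %1, %2" : "=r"(r) : "r"(x), "I"(100));
  // uses 'I' for a signed 12-bit immediate, while "ZB"(*p) would constrain *p
  // to an address held in a bare register with zero offset.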
9399 if (Constraint.size() == 1) {
9400 switch (Constraint[0]) {
9401 default:
9402 break;
9403 case 'f':
9404 case 'q':
9405 return C_RegisterClass;
9406 case 'l':
9407 case 'I':
9408 case 'J':
9409 case 'K':
9410 return C_Immediate;
9411 case 'k':
9412 return C_Memory;
9413 }
9414 }
9415
9416 if (Constraint == "ZC" || Constraint == "ZB")
9417 return C_Memory;
9418
9419 // 'm' is handled here.
9420 return TargetLowering::getConstraintType(Constraint);
9421}
9422
9423InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9424 StringRef ConstraintCode) const {
9425 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9429 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9430}
9431
9432std::pair<unsigned, const TargetRegisterClass *>
9433LoongArchTargetLowering::getRegForInlineAsmConstraint(
9434 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9435 // First, see if this is a constraint that directly corresponds to a LoongArch
9436 // register class.
9437 if (Constraint.size() == 1) {
9438 switch (Constraint[0]) {
9439 case 'r':
9440 // TODO: Support fixed vectors up to GRLen?
9441 if (VT.isVector())
9442 break;
9443 return std::make_pair(0U, &LoongArch::GPRRegClass);
9444 case 'q':
9445 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9446 case 'f':
9447 if (Subtarget.hasBasicF() && VT == MVT::f32)
9448 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9449 if (Subtarget.hasBasicD() && VT == MVT::f64)
9450 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9451 if (Subtarget.hasExtLSX() &&
9452 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9453 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9454 if (Subtarget.hasExtLASX() &&
9455 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9456 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9457 break;
9458 default:
9459 break;
9460 }
9461 }
9462
9463 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9464 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9465 // constraints while the official register name is prefixed with a '$'. So we
9466 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9467 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9468 // case insensitive, so no need to convert the constraint to upper case here.
9469 //
9470 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9471 // decode the usage of register name aliases into their official names. And
9472 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9473 // official register names.
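  // Illustrative example (not from the original source): a "{$r4}" constraint
  // is split on '$' and rejoined as "{r4}" before the generic lookup; for
  // "{$f0}" with an f64 operand on a target with BasicD, the matched F0 is
  // remapped to F0_64 in FPR64RegClass by the code below.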
9474 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9475 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9476 bool IsFP = Constraint[2] == 'f';
9477 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9478 std::pair<unsigned, const TargetRegisterClass *> R;
9480 TRI, join_items("", Temp.first, Temp.second), VT);
9481 // Match those names to the widest floating point register type available.
9482 if (IsFP) {
9483 unsigned RegNo = R.first;
9484 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9485 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9486 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9487 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9488 }
9489 }
9490 }
9491 return R;
9492 }
9493
9494 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9495}
9496
9497void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9498 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9499 SelectionDAG &DAG) const {
9500 // Currently only support length 1 constraints.
9501 if (Constraint.size() == 1) {
9502 switch (Constraint[0]) {
9503 case 'l':
9504 // Validate & create a 16-bit signed immediate operand.
9505 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9506 uint64_t CVal = C->getSExtValue();
9507 if (isInt<16>(CVal))
9508 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9509 Subtarget.getGRLenVT()));
9510 }
9511 return;
9512 case 'I':
9513 // Validate & create a 12-bit signed immediate operand.
9514 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9515 uint64_t CVal = C->getSExtValue();
9516 if (isInt<12>(CVal))
9517 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9518 Subtarget.getGRLenVT()));
9519 }
9520 return;
9521 case 'J':
9522 // Validate & create an integer zero operand.
9523 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9524 if (C->getZExtValue() == 0)
9525 Ops.push_back(
9526 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9527 return;
9528 case 'K':
9529 // Validate & create a 12-bit unsigned immediate operand.
9530 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9531 uint64_t CVal = C->getZExtValue();
9532 if (isUInt<12>(CVal))
9533 Ops.push_back(
9534 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9535 }
9536 return;
9537 default:
9538 break;
9539 }
9540 }
9542}
9543
9544#define GET_REGISTER_MATCHER
9545#include "LoongArchGenAsmMatcher.inc"
9546
9549 const MachineFunction &MF) const {
9550 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9551 std::string NewRegName = Name.second.str();
9552 Register Reg = MatchRegisterAltName(NewRegName);
9553 if (!Reg)
9554 Reg = MatchRegisterName(NewRegName);
9555 if (!Reg)
9556 return Reg;
9557 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9558 if (!ReservedRegs.test(Reg))
9559 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9560 StringRef(RegName) + "\"."));
9561 return Reg;
9562}
9563
9565 EVT VT, SDValue C) const {
9566 // TODO: Support vectors.
9567 if (!VT.isScalarInteger())
9568 return false;
9569
9570 // Omit the optimization if the data size exceeds GRLen.
9571 if (VT.getSizeInBits() > Subtarget.getGRLen())
9572 return false;
9573
9574 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9575 const APInt &Imm = ConstNode->getAPIntValue();
9576 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9577 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9578 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9579 return true;
9580 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9581 if (ConstNode->hasOneUse() &&
9582 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9583 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9584 return true;
9585    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9586    // in which the immediate has two set bits, or break (MUL x, imm)
9587    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9588    // equals (1 << s0) - (1 << s1).
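    // Illustrative example (not from the original source): Imm = 4160 =
    // (1 << 12) + (1 << 6) is outside [-2048, 4095], has countr_zero() = 6,
    // and Imm - (1 << 6) = 4096 is a power of two, so MUL x, 4160 can become
    // (ADD (SLLI x, 12), (SLLI x, 6)).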
9589 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9590 unsigned Shifts = Imm.countr_zero();
9591 // Reject immediates which can be composed via a single LUI.
9592 if (Shifts >= 12)
9593 return false;
9594      // Reject multiplications that can be optimized to
9595 // (SLLI (ALSL x, x, 1/2/3/4), s).
9596 APInt ImmPop = Imm.ashr(Shifts);
9597 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9598 return false;
9599 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9600      // since it needs one more instruction than the other 3 cases.
9601 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9602 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9603 (ImmSmall - Imm).isPowerOf2())
9604 return true;
9605 }
9606 }
9607
9608 return false;
9609}
9610
9612 const AddrMode &AM,
9613 Type *Ty, unsigned AS,
9614 Instruction *I) const {
9615 // LoongArch has four basic addressing modes:
9616 // 1. reg
9617 // 2. reg + 12-bit signed offset
9618 // 3. reg + 14-bit signed offset left-shifted by 2
9619 // 4. reg1 + reg2
9620  // TODO: Add more checks after the vector extension is supported.
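  // Illustrative examples (not from the original source): BaseOffs = 2040 with
  // Scale = 0 fits the 12-bit signed form; BaseOffs = 8192 only passes when
  // UAL is available, via the shifted 14-bit form; Scale = 1 together with a
  // base register and a nonzero offset ("r+r+i") is rejected below.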
9621
9622 // No global is ever allowed as a base.
9623 if (AM.BaseGV)
9624 return false;
9625
9626 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9627 // with `UAL` feature.
9628 if (!isInt<12>(AM.BaseOffs) &&
9629 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9630 return false;
9631
9632 switch (AM.Scale) {
9633 case 0:
9634 // "r+i" or just "i", depending on HasBaseReg.
9635 break;
9636 case 1:
9637 // "r+r+i" is not allowed.
9638 if (AM.HasBaseReg && AM.BaseOffs)
9639 return false;
9640 // Otherwise we have "r+r" or "r+i".
9641 break;
9642 case 2:
9643 // "2*r+r" or "2*r+i" is not allowed.
9644 if (AM.HasBaseReg || AM.BaseOffs)
9645 return false;
9646 // Allow "2*r" as "r+r".
9647 break;
9648 default:
9649 return false;
9650 }
9651
9652 return true;
9653}
9654
9656 return isInt<12>(Imm);
9657}
9658
9660 return isInt<12>(Imm);
9661}
9662
9664 // Zexts are free if they can be combined with a load.
9665 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9666 // poorly with type legalization of compares preferring sext.
9667 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9668 EVT MemVT = LD->getMemoryVT();
9669 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9670 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9671 LD->getExtensionType() == ISD::ZEXTLOAD))
9672 return true;
9673 }
9674
9675 return TargetLowering::isZExtFree(Val, VT2);
9676}
9677
9679 EVT DstVT) const {
9680 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9681}
9682
9684 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9685}
9686
9688 // TODO: Support vectors.
9689 if (Y.getValueType().isVector())
9690 return false;
9691
9692 return !isa<ConstantSDNode>(Y);
9693}
9694
9696 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9697 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9698}
9699
9701 Type *Ty, bool IsSigned) const {
9702 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9703 return true;
9704
9705 return IsSigned;
9706}
9707
9709 // Return false to suppress the unnecessary extensions if the LibCall
9710 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9711 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9712 Type.getSizeInBits() < Subtarget.getGRLen()))
9713 return false;
9714 return true;
9715}
9716
9717// memcpy, and other memory intrinsics, typically try to use wider loads/stores
9718// if the source/dest is aligned and the copy size is large enough. We therefore
9719// want to align such objects passed to memory intrinsics.
9721 unsigned &MinSize,
9722 Align &PrefAlign) const {
9723 if (!isa<MemIntrinsic>(CI))
9724 return false;
9725
9726 if (Subtarget.is64Bit()) {
9727 MinSize = 8;
9728 PrefAlign = Align(8);
9729 } else {
9730 MinSize = 4;
9731 PrefAlign = Align(4);
9732 }
9733
9734 return true;
9735}
9736
9745
9746bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9747 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9748 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9749 bool IsABIRegCopy = CC.has_value();
9750 EVT ValueVT = Val.getValueType();
9751
9752 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9753 PartVT == MVT::f32) {
9754    // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9755    // NaN, and cast to f32.
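    // Illustrative example (not from the original source): an f16 value of 1.0
    // (bit pattern 0x3C00) becomes i32 0x00003C00, is ORed with 0xFFFF0000 to
    // give 0xFFFF3C00, and is then bitcast to f32, a quiet-NaN pattern whose
    // low half still carries the original f16 bits.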
9756 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9757 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9758 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9759 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9760 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9761 Parts[0] = Val;
9762 return true;
9763 }
9764
9765 return false;
9766}
9767
9768SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9769 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9770 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9771 bool IsABIRegCopy = CC.has_value();
9772
9773 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9774 PartVT == MVT::f32) {
9775 SDValue Val = Parts[0];
9776
9777 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9778 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9779 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9780 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9781 return Val;
9782 }
9783
9784 return SDValue();
9785}
9786
9787MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9788 CallingConv::ID CC,
9789 EVT VT) const {
9790 // Use f32 to pass f16.
9791 if (VT == MVT::f16 && Subtarget.hasBasicF())
9792 return MVT::f32;
9793
9795}
9796
9797unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9798 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9799 // Use f32 to pass f16.
9800 if (VT == MVT::f16 && Subtarget.hasBasicF())
9801 return 1;
9802
9804}
9805
9807 SDValue Op, const APInt &OriginalDemandedBits,
9808 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9809 unsigned Depth) const {
9810 EVT VT = Op.getValueType();
9811 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9812 unsigned Opc = Op.getOpcode();
9813 switch (Opc) {
9814 default:
9815 break;
9816 case LoongArchISD::VMSKLTZ:
9817 case LoongArchISD::XVMSKLTZ: {
9818 SDValue Src = Op.getOperand(0);
9819 MVT SrcVT = Src.getSimpleValueType();
9820 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9821 unsigned NumElts = SrcVT.getVectorNumElements();
9822
9823 // If we don't need the sign bits at all just return zero.
9824 if (OriginalDemandedBits.countr_zero() >= NumElts)
9825 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9826
9827 // Only demand the vector elements of the sign bits we need.
9828 APInt KnownUndef, KnownZero;
9829 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9830 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9831 TLO, Depth + 1))
9832 return true;
9833
9834 Known.Zero = KnownZero.zext(BitWidth);
9835 Known.Zero.setHighBits(BitWidth - NumElts);
9836
9837 // [X]VMSKLTZ only uses the MSB from each vector element.
9838 KnownBits KnownSrc;
9839 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9840 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9841 Depth + 1))
9842 return true;
9843
9844 if (KnownSrc.One[SrcBits - 1])
9845 Known.One.setLowBits(NumElts);
9846 else if (KnownSrc.Zero[SrcBits - 1])
9847 Known.Zero.setLowBits(NumElts);
9848
9849 // Attempt to avoid multi-use ops if we don't need anything from it.
9850 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9851 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9852 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9853 return false;
9854 }
9855 }
9856
9857 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9858 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9859}
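// Worked example of the logic above: for (VMSKLTZ v16i8 X) only the low 16
// bits of the result can be nonzero, one per element sign bit. If a user
// demands none of those low bits, the node folds to constant zero; otherwise
// only the MSB of each demanded element of X has to be computed.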
9860
9861 bool LoongArchTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
9862 unsigned Opc = VecOp.getOpcode();
9863
9864 // Assume target opcodes can't be scalarized.
9865 // TODO - do we have any exceptions?
9866 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9867 return false;
9868
9869 // If the vector op is not supported, try to convert to scalar.
9870 EVT VecVT = VecOp.getValueType();
9871 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
9872 return true;
9873
9874 // If the vector op is supported, but the scalar op is not, the transform may
9875 // not be worthwhile.
9876 EVT ScalarVT = VecVT.getScalarType();
9877 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9878}
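// Sketch of the combine this hook gates (done by the generic DAG combiner,
// not in this file), assuming scalar ADD is legal:
//   (extractelt (add v4i32 A, B), i) -> (add (extractelt A, i), (extractelt B, i))
// It is only worthwhile when the scalar form of the operation is usable,
// which is exactly what the checks above establish.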
9879
9880 bool LoongArchTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
9881 unsigned Index) const {
9883 return false;
9884
9885 // Extracting a 128-bit subvector from index 0 of a 256-bit vector is free.
9886 return Index == 0;
9887}
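// Example: pulling the low v4i32 half out of a v8i32 value is free, since the
// low 128 bits of a 256-bit LASX register are directly usable as an LSX
// register; extracting the upper half (Index != 0) requires a real shuffle.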
9888
9889 bool LoongArchTargetLowering::isExtractVecEltCheap(EVT VT,
9890 unsigned Index) const {
9891 EVT EltVT = VT.getScalarType();
9892
9893 // Extracting a scalar FP value from index 0 of a vector is free.
9894 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9895}
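// Example: (extractelt v4f32 V, 0) can simply reuse the FPR that aliases the
// low element of the vector register, so no extraction instruction is needed;
// nonzero indices fall back to an explicit element move.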
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1495
bool isZero() const
Definition APFloat.h:1508
APInt bitcastToAPInt() const
Definition APFloat.h:1416
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1400
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1648
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1397
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:231
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:490
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:123
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2762
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:712
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a type is legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
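The set*Action hooks listed above are how a backend's TargetLowering constructor declares which SelectionDAG operations, extending loads, truncating stores, and condition codes it supports natively. A hypothetical, non-compilable fragment sketching the calling pattern only; the opcode and type choices are illustrative and are not the settings made by this file:

// Inside a TargetLowering subclass constructor (illustrative fragment only):
setOperationAction(ISD::CTLZ, MVT::i32, Legal);           // hardware instruction exists
setOperationAction(ISD::ROTR, MVT::i16, Promote);         // widen to a supported type first
setOperationAction(ISD::BR_JT, MVT::Other, Expand);       // let generic code rewrite it
setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i8, Legal); // i8 -> i64 extending load is fine
setTruncStoreAction(MVT::f64, MVT::f32, Expand);          // no truncating FP store
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);         // this condition code needs expansion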
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
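A short sketch of the Type queries above (getIntNTy, isIntegerTy, getIntegerBitWidth); the 48-bit width and the helper name are arbitrary:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Build an arbitrary-width integer type and read its bit width back.
unsigned widthOfI48(LLVMContext &Ctx) {
  Type *T = Type::getIntNTy(Ctx, 48);
  return T->isIntegerTy() ? T->getIntegerBitWidth() : 0;  // 48
}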
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:818
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:778
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:852
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:879
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:746
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:992
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:843
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:664
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:795
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:703
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:764
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:849
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:810
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:887
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:726
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:977
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:804
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:150
@ BF16_TO_FP
BF16_TO_FP, FP_TO_BF16 - These operators are used to perform promotions and truncation for bfloat16.
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:925
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:738
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:855
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:832
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:721
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
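A worked sketch of the two CondCode helpers above; the header include is an assumption about where the declarations live, and the chosen condition code is arbitrary:

#include "llvm/CodeGen/ISDOpcodes.h"  // assumed home of ISD::CondCode and these helpers
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// For an unsigned compare X <u Y: the logical negation is X >=u Y,
// and swapping the operands gives Y >u X.
void condCodeAlgebra() {
  ISD::CondCode CC = ISD::SETULT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i64);  // ISD::SETUGE
  ISD::CondCode Swap = ISD::getSetCCSwappedOperands(CC);   // ISD::SETUGT
  (void)Inv; (void)Swap;
}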
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned getKillRegState(bool B)
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
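A sketch of the all_of / any_of range wrappers above, using a shuffle-mask-style predicate; the helper names and the undef convention noted in the comment are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Range-based predicates over a mask; the mask contents are made up.
bool maskHasNoUndef(const SmallVector<int, 8> &Mask) {
  return llvm::all_of(Mask, [](int M) { return M >= 0; });  // -1 conventionally marks undef
}
bool maskTouchesSecondVector(const SmallVector<int, 8> &Mask, int NumElts) {
  return llvm::any_of(Mask, [=](int M) { return M >= NumElts; });
}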
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ Other
Any other memory.
Definition ModRef.h:68
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
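The integer-range and mask predicates listed above (isInt, isUInt, isShiftedInt, isMask_64, isShiftedMask_64, isPowerOf2_64, Log2_64) are the kind of helpers backends typically use to classify immediates. A few hand-computed, purely illustrative checks:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(isInt<12>(2047) && !isInt<12>(2048), "signed 12-bit range is [-2048, 2047]");
static_assert(isUInt<12>(4095) && !isUInt<12>(4096), "unsigned 12-bit range is [0, 4095]");
static_assert(isShiftedInt<12, 2>(8188), "8188 == 2047 << 2: a 12-bit value shifted left by 2");
static_assert(isMask_64(0x00ff), "a run of ones starting at bit 0");
static_assert(isShiftedMask_64(0x0ff0), "one contiguous run of ones, zeros elsewhere");
static_assert(isPowerOf2_64(64), "exactly one bit set");

unsigned floorLog2Of64() { return Log2_64(64); }  // returns 6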
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
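A sketch of the isa / cast / dyn_cast trio documented above, applied to an LLVM IR Value; the 12-bit threshold and the function names are hypothetical:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Classify an IR value using the casting utilities.
bool isSmallIntConstant(const Value *V) {
  if (!isa<ConstantInt>(V))               // cheap type test, no cast performed
    return false;
  const auto *CI = cast<ConstantInt>(V);   // asserted cast: V is known to be a ConstantInt
  return CI->getValue().isSignedIntN(12);
}

// dyn_cast combines the test and the cast, yielding nullptr on mismatch.
const ConstantInt *asConstantInt(const Value *V) {
  return dyn_cast<ConstantInt>(V);
}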
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
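A small sketch exercising several of the EVT queries above. EVT::getVectorVT is assumed to exist alongside the getIntegerVT entry shown here, and the v4i32 type is an arbitrary example:

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Build and query a v4i32 value type.
void evtQueries(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);
  bool Vec = VT.isVector();                      // true
  unsigned NumElts = VT.getVectorNumElements();  // 4
  EVT EltVT = VT.getVectorElementType();         // i32
  uint64_t Bits = VT.getFixedSizeInBits();       // 128
  bool Is128 = VT.is128BitVector();              // true
  (void)Vec; (void)NumElts; (void)EltVT; (void)Bits; (void)Is128;
}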
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...