1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103 // we know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
141
145 Custom);
147 }
148
149 // Set operations for LA32 only.
150
151 if (!Subtarget.is64Bit()) {
157 }
158
160
161 static const ISD::CondCode FPCCToExpand[] = {
164
165 // Set operations for 'F' feature.
166
167 if (Subtarget.hasBasicF()) {
168 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
169 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
170 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
171
187
188 if (Subtarget.is64Bit())
190
191 if (!Subtarget.hasBasicD()) {
193 if (Subtarget.is64Bit()) {
196 }
197 }
198 }
199
200 // Set operations for 'D' feature.
201
202 if (Subtarget.hasBasicD()) {
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
204 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208
224
225 if (Subtarget.is64Bit())
227 }
228
229 // Set operations for 'LSX' feature.
230
231 if (Subtarget.hasExtLSX()) {
233 // Expand all truncating stores and extending loads.
234 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
235 setTruncStoreAction(VT, InnerVT, Expand);
238 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
239 }
240 // By default everything must be expanded. Then we will selectively turn
241 // on ones that can be effectively codegen'd.
242 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
244 }
245
246 for (MVT VT : LSXVTs) {
250
254
258 }
259 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
262 Legal);
264 VT, Legal);
271 Expand);
272 }
273 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
275 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
277 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
280 }
281 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
289 VT, Expand);
290 }
292 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
293 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
294 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
295 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
296 }
297
298 // Set operations for 'LASX' feature.
299
300 if (Subtarget.hasExtLASX()) {
301 for (MVT VT : LASXVTs) {
305
310
314 }
315 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
318 Legal);
320 VT, Legal);
327 Expand);
328 }
329 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
331 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
333 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
336 }
337 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
345 VT, Expand);
346 }
347 }
348
349 // Set DAG combine for LA32 and LA64.
350
355
356 // Set DAG combine for 'LSX' feature.
357
358 if (Subtarget.hasExtLSX())
360
361 // Compute derived properties from the register classes.
363
365
368
370
372
373 // Function alignments.
375 // Set preferred alignments.
379
380 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
381 if (Subtarget.hasLAMCAS())
383}
384
386 const GlobalAddressSDNode *GA) const {
387 // In order to maximise the opportunity for common subexpression elimination,
388 // keep a separate ADD node for the global address offset instead of folding
389 // it in the global address node. Later peephole optimisations may choose to
390 // fold it back in when profitable.
391 return false;
392}
393
395 SelectionDAG &DAG) const {
396 switch (Op.getOpcode()) {
398 return lowerATOMIC_FENCE(Op, DAG);
400 return lowerEH_DWARF_CFA(Op, DAG);
402 return lowerGlobalAddress(Op, DAG);
404 return lowerGlobalTLSAddress(Op, DAG);
406 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
408 return lowerINTRINSIC_W_CHAIN(Op, DAG);
410 return lowerINTRINSIC_VOID(Op, DAG);
412 return lowerBlockAddress(Op, DAG);
413 case ISD::JumpTable:
414 return lowerJumpTable(Op, DAG);
415 case ISD::SHL_PARTS:
416 return lowerShiftLeftParts(Op, DAG);
417 case ISD::SRA_PARTS:
418 return lowerShiftRightParts(Op, DAG, true);
419 case ISD::SRL_PARTS:
420 return lowerShiftRightParts(Op, DAG, false);
422 return lowerConstantPool(Op, DAG);
423 case ISD::FP_TO_SINT:
424 return lowerFP_TO_SINT(Op, DAG);
425 case ISD::BITCAST:
426 return lowerBITCAST(Op, DAG);
427 case ISD::UINT_TO_FP:
428 return lowerUINT_TO_FP(Op, DAG);
429 case ISD::SINT_TO_FP:
430 return lowerSINT_TO_FP(Op, DAG);
431 case ISD::VASTART:
432 return lowerVASTART(Op, DAG);
433 case ISD::FRAMEADDR:
434 return lowerFRAMEADDR(Op, DAG);
435 case ISD::RETURNADDR:
436 return lowerRETURNADDR(Op, DAG);
438 return lowerWRITE_REGISTER(Op, DAG);
440 return lowerINSERT_VECTOR_ELT(Op, DAG);
442 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
444 return lowerBUILD_VECTOR(Op, DAG);
446 return lowerVECTOR_SHUFFLE(Op, DAG);
447 case ISD::BITREVERSE:
448 return lowerBITREVERSE(Op, DAG);
449 }
450 return SDValue();
451}
452
453SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
454 SelectionDAG &DAG) const {
455 EVT ResTy = Op->getValueType(0);
456 SDValue Src = Op->getOperand(0);
457 SDLoc DL(Op);
458
459 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
460 unsigned int OrigEltNum = ResTy.getVectorNumElements();
461 unsigned int NewEltNum = NewVT.getVectorNumElements();
462
463 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
464
466 for (unsigned int i = 0; i < NewEltNum; i++) {
467 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
468 DAG.getConstant(i, DL, MVT::i64));
469 SDValue RevOp = DAG.getNode((ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
472 DL, MVT::i64, Op);
473 Ops.push_back(RevOp);
474 }
475 SDValue Res =
476 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
477
478 switch (ResTy.getSimpleVT().SimpleTy) {
479 default:
480 return SDValue();
481 case MVT::v16i8:
482 case MVT::v32i8:
483 return Res;
484 case MVT::v8i16:
485 case MVT::v16i16:
486 case MVT::v4i32:
487 case MVT::v8i32: {
489 for (unsigned int i = 0; i < NewEltNum; i++)
490 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
491 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
492 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
493 }
494 }
495}
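// A worked example of the routine above: bitreversing a v8i16 value bitcasts
// the source to v2i64 (NewEltNum = 2), bit-reverses each extracted i64
// element, and then restores the original element order via the shuffle mask
// computed in the switch, which for OrigEltNum = 8 is <3, 2, 1, 0, 7, 6, 5, 4>.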
496
497/// Determine whether a range fits a regular pattern of values.
498/// This function accounts for the possibility of jumping over the End iterator.
499template <typename ValType>
500static bool
502 unsigned CheckStride,
504 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
505 auto &I = Begin;
506
507 while (I != End) {
508 if (*I != -1 && *I != ExpectedIndex)
509 return false;
510 ExpectedIndex += ExpectedIndexStride;
511
512 // Incrementing past End is undefined behaviour so we must increment one
513 // step at a time and check for End at each step.
514 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
515 ; // Empty loop body.
516 }
517 return true;
518}
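// A brief usage sketch of the helper above, for Mask = <0, 4, 1, 5>:
//   fitsRegularPattern<int>(Mask.begin(),     2, Mask.end(), 0, 1)
//     checks mask positions 0 and 2 against 0 and 1 -> true
//   fitsRegularPattern<int>(Mask.begin() + 1, 2, Mask.end(), 4, 1)
//     checks mask positions 1 and 3 against 4 and 5 -> true
// This is exactly how lowerVECTOR_SHUFFLE_VILVL below recognizes an
// interleave of the low halves of two 4-element vectors.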
519
520/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
521///
522/// VREPLVEI performs vector broadcast based on an element specified by an
523/// integer immediate, with its mask being similar to:
524/// <x, x, x, ...>
525/// where x is any valid index.
526///
527/// When undef's appear in the mask they are treated as if they were whatever
528/// value is necessary in order to fit the above form.
530 MVT VT, SDValue V1, SDValue V2,
531 SelectionDAG &DAG) {
532 int SplatIndex = -1;
533 for (const auto &M : Mask) {
534 if (M != -1) {
535 SplatIndex = M;
536 break;
537 }
538 }
539
540 if (SplatIndex == -1)
541 return DAG.getUNDEF(VT);
542
543 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
544 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
545 APInt Imm(64, SplatIndex);
546 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
547 DAG.getConstant(Imm, DL, MVT::i64));
548 }
549
550 return SDValue();
551}
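// For example, a v4i32 shuffle with Mask = <2, -1, 2, 2> has SplatIndex = 2,
// every defined element matches it (stride 0), and the shuffle is lowered to
// VREPLVEI with immediate 2, broadcasting element 2 of V1.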
552
553/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
554///
555/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
556/// elements according to a <4 x i2> constant (encoded as an integer immediate).
557///
558/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
559/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
560/// When undef's appear they are treated as if they were whatever value is
561/// necessary in order to fit the above forms.
562///
563/// For example:
564/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
565/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
566/// i32 7, i32 6, i32 5, i32 4>
567/// is lowered to:
568/// (VSHUF4I_H $v0, $v1, 27)
569/// where the 27 comes from:
570/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
572 MVT VT, SDValue V1, SDValue V2,
573 SelectionDAG &DAG) {
574
575 // When the size is less than 4, lower-cost instructions may be used.
576 if (Mask.size() < 4)
577 return SDValue();
578
579 int SubMask[4] = {-1, -1, -1, -1};
580 for (unsigned i = 0; i < 4; ++i) {
581 for (unsigned j = i; j < Mask.size(); j += 4) {
582 int Idx = Mask[j];
583
584 // Convert from vector index to 4-element subvector index
585 // If an index refers to an element outside of the subvector then give up
586 if (Idx != -1) {
587 Idx -= 4 * (j / 4);
588 if (Idx < 0 || Idx >= 4)
589 return SDValue();
590 }
591
592 // If the mask has an undef, replace it with the current index.
593 // Note that it might still be undef if the current index is also undef
594 if (SubMask[i] == -1)
595 SubMask[i] = Idx;
596 // Check that non-undef values are the same as in the mask. If they
597 // aren't then give up
598 else if (Idx != -1 && Idx != SubMask[i])
599 return SDValue();
600 }
601 }
602
603 // Calculate the immediate. Replace any remaining undefs with zero
604 APInt Imm(64, 0);
605 for (int i = 3; i >= 0; --i) {
606 int Idx = SubMask[i];
607
608 if (Idx == -1)
609 Idx = 0;
610
611 Imm <<= 2;
612 Imm |= Idx & 0x3;
613 }
614
615 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
616 DAG.getConstant(Imm, DL, MVT::i64));
617}
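// A minimal standalone sketch of the immediate encoding above, assuming the
// 4-element sub-mask has already been extracted and any undefs replaced by 0
// (encodeVShuf4iImm is only an illustrative name):
//
//   static unsigned encodeVShuf4iImm(const int SubMask[4]) {
//     unsigned Imm = 0;
//     for (int i = 3; i >= 0; --i)
//       Imm = (Imm << 2) | (SubMask[i] & 0x3);
//     return Imm; // {3, 2, 1, 0} encodes to 27 (0b00011011).
//   }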
618
619/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
620///
621/// VPACKEV interleaves the even elements from each vector.
622///
623/// It is possible to lower into VPACKEV when the mask consists of two of the
624/// following forms interleaved:
625/// <0, 2, 4, ...>
626/// <n, n+2, n+4, ...>
627/// where n is the number of elements in the vector.
628/// For example:
629/// <0, 0, 2, 2, 4, 4, ...>
630/// <0, n, 2, n+2, 4, n+4, ...>
631///
632/// When undef's appear in the mask they are treated as if they were whatever
633/// value is necessary in order to fit the above forms.
635 MVT VT, SDValue V1, SDValue V2,
636 SelectionDAG &DAG) {
637
638 const auto &Begin = Mask.begin();
639 const auto &End = Mask.end();
640 SDValue OriV1 = V1, OriV2 = V2;
641
642 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
643 V1 = OriV1;
644 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
645 V1 = OriV2;
646 else
647 return SDValue();
648
649 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
650 V2 = OriV1;
651 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
652 V2 = OriV2;
653 else
654 return SDValue();
655
656 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
657}
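// For example, a v4i32 shuffle with Mask = <0, 4, 2, 6>: the even mask
// positions <0, 2> fit <0, 2, ...> so V1 keeps the first source, the odd
// positions <4, 6> fit <n, n+2, ...> with n = 4 so V2 keeps the second
// source, and the node is built as VPACKEV with operands (V2, V1).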
658
659/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
660///
661/// VPACKOD interleaves the odd elements from each vector.
662///
663/// It is possible to lower into VPACKOD when the mask consists of two of the
664/// following forms interleaved:
665/// <1, 3, 5, ...>
666/// <n+1, n+3, n+5, ...>
667/// where n is the number of elements in the vector.
668/// For example:
669/// <1, 1, 3, 3, 5, 5, ...>
670/// <1, n+1, 3, n+3, 5, n+5, ...>
671///
672/// When undef's appear in the mask they are treated as if they were whatever
673/// value is necessary in order to fit the above forms.
675 MVT VT, SDValue V1, SDValue V2,
676 SelectionDAG &DAG) {
677
678 const auto &Begin = Mask.begin();
679 const auto &End = Mask.end();
680 SDValue OriV1 = V1, OriV2 = V2;
681
682 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
683 V1 = OriV1;
684 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
685 V1 = OriV2;
686 else
687 return SDValue();
688
689 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
690 V2 = OriV1;
691 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
692 V2 = OriV2;
693 else
694 return SDValue();
695
696 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
697}
698
699/// Lower VECTOR_SHUFFLE into VILVH (if possible).
700///
701/// VILVH interleaves consecutive elements from the left (highest-indexed) half
702/// of each vector.
703///
704/// It is possible to lower into VILVH when the mask consists of two of the
705/// following forms interleaved:
706/// <x, x+1, x+2, ...>
707/// <n+x, n+x+1, n+x+2, ...>
708/// where n is the number of elements in the vector and x is half n.
709/// For example:
710/// <x, x, x+1, x+1, x+2, x+2, ...>
711/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
712///
713/// When undef's appear in the mask they are treated as if they were whatever
714/// value is necessary in order to fit the above forms.
716 MVT VT, SDValue V1, SDValue V2,
717 SelectionDAG &DAG) {
718
719 const auto &Begin = Mask.begin();
720 const auto &End = Mask.end();
721 unsigned HalfSize = Mask.size() / 2;
722 SDValue OriV1 = V1, OriV2 = V2;
723
724 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
725 V1 = OriV1;
726 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
727 V1 = OriV2;
728 else
729 return SDValue();
730
731 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
732 V2 = OriV1;
733 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
734 1))
735 V2 = OriV2;
736 else
737 return SDValue();
738
739 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
740}
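// For example, a v4i32 shuffle (HalfSize = 2) with Mask = <2, 6, 3, 7>: the
// even positions <2, 3> fit <HalfSize, HalfSize+1, ...> and the odd positions
// <6, 7> fit <n+HalfSize, n+HalfSize+1, ...> with n = 4, so the high halves
// of V1 and V2 are interleaved by VILVH.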
741
742/// Lower VECTOR_SHUFFLE into VILVL (if possible).
743///
744/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
745/// of each vector.
746///
747/// It is possible to lower into VILVL when the mask consists of two of the
748/// following forms interleaved:
749/// <0, 1, 2, ...>
750/// <n, n+1, n+2, ...>
751/// where n is the number of elements in the vector.
752/// For example:
753/// <0, 0, 1, 1, 2, 2, ...>
754/// <0, n, 1, n+1, 2, n+2, ...>
755///
756/// When undef's appear in the mask they are treated as if they were whatever
757/// value is necessary in order to fit the above forms.
759 MVT VT, SDValue V1, SDValue V2,
760 SelectionDAG &DAG) {
761
762 const auto &Begin = Mask.begin();
763 const auto &End = Mask.end();
764 SDValue OriV1 = V1, OriV2 = V2;
765
766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
767 V1 = OriV1;
768 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
769 V1 = OriV2;
770 else
771 return SDValue();
772
773 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
774 V2 = OriV1;
775 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
776 V2 = OriV2;
777 else
778 return SDValue();
779
780 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
781}
782
783/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
784///
785/// VPICKEV copies the even elements of each vector into the result vector.
786///
787/// It is possible to lower into VPICKEV when the mask consists of two of the
788/// following forms concatenated:
789/// <0, 2, 4, ...>
790/// <n, n+2, n+4, ...>
791/// where n is the number of elements in the vector.
792/// For example:
793/// <0, 2, 4, ..., 0, 2, 4, ...>
794/// <0, 2, 4, ..., n, n+2, n+4, ...>
795///
796/// When undef's appear in the mask they are treated as if they were whatever
797/// value is necessary in order to fit the above forms.
799 MVT VT, SDValue V1, SDValue V2,
800 SelectionDAG &DAG) {
801
802 const auto &Begin = Mask.begin();
803 const auto &Mid = Mask.begin() + Mask.size() / 2;
804 const auto &End = Mask.end();
805 SDValue OriV1 = V1, OriV2 = V2;
806
807 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
808 V1 = OriV1;
809 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
810 V1 = OriV2;
811 else
812 return SDValue();
813
814 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
815 V2 = OriV1;
816 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
817 V2 = OriV2;
818
819 else
820 return SDValue();
821
822 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
823}
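// For example, a v4i32 shuffle with Mask = <0, 2, 4, 6>: the first half
// <0, 2> fits <0, 2, ...> and the second half <4, 6> fits <n, n+2, ...> with
// n = 4, so the even elements of V1 and V2 are packed together by VPICKEV.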
824
825/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
826///
827/// VPICKOD copies the odd elements of each vector into the result vector.
828///
829/// It is possible to lower into VPICKOD when the mask consists of two of the
830/// following forms concatenated:
831/// <1, 3, 5, ...>
832/// <n+1, n+3, n+5, ...>
833/// where n is the number of elements in the vector.
834/// For example:
835/// <1, 3, 5, ..., 1, 3, 5, ...>
836/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
837///
838/// When undef's appear in the mask they are treated as if they were whatever
839/// value is necessary in order to fit the above forms.
841 MVT VT, SDValue V1, SDValue V2,
842 SelectionDAG &DAG) {
843
844 const auto &Begin = Mask.begin();
845 const auto &Mid = Mask.begin() + Mask.size() / 2;
846 const auto &End = Mask.end();
847 SDValue OriV1 = V1, OriV2 = V2;
848
849 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
850 V1 = OriV1;
851 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
852 V1 = OriV2;
853 else
854 return SDValue();
855
856 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
857 V2 = OriV1;
858 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
859 V2 = OriV2;
860 else
861 return SDValue();
862
863 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
864}
865
866/// Lower VECTOR_SHUFFLE into VSHUF.
867///
868/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
869/// adding it as an operand to the resulting VSHUF.
871 MVT VT, SDValue V1, SDValue V2,
872 SelectionDAG &DAG) {
873
875 for (auto M : Mask)
876 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
877
878 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
879 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
880
881 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
882 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
883 // VSHUF concatenates the vectors in a bitwise fashion:
884 // <0b00, 0b01> + <0b10, 0b11> ->
885 // 0b0100 + 0b1110 -> 0b01001110
886 // <0b10, 0b11, 0b00, 0b01>
887 // We must therefore swap the operands to get the correct result.
888 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
889}
890
891/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
892///
893/// This routine breaks down the specific type of 128-bit shuffle and
894/// dispatches to the lowering routines accordingly.
896 SDValue V1, SDValue V2, SelectionDAG &DAG) {
897 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
898 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
899 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
900 "Vector type is unsupported for lsx!");
901 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
902 "Two operands have different types!");
903 assert(VT.getVectorNumElements() == Mask.size() &&
904 "Unexpected mask size for shuffle!");
905 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
906
907 SDValue Result;
908 // TODO: Add more comparison patterns.
909 if (V2.isUndef()) {
910 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
911 return Result;
912 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
913 return Result;
914
915 // TODO: The commented-out assignment below may be enabled in the future
916 // to better match the pattern for instruction selection.
917 /* V2 = V1; */
918 }
919
920 // For better performance, it is recommended not to change the order of
921 // the pattern comparisons below.
922 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
923 return Result;
924 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
925 return Result;
926 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
927 return Result;
928 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
929 return Result;
930 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
931 return Result;
932 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
933 return Result;
934 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
935 return Result;
936
937 return SDValue();
938}
939
940/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
941///
942/// It is an XVREPLVEI when the mask is:
943/// <x, x, x, ..., x+n, x+n, x+n, ...>
944/// where x appears n times, x+n appears n times, and n is half the vector length.
945///
946/// When undef's appear in the mask they are treated as if they were whatever
947/// value is necessary in order to fit the above form.
949 ArrayRef<int> Mask, MVT VT,
950 SDValue V1, SDValue V2,
951 SelectionDAG &DAG) {
952 int SplatIndex = -1;
953 for (const auto &M : Mask) {
954 if (M != -1) {
955 SplatIndex = M;
956 break;
957 }
958 }
959
960 if (SplatIndex == -1)
961 return DAG.getUNDEF(VT);
962
963 const auto &Begin = Mask.begin();
964 const auto &End = Mask.end();
965 unsigned HalfSize = Mask.size() / 2;
966
967 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
968 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
969 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
970 0)) {
971 APInt Imm(64, SplatIndex);
972 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
973 DAG.getConstant(Imm, DL, MVT::i64));
974 }
975
976 return SDValue();
977}
978
979/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
981 MVT VT, SDValue V1, SDValue V2,
982 SelectionDAG &DAG) {
983 // When the size is less than or equal to 4, lower-cost instructions may
984 // be used.
985 if (Mask.size() <= 4)
986 return SDValue();
987 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
988}
989
990/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
992 MVT VT, SDValue V1, SDValue V2,
993 SelectionDAG &DAG) {
994 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
995}
996
997/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
999 MVT VT, SDValue V1, SDValue V2,
1000 SelectionDAG &DAG) {
1001 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1002}
1003
1004/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1006 MVT VT, SDValue V1, SDValue V2,
1007 SelectionDAG &DAG) {
1008
1009 const auto &Begin = Mask.begin();
1010 const auto &End = Mask.end();
1011 unsigned HalfSize = Mask.size() / 2;
1012 unsigned LeftSize = HalfSize / 2;
1013 SDValue OriV1 = V1, OriV2 = V2;
1014
1015 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1016 1) &&
1017 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1018 V1 = OriV1;
1019 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1020 Mask.size() + HalfSize - LeftSize, 1) &&
1021 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1022 Mask.size() + HalfSize + LeftSize, 1))
1023 V1 = OriV2;
1024 else
1025 return SDValue();
1026
1027 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1028 1) &&
1029 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1030 1))
1031 V2 = OriV1;
1032 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1033 Mask.size() + HalfSize - LeftSize, 1) &&
1034 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1035 Mask.size() + HalfSize + LeftSize, 1))
1036 V2 = OriV2;
1037 else
1038 return SDValue();
1039
1040 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1041}
1042
1043/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1045 MVT VT, SDValue V1, SDValue V2,
1046 SelectionDAG &DAG) {
1047
1048 const auto &Begin = Mask.begin();
1049 const auto &End = Mask.end();
1050 unsigned HalfSize = Mask.size() / 2;
1051 SDValue OriV1 = V1, OriV2 = V2;
1052
1053 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1054 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1055 V1 = OriV1;
1056 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1057 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1058 Mask.size() + HalfSize, 1))
1059 V1 = OriV2;
1060 else
1061 return SDValue();
1062
1063 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1064 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1065 V2 = OriV1;
1066 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1067 1) &&
1068 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1069 Mask.size() + HalfSize, 1))
1070 V2 = OriV2;
1071 else
1072 return SDValue();
1073
1074 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1075}
1076
1077/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1079 MVT VT, SDValue V1, SDValue V2,
1080 SelectionDAG &DAG) {
1081
1082 const auto &Begin = Mask.begin();
1083 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1084 const auto &Mid = Mask.begin() + Mask.size() / 2;
1085 const auto &RightMid = Mask.end() - Mask.size() / 4;
1086 const auto &End = Mask.end();
1087 unsigned HalfSize = Mask.size() / 2;
1088 SDValue OriV1 = V1, OriV2 = V2;
1089
1090 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1091 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1092 V1 = OriV1;
1093 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1094 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1095 V1 = OriV2;
1096 else
1097 return SDValue();
1098
1099 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1100 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1101 V2 = OriV1;
1102 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1103 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1104 V2 = OriV2;
1105
1106 else
1107 return SDValue();
1108
1109 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1110}
1111
1112/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1114 MVT VT, SDValue V1, SDValue V2,
1115 SelectionDAG &DAG) {
1116
1117 const auto &Begin = Mask.begin();
1118 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1119 const auto &Mid = Mask.begin() + Mask.size() / 2;
1120 const auto &RightMid = Mask.end() - Mask.size() / 4;
1121 const auto &End = Mask.end();
1122 unsigned HalfSize = Mask.size() / 2;
1123 SDValue OriV1 = V1, OriV2 = V2;
1124
1125 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1126 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1127 V1 = OriV1;
1128 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1129 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1130 2))
1131 V1 = OriV2;
1132 else
1133 return SDValue();
1134
1135 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1136 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1137 V2 = OriV1;
1138 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1139 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1140 2))
1141 V2 = OriV2;
1142 else
1143 return SDValue();
1144
1145 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1146}
1147
1148/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1150 MVT VT, SDValue V1, SDValue V2,
1151 SelectionDAG &DAG) {
1152
1153 int MaskSize = Mask.size();
1154 int HalfSize = Mask.size() / 2;
1155 const auto &Begin = Mask.begin();
1156 const auto &Mid = Mask.begin() + HalfSize;
1157 const auto &End = Mask.end();
1158
1159 // VECTOR_SHUFFLE concatenates the vectors:
1160 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1161 // shuffling ->
1162 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1163 //
1164 // XVSHUF concatenates the vectors:
1165 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1166 // shuffling ->
1167 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1168 SmallVector<SDValue, 8> MaskAlloc;
1169 for (auto it = Begin; it < Mid; it++) {
1170 if (*it < 0) // UNDEF
1171 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1172 else if ((*it >= 0 && *it < HalfSize) ||
1173 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1174 int M = *it < HalfSize ? *it : *it - HalfSize;
1175 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1176 } else
1177 return SDValue();
1178 }
1179 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1180
1181 for (auto it = Mid; it < End; it++) {
1182 if (*it < 0) // UNDEF
1183 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1184 else if ((*it >= HalfSize && *it < MaskSize) ||
1185 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1186 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1187 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1188 } else
1189 return SDValue();
1190 }
1191 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1192
1193 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1194 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1195 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1196}
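// For example, a v8i32 shuffle (MaskSize = 8, HalfSize = 4) with
// Mask = <0, 8, 1, 9, 4, 12, 5, 13> is rewritten above to the per-lane mask
// <0, 4, 1, 5, 0, 4, 1, 5>, matching the interleaved operand layout that
// XVSHUF expects.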
1197
1198/// Shuffle vectors by lane to generate more optimized instructions.
1199/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1200///
1201/// Therefore, any shuffle other than the following four cases is regarded
1202/// as a cross-lane shuffle, for which optimization is relatively limited.
1203///
1204/// - Shuffle high, low lanes of the two input vectors
1205/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1206/// - Shuffle low, high lanes of the two input vectors
1207/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1208/// - Shuffle low, low lanes of the two input vectors
1209/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1210/// - Shuffle high, high lanes of the two input vectors
1211/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1212///
1213/// The first case is the closest to the LoongArch instructions, and the other
1214/// cases need to be converted to it before processing.
1215///
1216/// This function may modify V1, V2 and Mask
1218 MutableArrayRef<int> Mask, MVT VT,
1219 SDValue &V1, SDValue &V2,
1220 SelectionDAG &DAG) {
1221
1222 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1223
1224 int MaskSize = Mask.size();
1225 int HalfSize = Mask.size() / 2;
1226
1227 HalfMaskType preMask = None, postMask = None;
1228
1229 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1230 return M < 0 || (M >= 0 && M < HalfSize) ||
1231 (M >= MaskSize && M < MaskSize + HalfSize);
1232 }))
1233 preMask = HighLaneTy;
1234 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1235 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1236 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1237 }))
1238 preMask = LowLaneTy;
1239
1240 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1241 return M < 0 || (M >= 0 && M < HalfSize) ||
1242 (M >= MaskSize && M < MaskSize + HalfSize);
1243 }))
1244 postMask = HighLaneTy;
1245 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1246 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1247 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1248 }))
1249 postMask = LowLaneTy;
1250
1251 // The first half of the mask is high-lane type and the second half is
1252 // low-lane type, which is closest to the LoongArch instructions.
1253 //
1254 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1255 // to the lower 128 bits of the vector register, and the low lane of the mask
1256 // corresponds to the higher 128 bits of the vector register.
1257 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1258 return;
1259 }
1260 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1261 V1 = DAG.getBitcast(MVT::v4i64, V1);
1262 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1263 DAG.getConstant(0b01001110, DL, MVT::i64));
1264 V1 = DAG.getBitcast(VT, V1);
1265
1266 if (!V2.isUndef()) {
1267 V2 = DAG.getBitcast(MVT::v4i64, V2);
1268 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1269 DAG.getConstant(0b01001110, DL, MVT::i64));
1270 V2 = DAG.getBitcast(VT, V2);
1271 }
1272
1273 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1274 *it = *it < 0 ? *it : *it - HalfSize;
1275 }
1276 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1277 *it = *it < 0 ? *it : *it + HalfSize;
1278 }
1279 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1280 V1 = DAG.getBitcast(MVT::v4i64, V1);
1281 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1282 DAG.getConstant(0b11101110, DL, MVT::i64));
1283 V1 = DAG.getBitcast(VT, V1);
1284
1285 if (!V2.isUndef()) {
1286 V2 = DAG.getBitcast(MVT::v4i64, V2);
1287 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1288 DAG.getConstant(0b11101110, DL, MVT::i64));
1289 V2 = DAG.getBitcast(VT, V2);
1290 }
1291
1292 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1293 *it = *it < 0 ? *it : *it - HalfSize;
1294 }
1295 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1296 V1 = DAG.getBitcast(MVT::v4i64, V1);
1297 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1298 DAG.getConstant(0b01000100, DL, MVT::i64));
1299 V1 = DAG.getBitcast(VT, V1);
1300
1301 if (!V2.isUndef()) {
1302 V2 = DAG.getBitcast(MVT::v4i64, V2);
1303 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1304 DAG.getConstant(0b01000100, DL, MVT::i64));
1305 V2 = DAG.getBitcast(VT, V2);
1306 }
1307
1308 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1309 *it = *it < 0 ? *it : *it + HalfSize;
1310 }
1311 } else { // cross-lane
1312 return;
1313 }
1314}
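// Assuming xvpermi.d's usual encoding, the XVPERMI immediates above can be
// read as four 2-bit selectors into the v4i64 bitcast (element i comes from
// bits [2*i+1 : 2*i]):
//   0b01001110 -> <2, 3, 0, 1>  swap the two 128-bit halves
//   0b11101110 -> <2, 3, 2, 3>  broadcast the high 128-bit half
//   0b01000100 -> <0, 1, 0, 1>  broadcast the low 128-bit half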
1315
1316/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1317///
1318/// This routine breaks down the specific type of 256-bit shuffle and
1319/// dispatches to the lowering routines accordingly.
1321 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1322 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1323 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1324 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1325 "Vector type is unsupported for lasx!");
1326 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1327 "Two operands have different types!");
1328 assert(VT.getVectorNumElements() == Mask.size() &&
1329 "Unexpected mask size for shuffle!");
1330 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1331 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1332
1333 // Canonicalize non-cross-lane shuffle vectors.
1334 SmallVector<int> NewMask(Mask);
1335 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1336
1337 SDValue Result;
1338 // TODO: Add more comparison patterns.
1339 if (V2.isUndef()) {
1340 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1341 return Result;
1342 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1343 return Result;
1344
1345 // TODO: The commented-out assignment below may be enabled in the future
1346 // to better match the pattern for instruction selection.
1347 /* V2 = V1; */
1348 }
1349
1350 // For better performance, it is recommended not to change the order of
1351 // the pattern comparisons below.
1352 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1353 return Result;
1354 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1355 return Result;
1356 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1357 return Result;
1358 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1359 return Result;
1360 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1361 return Result;
1362 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1363 return Result;
1364 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1365 return Result;
1366
1367 return SDValue();
1368}
1369
1370SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1371 SelectionDAG &DAG) const {
1372 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1373 ArrayRef<int> OrigMask = SVOp->getMask();
1374 SDValue V1 = Op.getOperand(0);
1375 SDValue V2 = Op.getOperand(1);
1376 MVT VT = Op.getSimpleValueType();
1377 int NumElements = VT.getVectorNumElements();
1378 SDLoc DL(Op);
1379
1380 bool V1IsUndef = V1.isUndef();
1381 bool V2IsUndef = V2.isUndef();
1382 if (V1IsUndef && V2IsUndef)
1383 return DAG.getUNDEF(VT);
1384
1385 // When we create a shuffle node we put the UNDEF node as the second operand,
1386 // but in some cases the first operand may be transformed to UNDEF.
1387 // In this case we should just commute the node.
1388 if (V1IsUndef)
1389 return DAG.getCommutedVectorShuffle(*SVOp);
1390
1391 // Check for non-undef masks pointing at an undef vector and make the masks
1392 // undef as well. This makes it easier to match the shuffle based solely on
1393 // the mask.
1394 if (V2IsUndef &&
1395 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1396 SmallVector<int, 8> NewMask(OrigMask);
1397 for (int &M : NewMask)
1398 if (M >= NumElements)
1399 M = -1;
1400 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1401 }
1402
1403 // Check for illegal shuffle mask element index values.
1404 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1405 (void)MaskUpperLimit;
1406 assert(llvm::all_of(OrigMask,
1407 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1408 "Out of bounds shuffle index");
1409
1410 // For each vector width, delegate to a specialized lowering routine.
1411 if (VT.is128BitVector())
1412 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1413
1414 if (VT.is256BitVector())
1415 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1416
1417 return SDValue();
1418}
1419
1420static bool isConstantOrUndef(const SDValue Op) {
1421 if (Op->isUndef())
1422 return true;
1423 if (isa<ConstantSDNode>(Op))
1424 return true;
1425 if (isa<ConstantFPSDNode>(Op))
1426 return true;
1427 return false;
1428}
1429
1431 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1432 if (isConstantOrUndef(Op->getOperand(i)))
1433 return true;
1434 return false;
1435}
1436
1437SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1438 SelectionDAG &DAG) const {
1439 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1440 EVT ResTy = Op->getValueType(0);
1441 SDLoc DL(Op);
1442 APInt SplatValue, SplatUndef;
1443 unsigned SplatBitSize;
1444 bool HasAnyUndefs;
1445 bool Is128Vec = ResTy.is128BitVector();
1446 bool Is256Vec = ResTy.is256BitVector();
1447
1448 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1449 (!Subtarget.hasExtLASX() || !Is256Vec))
1450 return SDValue();
1451
1452 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1453 /*MinSplatBits=*/8) &&
1454 SplatBitSize <= 64) {
1455 // We can only cope with 8, 16, 32, or 64-bit elements.
1456 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1457 SplatBitSize != 64)
1458 return SDValue();
1459
1460 EVT ViaVecTy;
1461
1462 switch (SplatBitSize) {
1463 default:
1464 return SDValue();
1465 case 8:
1466 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1467 break;
1468 case 16:
1469 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1470 break;
1471 case 32:
1472 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1473 break;
1474 case 64:
1475 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1476 break;
1477 }
1478
1479 // SelectionDAG::getConstant will promote SplatValue appropriately.
1480 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1481
1482 // Bitcast to the type we originally wanted.
1483 if (ViaVecTy != ResTy)
1484 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1485
1486 return Result;
1487 }
1488
1489 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1490 return Op;
1491
1493 // Use INSERT_VECTOR_ELT operations rather than expanding to stores.
1494 // The resulting code is the same length as the expansion, but it doesn't
1495 // use memory operations.
1496 EVT ResTy = Node->getValueType(0);
1497
1498 assert(ResTy.isVector());
1499
1500 unsigned NumElts = ResTy.getVectorNumElements();
1501 SDValue Vector = DAG.getUNDEF(ResTy);
1502 for (unsigned i = 0; i < NumElts; ++i) {
1504 Node->getOperand(i),
1505 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1506 }
1507 return Vector;
1508 }
1509
1510 return SDValue();
1511}
1512
1513SDValue
1514LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 EVT VecTy = Op->getOperand(0)->getValueType(0);
1517 SDValue Idx = Op->getOperand(1);
1518 EVT EltTy = VecTy.getVectorElementType();
1519 unsigned NumElts = VecTy.getVectorNumElements();
1520
1521 if (isa<ConstantSDNode>(Idx) &&
1522 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1523 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1524 return Op;
1525
1526 return SDValue();
1527}
1528
1529SDValue
1530LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1531 SelectionDAG &DAG) const {
1532 if (isa<ConstantSDNode>(Op->getOperand(2)))
1533 return Op;
1534 return SDValue();
1535}
1536
1537SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1538 SelectionDAG &DAG) const {
1539 SDLoc DL(Op);
1540 SyncScope::ID FenceSSID =
1541 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1542
1543 // singlethread fences only synchronize with signal handlers on the same
1544 // thread and thus only need to preserve instruction order, not actually
1545 // enforce memory ordering.
1546 if (FenceSSID == SyncScope::SingleThread)
1547 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1548 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1549
1550 return Op;
1551}
1552
1553SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1554 SelectionDAG &DAG) const {
1555
1556 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1557 DAG.getContext()->emitError(
1558 "On LA64, only 64-bit registers can be written.");
1559 return Op.getOperand(0);
1560 }
1561
1562 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1563 DAG.getContext()->emitError(
1564 "On LA32, only 32-bit registers can be written.");
1565 return Op.getOperand(0);
1566 }
1567
1568 return Op;
1569}
1570
1571SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1572 SelectionDAG &DAG) const {
1573 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1574 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1575 "be a constant integer");
1576 return SDValue();
1577 }
1578
1581 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1582 EVT VT = Op.getValueType();
1583 SDLoc DL(Op);
1584 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1585 unsigned Depth = Op.getConstantOperandVal(0);
1586 int GRLenInBytes = Subtarget.getGRLen() / 8;
1587
1588 while (Depth--) {
1589 int Offset = -(GRLenInBytes * 2);
1590 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1591 DAG.getSignedConstant(Offset, DL, VT));
1592 FrameAddr =
1593 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1594 }
1595 return FrameAddr;
1596}
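// For example, with Depth = 1 on LA64 (GRLenInBytes = 8), the loop above
// loads the previous frame address from the slot at FrameAddr - 16.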
1597
1598SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1599 SelectionDAG &DAG) const {
1601 return SDValue();
1602
1603 // Currently we only support lowering the return address for the current frame.
1604 if (Op.getConstantOperandVal(0) != 0) {
1605 DAG.getContext()->emitError(
1606 "return address can only be determined for the current frame");
1607 return SDValue();
1608 }
1609
1612 MVT GRLenVT = Subtarget.getGRLenVT();
1613
1614 // Return the value of the return address register, marking it an implicit
1615 // live-in.
1616 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1617 getRegClassFor(GRLenVT));
1618 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1619}
1620
1621SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1622 SelectionDAG &DAG) const {
1624 auto Size = Subtarget.getGRLen() / 8;
1625 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1626 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1627}
1628
1629SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1630 SelectionDAG &DAG) const {
1632 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1633
1634 SDLoc DL(Op);
1635 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1637
1638 // vastart just stores the address of the VarArgsFrameIndex slot into the
1639 // memory location argument.
1640 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1641 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1642 MachinePointerInfo(SV));
1643}
1644
1645SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1646 SelectionDAG &DAG) const {
1647 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1648 !Subtarget.hasBasicD() && "unexpected target features");
1649
1650 SDLoc DL(Op);
1651 SDValue Op0 = Op.getOperand(0);
1652 if (Op0->getOpcode() == ISD::AND) {
1653 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1654 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1655 return Op;
1656 }
1657
1658 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1659 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1660 Op0.getConstantOperandVal(2) == UINT64_C(0))
1661 return Op;
1662
1663 if (Op0.getOpcode() == ISD::AssertZext &&
1664 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1665 return Op;
1666
1667 EVT OpVT = Op0.getValueType();
1668 EVT RetVT = Op.getValueType();
1669 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1670 MakeLibCallOptions CallOptions;
1671 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1672 SDValue Chain = SDValue();
1674 std::tie(Result, Chain) =
1675 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1676 return Result;
1677}
1678
1679SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1680 SelectionDAG &DAG) const {
1681 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1682 !Subtarget.hasBasicD() && "unexpected target features");
1683
1684 SDLoc DL(Op);
1685 SDValue Op0 = Op.getOperand(0);
1686
1687 if ((Op0.getOpcode() == ISD::AssertSext ||
1689 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1690 return Op;
1691
1692 EVT OpVT = Op0.getValueType();
1693 EVT RetVT = Op.getValueType();
1694 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1695 MakeLibCallOptions CallOptions;
1696 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1697 SDValue Chain = SDValue();
1699 std::tie(Result, Chain) =
1700 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1701 return Result;
1702}
1703
1704SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1705 SelectionDAG &DAG) const {
1706
1707 SDLoc DL(Op);
1708 SDValue Op0 = Op.getOperand(0);
1709
1710 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1711 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1712 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1713 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1714 }
1715 return Op;
1716}
1717
1718SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1719 SelectionDAG &DAG) const {
1720
1721 SDLoc DL(Op);
1722 SDValue Op0 = Op.getOperand(0);
1723
1724 if (Op0.getValueType() == MVT::f16)
1725 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1726
1727 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1728 !Subtarget.hasBasicD()) {
1729 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1730 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1731 }
1732
1733 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1734 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1735 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1736}
1737
1739 SelectionDAG &DAG, unsigned Flags) {
1740 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1741}
1742
1744 SelectionDAG &DAG, unsigned Flags) {
1745 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1746 Flags);
1747}
1748
1750 SelectionDAG &DAG, unsigned Flags) {
1751 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1752 N->getOffset(), Flags);
1753}
1754
1756 SelectionDAG &DAG, unsigned Flags) {
1757 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1758}
1759
1760template <class NodeTy>
1761SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1763 bool IsLocal) const {
1764 SDLoc DL(N);
1765 EVT Ty = getPointerTy(DAG.getDataLayout());
1766 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1767 SDValue Load;
1768
1769 switch (M) {
1770 default:
1771 report_fatal_error("Unsupported code model");
1772
1773 case CodeModel::Large: {
1774 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1775
1776 // This is not actually used, but is necessary for successfully matching
1777 // the PseudoLA_*_LARGE nodes.
1778 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1779 if (IsLocal) {
1780 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1781 // eventually becomes the desired 5-insn code sequence.
1782 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1783 Tmp, Addr),
1784 0);
1785 } else {
1786 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1787 // eventually becomes the desired 5-insn code sequence.
1788 Load = SDValue(
1789 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1790 0);
1791 }
1792 break;
1793 }
1794
1795 case CodeModel::Small:
1796 case CodeModel::Medium:
1797 if (IsLocal) {
1798 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1799 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1800 Load = SDValue(
1801 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1802 } else {
1803 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1804 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1805 Load =
1806 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1807 }
1808 }
1809
1810 if (!IsLocal) {
1811 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1817 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1818 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1819 }
1820
1821 return Load;
1822}
1823
1824SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1825 SelectionDAG &DAG) const {
1826 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1827 DAG.getTarget().getCodeModel());
1828}
1829
1830SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1831 SelectionDAG &DAG) const {
1832 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1833 DAG.getTarget().getCodeModel());
1834}
1835
1836SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1837 SelectionDAG &DAG) const {
1838 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1839 DAG.getTarget().getCodeModel());
1840}
1841
1842SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1843 SelectionDAG &DAG) const {
1844 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1845 assert(N->getOffset() == 0 && "unexpected offset in global node");
1846 auto CM = DAG.getTarget().getCodeModel();
1847 const GlobalValue *GV = N->getGlobal();
1848
1849 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1850 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1851 CM = *GCM;
1852 }
1853
1854 return getAddr(N, DAG, CM, GV->isDSOLocal());
1855}
1856
1857SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1858 SelectionDAG &DAG,
1859 unsigned Opc, bool UseGOT,
1860 bool Large) const {
1861 SDLoc DL(N);
1862 EVT Ty = getPointerTy(DAG.getDataLayout());
1863 MVT GRLenVT = Subtarget.getGRLenVT();
1864
1865 // This is not actually used, but is necessary for successfully matching the
1866 // PseudoLA_*_LARGE nodes.
1867 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1868 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1870 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1871 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1872 if (UseGOT) {
1873 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1879 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1880 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1881 }
1882
1883 // Add the thread pointer.
1884 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1885 DAG.getRegister(LoongArch::R2, GRLenVT));
1886}
1887
1888SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1889 SelectionDAG &DAG,
1890 unsigned Opc,
1891 bool Large) const {
1892 SDLoc DL(N);
1893 EVT Ty = getPointerTy(DAG.getDataLayout());
1894 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1895
1896 // This is not actually used, but is necessary for successfully matching the
1897 // PseudoLA_*_LARGE nodes.
1898 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1899
1900 // Use a PC-relative addressing mode to access the dynamic GOT address.
1901 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1902 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1903 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1904
1905 // Prepare argument list to generate call.
1907 ArgListEntry Entry;
1908 Entry.Node = Load;
1909 Entry.Ty = CallTy;
1910 Args.push_back(Entry);
1911
1912 // Setup call to __tls_get_addr.
1914 CLI.setDebugLoc(DL)
1915 .setChain(DAG.getEntryNode())
1916 .setLibCallee(CallingConv::C, CallTy,
1917 DAG.getExternalSymbol("__tls_get_addr", Ty),
1918 std::move(Args));
1919
1920 return LowerCallTo(CLI).first;
1921}
1922
1923SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1924 SelectionDAG &DAG, unsigned Opc,
1925 bool Large) const {
1926 SDLoc DL(N);
1927 EVT Ty = getPointerTy(DAG.getDataLayout());
1928 const GlobalValue *GV = N->getGlobal();
1929
1930 // This is not actually used, but is necessary for successfully matching the
1931 // PseudoLA_*_LARGE nodes.
1932 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1933
1934 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1935 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1936 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1937 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1938 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1939}
1940
1941SDValue
1942LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1943 SelectionDAG &DAG) const {
1944 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1945 CallingConv::GHC)
1946 report_fatal_error("In GHC calling convention TLS is not supported");
1947
1948 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1949 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1950
1951 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1952 assert(N->getOffset() == 0 && "unexpected offset in global node");
1953
1954 if (DAG.getTarget().useEmulatedTLS())
1955 report_fatal_error("the emulated TLS is prohibited",
1956 /*GenCrashDiag=*/false);
1957
1958 bool IsDesc = DAG.getTarget().useTLSDESC();
1959
1960 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1961 case TLSModel::GeneralDynamic:
1962 // In this model, application code calls the dynamic linker function
1963 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1964 // runtime.
1965 if (!IsDesc)
1966 return getDynamicTLSAddr(N, DAG,
1967 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1968 : LoongArch::PseudoLA_TLS_GD,
1969 Large);
1970 break;
1971 case TLSModel::LocalDynamic:
1972 // Same as GeneralDynamic, except for assembly modifiers and relocation
1973 // records.
1974 if (!IsDesc)
1975 return getDynamicTLSAddr(N, DAG,
1976 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1977 : LoongArch::PseudoLA_TLS_LD,
1978 Large);
1979 break;
1980 case TLSModel::InitialExec:
1981 // This model uses the GOT to resolve TLS offsets.
1982 return getStaticTLSAddr(N, DAG,
1983 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1984 : LoongArch::PseudoLA_TLS_IE,
1985 /*UseGOT=*/true, Large);
1986 case TLSModel::LocalExec:
1987 // This model is used for static linking, as the TLS offsets are resolved
1988 // during program linking.
1989 //
1990 // This node doesn't need an extra argument for the large code model.
1991 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1992 /*UseGOT=*/false);
1993 }
1994
1995 return getTLSDescAddr(N, DAG,
1996 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
1997 : LoongArch::PseudoLA_TLS_DESC,
1998 Large);
1999}
2000
2001template <unsigned N>
2002 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2003 SelectionDAG &DAG, bool IsSigned = false) {
2004 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2005 // Check the ImmArg.
2006 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2007 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2008 DAG.getContext()->emitError(Op->getOperationName(0) +
2009 ": argument out of range.");
2010 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2011 }
2012 return SDValue();
2013}
2014
2015SDValue
2016LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2017 SelectionDAG &DAG) const {
2018 SDLoc DL(Op);
2019 switch (Op.getConstantOperandVal(0)) {
2020 default:
2021 return SDValue(); // Don't custom lower most intrinsics.
2022 case Intrinsic::thread_pointer: {
2023 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2024 return DAG.getRegister(LoongArch::R2, PtrVT);
2025 }
2026 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2027 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2028 case Intrinsic::loongarch_lsx_vreplvei_d:
2029 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2030 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2031 case Intrinsic::loongarch_lsx_vreplvei_w:
2032 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2033 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2034 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2035 case Intrinsic::loongarch_lasx_xvpickve_d:
2036 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2037 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2038 case Intrinsic::loongarch_lasx_xvinsve0_d:
2039 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2040 case Intrinsic::loongarch_lsx_vsat_b:
2041 case Intrinsic::loongarch_lsx_vsat_bu:
2042 case Intrinsic::loongarch_lsx_vrotri_b:
2043 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2044 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2045 case Intrinsic::loongarch_lsx_vsrlri_b:
2046 case Intrinsic::loongarch_lsx_vsrari_b:
2047 case Intrinsic::loongarch_lsx_vreplvei_h:
2048 case Intrinsic::loongarch_lasx_xvsat_b:
2049 case Intrinsic::loongarch_lasx_xvsat_bu:
2050 case Intrinsic::loongarch_lasx_xvrotri_b:
2051 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2052 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2053 case Intrinsic::loongarch_lasx_xvsrlri_b:
2054 case Intrinsic::loongarch_lasx_xvsrari_b:
2055 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2056 case Intrinsic::loongarch_lasx_xvpickve_w:
2057 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2058 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2059 case Intrinsic::loongarch_lasx_xvinsve0_w:
2060 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2061 case Intrinsic::loongarch_lsx_vsat_h:
2062 case Intrinsic::loongarch_lsx_vsat_hu:
2063 case Intrinsic::loongarch_lsx_vrotri_h:
2064 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2065 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2066 case Intrinsic::loongarch_lsx_vsrlri_h:
2067 case Intrinsic::loongarch_lsx_vsrari_h:
2068 case Intrinsic::loongarch_lsx_vreplvei_b:
2069 case Intrinsic::loongarch_lasx_xvsat_h:
2070 case Intrinsic::loongarch_lasx_xvsat_hu:
2071 case Intrinsic::loongarch_lasx_xvrotri_h:
2072 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2073 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2074 case Intrinsic::loongarch_lasx_xvsrlri_h:
2075 case Intrinsic::loongarch_lasx_xvsrari_h:
2076 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2077 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2078 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2079 case Intrinsic::loongarch_lsx_vsrani_b_h:
2080 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2081 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2082 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2083 case Intrinsic::loongarch_lsx_vssrani_b_h:
2084 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2085 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2086 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2087 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2088 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2089 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2090 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2091 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2092 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2093 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2094 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2095 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2096 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2097 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2098 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2099 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2100 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2101 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2102 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2103 case Intrinsic::loongarch_lsx_vsat_w:
2104 case Intrinsic::loongarch_lsx_vsat_wu:
2105 case Intrinsic::loongarch_lsx_vrotri_w:
2106 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2107 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2108 case Intrinsic::loongarch_lsx_vsrlri_w:
2109 case Intrinsic::loongarch_lsx_vsrari_w:
2110 case Intrinsic::loongarch_lsx_vslei_bu:
2111 case Intrinsic::loongarch_lsx_vslei_hu:
2112 case Intrinsic::loongarch_lsx_vslei_wu:
2113 case Intrinsic::loongarch_lsx_vslei_du:
2114 case Intrinsic::loongarch_lsx_vslti_bu:
2115 case Intrinsic::loongarch_lsx_vslti_hu:
2116 case Intrinsic::loongarch_lsx_vslti_wu:
2117 case Intrinsic::loongarch_lsx_vslti_du:
2118 case Intrinsic::loongarch_lsx_vbsll_v:
2119 case Intrinsic::loongarch_lsx_vbsrl_v:
2120 case Intrinsic::loongarch_lasx_xvsat_w:
2121 case Intrinsic::loongarch_lasx_xvsat_wu:
2122 case Intrinsic::loongarch_lasx_xvrotri_w:
2123 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2124 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2125 case Intrinsic::loongarch_lasx_xvsrlri_w:
2126 case Intrinsic::loongarch_lasx_xvsrari_w:
2127 case Intrinsic::loongarch_lasx_xvslei_bu:
2128 case Intrinsic::loongarch_lasx_xvslei_hu:
2129 case Intrinsic::loongarch_lasx_xvslei_wu:
2130 case Intrinsic::loongarch_lasx_xvslei_du:
2131 case Intrinsic::loongarch_lasx_xvslti_bu:
2132 case Intrinsic::loongarch_lasx_xvslti_hu:
2133 case Intrinsic::loongarch_lasx_xvslti_wu:
2134 case Intrinsic::loongarch_lasx_xvslti_du:
2135 case Intrinsic::loongarch_lasx_xvbsll_v:
2136 case Intrinsic::loongarch_lasx_xvbsrl_v:
2137 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2138 case Intrinsic::loongarch_lsx_vseqi_b:
2139 case Intrinsic::loongarch_lsx_vseqi_h:
2140 case Intrinsic::loongarch_lsx_vseqi_w:
2141 case Intrinsic::loongarch_lsx_vseqi_d:
2142 case Intrinsic::loongarch_lsx_vslei_b:
2143 case Intrinsic::loongarch_lsx_vslei_h:
2144 case Intrinsic::loongarch_lsx_vslei_w:
2145 case Intrinsic::loongarch_lsx_vslei_d:
2146 case Intrinsic::loongarch_lsx_vslti_b:
2147 case Intrinsic::loongarch_lsx_vslti_h:
2148 case Intrinsic::loongarch_lsx_vslti_w:
2149 case Intrinsic::loongarch_lsx_vslti_d:
2150 case Intrinsic::loongarch_lasx_xvseqi_b:
2151 case Intrinsic::loongarch_lasx_xvseqi_h:
2152 case Intrinsic::loongarch_lasx_xvseqi_w:
2153 case Intrinsic::loongarch_lasx_xvseqi_d:
2154 case Intrinsic::loongarch_lasx_xvslei_b:
2155 case Intrinsic::loongarch_lasx_xvslei_h:
2156 case Intrinsic::loongarch_lasx_xvslei_w:
2157 case Intrinsic::loongarch_lasx_xvslei_d:
2158 case Intrinsic::loongarch_lasx_xvslti_b:
2159 case Intrinsic::loongarch_lasx_xvslti_h:
2160 case Intrinsic::loongarch_lasx_xvslti_w:
2161 case Intrinsic::loongarch_lasx_xvslti_d:
2162 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2163 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2164 case Intrinsic::loongarch_lsx_vsrani_h_w:
2165 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2166 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2167 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2168 case Intrinsic::loongarch_lsx_vssrani_h_w:
2169 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2170 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2171 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2172 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2173 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2174 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2175 case Intrinsic::loongarch_lsx_vfrstpi_b:
2176 case Intrinsic::loongarch_lsx_vfrstpi_h:
2177 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2178 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2179 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2180 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2181 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2182 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2183 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2184 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2185 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2186 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2187 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2188 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2189 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2190 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2191 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2192 case Intrinsic::loongarch_lsx_vsat_d:
2193 case Intrinsic::loongarch_lsx_vsat_du:
2194 case Intrinsic::loongarch_lsx_vrotri_d:
2195 case Intrinsic::loongarch_lsx_vsrlri_d:
2196 case Intrinsic::loongarch_lsx_vsrari_d:
2197 case Intrinsic::loongarch_lasx_xvsat_d:
2198 case Intrinsic::loongarch_lasx_xvsat_du:
2199 case Intrinsic::loongarch_lasx_xvrotri_d:
2200 case Intrinsic::loongarch_lasx_xvsrlri_d:
2201 case Intrinsic::loongarch_lasx_xvsrari_d:
2202 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2203 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2204 case Intrinsic::loongarch_lsx_vsrani_w_d:
2205 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2206 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2207 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2208 case Intrinsic::loongarch_lsx_vssrani_w_d:
2209 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2210 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2211 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2212 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2213 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2214 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2215 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2216 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2217 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2218 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2219 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2220 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2221 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2222 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2223 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2224 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2225 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2226 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2227 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2228 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2229 case Intrinsic::loongarch_lsx_vsrani_d_q:
2230 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2231 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2232 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2233 case Intrinsic::loongarch_lsx_vssrani_d_q:
2234 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2235 case Intrinsic::loongarch_lsx_vssrani_du_q:
2236 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2237 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2238 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2239 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2240 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2241 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2242 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2243 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2244 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2245 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2246 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2247 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2248 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2249 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2250 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2251 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2252 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2253 case Intrinsic::loongarch_lsx_vnori_b:
2254 case Intrinsic::loongarch_lsx_vshuf4i_b:
2255 case Intrinsic::loongarch_lsx_vshuf4i_h:
2256 case Intrinsic::loongarch_lsx_vshuf4i_w:
2257 case Intrinsic::loongarch_lasx_xvnori_b:
2258 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2259 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2260 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2261 case Intrinsic::loongarch_lasx_xvpermi_d:
2262 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2263 case Intrinsic::loongarch_lsx_vshuf4i_d:
2264 case Intrinsic::loongarch_lsx_vpermi_w:
2265 case Intrinsic::loongarch_lsx_vbitseli_b:
2266 case Intrinsic::loongarch_lsx_vextrins_b:
2267 case Intrinsic::loongarch_lsx_vextrins_h:
2268 case Intrinsic::loongarch_lsx_vextrins_w:
2269 case Intrinsic::loongarch_lsx_vextrins_d:
2270 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2271 case Intrinsic::loongarch_lasx_xvpermi_w:
2272 case Intrinsic::loongarch_lasx_xvpermi_q:
2273 case Intrinsic::loongarch_lasx_xvbitseli_b:
2274 case Intrinsic::loongarch_lasx_xvextrins_b:
2275 case Intrinsic::loongarch_lasx_xvextrins_h:
2276 case Intrinsic::loongarch_lasx_xvextrins_w:
2277 case Intrinsic::loongarch_lasx_xvextrins_d:
2278 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2279 case Intrinsic::loongarch_lsx_vrepli_b:
2280 case Intrinsic::loongarch_lsx_vrepli_h:
2281 case Intrinsic::loongarch_lsx_vrepli_w:
2282 case Intrinsic::loongarch_lsx_vrepli_d:
2283 case Intrinsic::loongarch_lasx_xvrepli_b:
2284 case Intrinsic::loongarch_lasx_xvrepli_h:
2285 case Intrinsic::loongarch_lasx_xvrepli_w:
2286 case Intrinsic::loongarch_lasx_xvrepli_d:
2287 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2288 case Intrinsic::loongarch_lsx_vldi:
2289 case Intrinsic::loongarch_lasx_xvldi:
2290 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2291 }
2292}
2293
2294 // Helper function that emits an error message for intrinsics with a chain and
2295 // returns the merged values of an UNDEF and the chain.
2296 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2297 StringRef ErrorMsg,
2298 SelectionDAG &DAG) {
2299 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2300 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2301 SDLoc(Op));
2302}
2303
2304SDValue
2305LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2306 SelectionDAG &DAG) const {
2307 SDLoc DL(Op);
2308 MVT GRLenVT = Subtarget.getGRLenVT();
2309 EVT VT = Op.getValueType();
2310 SDValue Chain = Op.getOperand(0);
2311 const StringRef ErrorMsgOOR = "argument out of range";
2312 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2313 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2314
2315 switch (Op.getConstantOperandVal(1)) {
2316 default:
2317 return Op;
2318 case Intrinsic::loongarch_crc_w_b_w:
2319 case Intrinsic::loongarch_crc_w_h_w:
2320 case Intrinsic::loongarch_crc_w_w_w:
2321 case Intrinsic::loongarch_crc_w_d_w:
2322 case Intrinsic::loongarch_crcc_w_b_w:
2323 case Intrinsic::loongarch_crcc_w_h_w:
2324 case Intrinsic::loongarch_crcc_w_w_w:
2325 case Intrinsic::loongarch_crcc_w_d_w:
2326 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2327 case Intrinsic::loongarch_csrrd_w:
2328 case Intrinsic::loongarch_csrrd_d: {
2329 unsigned Imm = Op.getConstantOperandVal(2);
2330 return !isUInt<14>(Imm)
2331 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2332 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2333 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2334 }
2335 case Intrinsic::loongarch_csrwr_w:
2336 case Intrinsic::loongarch_csrwr_d: {
2337 unsigned Imm = Op.getConstantOperandVal(3);
2338 return !isUInt<14>(Imm)
2339 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2340 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2341 {Chain, Op.getOperand(2),
2342 DAG.getConstant(Imm, DL, GRLenVT)});
2343 }
2344 case Intrinsic::loongarch_csrxchg_w:
2345 case Intrinsic::loongarch_csrxchg_d: {
2346 unsigned Imm = Op.getConstantOperandVal(4);
2347 return !isUInt<14>(Imm)
2348 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2349 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2350 {Chain, Op.getOperand(2), Op.getOperand(3),
2351 DAG.getConstant(Imm, DL, GRLenVT)});
2352 }
2353 case Intrinsic::loongarch_iocsrrd_d: {
2354 return DAG.getNode(
2355 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2356 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2357 }
2358#define IOCSRRD_CASE(NAME, NODE) \
2359 case Intrinsic::loongarch_##NAME: { \
2360 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2361 {Chain, Op.getOperand(2)}); \
2362 }
2363 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2364 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2365 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2366#undef IOCSRRD_CASE
2367 case Intrinsic::loongarch_cpucfg: {
2368 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2369 {Chain, Op.getOperand(2)});
2370 }
2371 case Intrinsic::loongarch_lddir_d: {
2372 unsigned Imm = Op.getConstantOperandVal(3);
2373 return !isUInt<8>(Imm)
2374 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2375 : Op;
2376 }
2377 case Intrinsic::loongarch_movfcsr2gr: {
2378 if (!Subtarget.hasBasicF())
2379 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2380 unsigned Imm = Op.getConstantOperandVal(2);
2381 return !isUInt<2>(Imm)
2382 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2383 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2384 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2385 }
2386 case Intrinsic::loongarch_lsx_vld:
2387 case Intrinsic::loongarch_lsx_vldrepl_b:
2388 case Intrinsic::loongarch_lasx_xvld:
2389 case Intrinsic::loongarch_lasx_xvldrepl_b:
2390 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2391 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2392 : SDValue();
2393 case Intrinsic::loongarch_lsx_vldrepl_h:
2394 case Intrinsic::loongarch_lasx_xvldrepl_h:
2395 return !isShiftedInt<11, 1>(
2396 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2397 ? emitIntrinsicWithChainErrorMessage(
2398 Op, "argument out of range or not a multiple of 2", DAG)
2399 : SDValue();
2400 case Intrinsic::loongarch_lsx_vldrepl_w:
2401 case Intrinsic::loongarch_lasx_xvldrepl_w:
2402 return !isShiftedInt<10, 2>(
2403 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2404 ? emitIntrinsicWithChainErrorMessage(
2405 Op, "argument out of range or not a multiple of 4", DAG)
2406 : SDValue();
2407 case Intrinsic::loongarch_lsx_vldrepl_d:
2408 case Intrinsic::loongarch_lasx_xvldrepl_d:
2409 return !isShiftedInt<9, 3>(
2410 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2411 ? emitIntrinsicWithChainErrorMessage(
2412 Op, "argument out of range or not a multiple of 8", DAG)
2413 : SDValue();
2414 }
2415}
2416
2417 // Helper function that emits an error message for intrinsics with a void return
2418 // value and returns the chain.
2419 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2420 SelectionDAG &DAG) {
2421
2422 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2423 return Op.getOperand(0);
2424}
2425
2426SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2427 SelectionDAG &DAG) const {
2428 SDLoc DL(Op);
2429 MVT GRLenVT = Subtarget.getGRLenVT();
2430 SDValue Chain = Op.getOperand(0);
2431 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2432 SDValue Op2 = Op.getOperand(2);
2433 const StringRef ErrorMsgOOR = "argument out of range";
2434 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2435 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2436 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2437
2438 switch (IntrinsicEnum) {
2439 default:
2440 // TODO: Add more Intrinsics.
2441 return SDValue();
2442 case Intrinsic::loongarch_cacop_d:
2443 case Intrinsic::loongarch_cacop_w: {
2444 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2445 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2446 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2447 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2448 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2449 unsigned Imm1 = Op2->getAsZExtVal();
2450 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2451 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2452 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2453 return Op;
2454 }
2455 case Intrinsic::loongarch_dbar: {
2456 unsigned Imm = Op2->getAsZExtVal();
2457 return !isUInt<15>(Imm)
2458 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2459 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2460 DAG.getConstant(Imm, DL, GRLenVT));
2461 }
2462 case Intrinsic::loongarch_ibar: {
2463 unsigned Imm = Op2->getAsZExtVal();
2464 return !isUInt<15>(Imm)
2465 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2466 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2467 DAG.getConstant(Imm, DL, GRLenVT));
2468 }
2469 case Intrinsic::loongarch_break: {
2470 unsigned Imm = Op2->getAsZExtVal();
2471 return !isUInt<15>(Imm)
2472 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2474 DAG.getConstant(Imm, DL, GRLenVT));
2475 }
2476 case Intrinsic::loongarch_movgr2fcsr: {
2477 if (!Subtarget.hasBasicF())
2478 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2479 unsigned Imm = Op2->getAsZExtVal();
2480 return !isUInt<2>(Imm)
2481 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2482 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2483 DAG.getConstant(Imm, DL, GRLenVT),
2484 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2485 Op.getOperand(3)));
2486 }
2487 case Intrinsic::loongarch_syscall: {
2488 unsigned Imm = Op2->getAsZExtVal();
2489 return !isUInt<15>(Imm)
2490 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2491 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2492 DAG.getConstant(Imm, DL, GRLenVT));
2493 }
2494#define IOCSRWR_CASE(NAME, NODE) \
2495 case Intrinsic::loongarch_##NAME: { \
2496 SDValue Op3 = Op.getOperand(3); \
2497 return Subtarget.is64Bit() \
2498 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2499 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2500 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2501 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2502 Op3); \
2503 }
2504 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2505 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2506 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2507#undef IOCSRWR_CASE
2508 case Intrinsic::loongarch_iocsrwr_d: {
2509 return !Subtarget.is64Bit()
2510 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2511 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2512 Op2,
2513 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2514 Op.getOperand(3)));
2515 }
2516#define ASRT_LE_GT_CASE(NAME) \
2517 case Intrinsic::loongarch_##NAME: { \
2518 return !Subtarget.is64Bit() \
2519 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2520 : Op; \
2521 }
2522 ASRT_LE_GT_CASE(asrtle_d)
2523 ASRT_LE_GT_CASE(asrtgt_d)
2524#undef ASRT_LE_GT_CASE
2525 case Intrinsic::loongarch_ldpte_d: {
2526 unsigned Imm = Op.getConstantOperandVal(3);
2527 return !Subtarget.is64Bit()
2528 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2529 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2530 : Op;
2531 }
2532 case Intrinsic::loongarch_lsx_vst:
2533 case Intrinsic::loongarch_lasx_xvst:
2534 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2535 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2536 : SDValue();
2537 case Intrinsic::loongarch_lasx_xvstelm_b:
2538 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2539 !isUInt<5>(Op.getConstantOperandVal(5)))
2540 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2541 : SDValue();
2542 case Intrinsic::loongarch_lsx_vstelm_b:
2543 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2544 !isUInt<4>(Op.getConstantOperandVal(5)))
2545 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2546 : SDValue();
2547 case Intrinsic::loongarch_lasx_xvstelm_h:
2548 return (!isShiftedInt<8, 1>(
2549 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2550 !isUInt<4>(Op.getConstantOperandVal(5)))
2551 ? emitIntrinsicErrorMessage(
2552 Op, "argument out of range or not a multiple of 2", DAG)
2553 : SDValue();
2554 case Intrinsic::loongarch_lsx_vstelm_h:
2555 return (!isShiftedInt<8, 1>(
2556 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2557 !isUInt<3>(Op.getConstantOperandVal(5)))
2558 ? emitIntrinsicErrorMessage(
2559 Op, "argument out of range or not a multiple of 2", DAG)
2560 : SDValue();
2561 case Intrinsic::loongarch_lasx_xvstelm_w:
2562 return (!isShiftedInt<8, 2>(
2563 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2564 !isUInt<3>(Op.getConstantOperandVal(5)))
2565 ? emitIntrinsicErrorMessage(
2566 Op, "argument out of range or not a multiple of 4", DAG)
2567 : SDValue();
2568 case Intrinsic::loongarch_lsx_vstelm_w:
2569 return (!isShiftedInt<8, 2>(
2570 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2571 !isUInt<2>(Op.getConstantOperandVal(5)))
2572 ? emitIntrinsicErrorMessage(
2573 Op, "argument out of range or not a multiple of 4", DAG)
2574 : SDValue();
2575 case Intrinsic::loongarch_lasx_xvstelm_d:
2576 return (!isShiftedInt<8, 3>(
2577 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2578 !isUInt<2>(Op.getConstantOperandVal(5)))
2579 ? emitIntrinsicErrorMessage(
2580 Op, "argument out of range or not a multiple of 8", DAG)
2581 : SDValue();
2582 case Intrinsic::loongarch_lsx_vstelm_d:
2583 return (!isShiftedInt<8, 3>(
2584 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2585 !isUInt<1>(Op.getConstantOperandVal(5)))
2586 ? emitIntrinsicErrorMessage(
2587 Op, "argument out of range or not a multiple of 8", DAG)
2588 : SDValue();
2589 }
2590}
2591
2592SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2593 SelectionDAG &DAG) const {
2594 SDLoc DL(Op);
2595 SDValue Lo = Op.getOperand(0);
2596 SDValue Hi = Op.getOperand(1);
2597 SDValue Shamt = Op.getOperand(2);
2598 EVT VT = Lo.getValueType();
2599
2600 // if Shamt-GRLen < 0: // Shamt < GRLen
2601 // Lo = Lo << Shamt
2602 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2603 // else:
2604 // Lo = 0
2605 // Hi = Lo << (Shamt-GRLen)
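// For example, with GRLen=32 and Shamt=8: GRLen-1 ^ Shamt = 31 ^ 8 = 23, so
//   Hi = (Hi << 8) | ((Lo >>u 1) >>u 23) = (Hi << 8) | (Lo >>u 24),
// i.e. the top 8 bits of Lo are shifted into the low bits of Hi. With Shamt=40
// the else branch applies: Hi = Lo << 8 (using the incoming Lo) and the new Lo
// becomes 0.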
2606
2607 SDValue Zero = DAG.getConstant(0, DL, VT);
2608 SDValue One = DAG.getConstant(1, DL, VT);
2609 SDValue MinusGRLen =
2610 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2611 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2612 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2613 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2614
2615 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2616 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2617 SDValue ShiftRightLo =
2618 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2619 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2620 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2621 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2622
2623 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2624
2625 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2626 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2627
2628 SDValue Parts[2] = {Lo, Hi};
2629 return DAG.getMergeValues(Parts, DL);
2630}
2631
2632SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2633 SelectionDAG &DAG,
2634 bool IsSRA) const {
2635 SDLoc DL(Op);
2636 SDValue Lo = Op.getOperand(0);
2637 SDValue Hi = Op.getOperand(1);
2638 SDValue Shamt = Op.getOperand(2);
2639 EVT VT = Lo.getValueType();
2640
2641 // SRA expansion:
2642 // if Shamt-GRLen < 0: // Shamt < GRLen
2643 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2644 // Hi = Hi >>s Shamt
2645 // else:
2646 // Lo = Hi >>s (Shamt-GRLen);
2647 // Hi = Hi >>s (GRLen-1)
2648 //
2649 // SRL expansion:
2650 // if Shamt-GRLen < 0: // Shamt < GRLen
2651 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2652 // Hi = Hi >>u Shamt
2653 // else:
2654 // Lo = Hi >>u (Shamt-GRLen);
2655 // Hi = 0;
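// For example (SRL expansion), with GRLen=32 and Shamt=8:
//   Lo = (Lo >>u 8) | ((Hi << 1) << 23) = (Lo >>u 8) | (Hi << 24)
//   Hi = Hi >>u 8
// and with Shamt=40 the else branch gives Lo = Hi >>u 8 and Hi = 0. The SRA
// expansion only differs in using arithmetic shifts of Hi, so the upper word
// is filled with copies of the sign bit instead of zeros.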
2656
2657 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2658
2659 SDValue Zero = DAG.getConstant(0, DL, VT);
2660 SDValue One = DAG.getConstant(1, DL, VT);
2661 SDValue MinusGRLen =
2662 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2663 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2664 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2665 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2666
2667 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2668 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2669 SDValue ShiftLeftHi =
2670 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2671 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2672 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2673 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2674 SDValue HiFalse =
2675 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2676
2677 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2678
2679 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2680 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2681
2682 SDValue Parts[2] = {Lo, Hi};
2683 return DAG.getMergeValues(Parts, DL);
2684}
2685
2686// Returns the opcode of the target-specific SDNode that implements the 32-bit
2687// form of the given Opcode.
2688 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2689 switch (Opcode) {
2690 default:
2691 llvm_unreachable("Unexpected opcode");
2692 case ISD::SDIV:
2693 return LoongArchISD::DIV_W;
2694 case ISD::UDIV:
2695 return LoongArchISD::DIV_WU;
2696 case ISD::SREM:
2697 return LoongArchISD::MOD_W;
2698 case ISD::UREM:
2699 return LoongArchISD::MOD_WU;
2700 case ISD::SHL:
2701 return LoongArchISD::SLL_W;
2702 case ISD::SRA:
2703 return LoongArchISD::SRA_W;
2704 case ISD::SRL:
2705 return LoongArchISD::SRL_W;
2706 case ISD::ROTL:
2707 case ISD::ROTR:
2708 return LoongArchISD::ROTR_W;
2709 case ISD::CTTZ:
2710 return LoongArchISD::CTZ_W;
2711 case ISD::CTLZ:
2712 return LoongArchISD::CLZ_W;
2713 }
2714}
2715
2716// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2717// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2718 // otherwise be promoted to i64, making it difficult to select the
2719 // SLL_W/.../*_W instructions later, because the fact that the operation was
2720 // originally of type i8/i16/i32 is lost.
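// For example, on LA64 an i32 shift (i32 (shl x, y)) is rebuilt as
//   (i32 (trunc (LoongArchISD::SLL_W (any_ext x), (any_ext y))))
// so the *_W instruction can still be selected, and an i32 rotl amount is
// first rewritten as (32 - y) so that ROTR_W can be used.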
2721 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2722 unsigned ExtOpc = ISD::ANY_EXTEND) {
2723 SDLoc DL(N);
2724 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2725 SDValue NewOp0, NewRes;
2726
2727 switch (NumOp) {
2728 default:
2729 llvm_unreachable("Unexpected NumOp");
2730 case 1: {
2731 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2732 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2733 break;
2734 }
2735 case 2: {
2736 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2737 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2738 if (N->getOpcode() == ISD::ROTL) {
2739 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2740 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2741 }
2742 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2743 break;
2744 }
2745 // TODO: Handle more NumOp.
2746 }
2747
2748 // ReplaceNodeResults requires we maintain the same type for the return
2749 // value.
2750 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2751}
2752
2753 // Converts the given 32-bit operation to an i64 operation with sign-extension
2754 // semantics, to reduce the number of sign-extension instructions.
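// For example, an i32 add is rebuilt as
//   (i32 (trunc (sext_inreg (add (any_ext x), (any_ext y)), i32)))
// keeping the 32-bit result sign-extended in its 64-bit register so that a
// separate sign-extension instruction is usually unnecessary.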
2755 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2756 SDLoc DL(N);
2757 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2758 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2759 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2760 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2761 DAG.getValueType(MVT::i32));
2762 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2763}
2764
2765 // Helper function that emits an error message for intrinsics with or without a
2766 // chain and pushes an UNDEF and, when WithChain is set, the chain as the results.
2767 static void emitErrorAndReplaceIntrinsicResults(
2768 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2769 StringRef ErrorMsg, bool WithChain = true) {
2770 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2771 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2772 if (!WithChain)
2773 return;
2774 Results.push_back(N->getOperand(0));
2775}
2776
2777template <unsigned N>
2778static void
2779 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2780 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2781 unsigned ResOp) {
2782 const StringRef ErrorMsgOOR = "argument out of range";
2783 unsigned Imm = Node->getConstantOperandVal(2);
2784 if (!isUInt<N>(Imm)) {
2785 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2786 /*WithChain=*/false);
2787 return;
2788 }
2789 SDLoc DL(Node);
2790 SDValue Vec = Node->getOperand(1);
2791
2792 SDValue PickElt =
2793 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2794 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2795 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2796 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2797 PickElt.getValue(0)));
2798}
2799
2800 static void
2801 replaceVecCondBranchResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2802 SelectionDAG &DAG,
2803 const LoongArchSubtarget &Subtarget,
2804 unsigned ResOp) {
2805 SDLoc DL(N);
2806 SDValue Vec = N->getOperand(1);
2807
2808 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2809 Results.push_back(
2810 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2811}
2812
2813static void
2814 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2815 SelectionDAG &DAG,
2816 const LoongArchSubtarget &Subtarget) {
2817 switch (N->getConstantOperandVal(0)) {
2818 default:
2819 llvm_unreachable("Unexpected Intrinsic.");
2820 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2821 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2822 LoongArchISD::VPICK_SEXT_ELT);
2823 break;
2824 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2825 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2826 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2827 LoongArchISD::VPICK_SEXT_ELT);
2828 break;
2829 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2830 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2831 LoongArchISD::VPICK_SEXT_ELT);
2832 break;
2833 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2834 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2835 LoongArchISD::VPICK_ZEXT_ELT);
2836 break;
2837 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2838 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2839 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2840 LoongArchISD::VPICK_ZEXT_ELT);
2841 break;
2842 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2843 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2844 LoongArchISD::VPICK_ZEXT_ELT);
2845 break;
2846 case Intrinsic::loongarch_lsx_bz_b:
2847 case Intrinsic::loongarch_lsx_bz_h:
2848 case Intrinsic::loongarch_lsx_bz_w:
2849 case Intrinsic::loongarch_lsx_bz_d:
2850 case Intrinsic::loongarch_lasx_xbz_b:
2851 case Intrinsic::loongarch_lasx_xbz_h:
2852 case Intrinsic::loongarch_lasx_xbz_w:
2853 case Intrinsic::loongarch_lasx_xbz_d:
2854 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2855 LoongArchISD::VANY_ZERO);
2856 break;
2857 case Intrinsic::loongarch_lsx_bz_v:
2858 case Intrinsic::loongarch_lasx_xbz_v:
2859 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2860 LoongArchISD::VALL_ZERO);
2861 break;
2862 case Intrinsic::loongarch_lsx_bnz_b:
2863 case Intrinsic::loongarch_lsx_bnz_h:
2864 case Intrinsic::loongarch_lsx_bnz_w:
2865 case Intrinsic::loongarch_lsx_bnz_d:
2866 case Intrinsic::loongarch_lasx_xbnz_b:
2867 case Intrinsic::loongarch_lasx_xbnz_h:
2868 case Intrinsic::loongarch_lasx_xbnz_w:
2869 case Intrinsic::loongarch_lasx_xbnz_d:
2870 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2871 LoongArchISD::VALL_NONZERO);
2872 break;
2873 case Intrinsic::loongarch_lsx_bnz_v:
2874 case Intrinsic::loongarch_lasx_xbnz_v:
2875 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2876 LoongArchISD::VANY_NONZERO);
2877 break;
2878 }
2879}
2880
2881 void LoongArchTargetLowering::ReplaceNodeResults(
2882 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2883 SDLoc DL(N);
2884 EVT VT = N->getValueType(0);
2885 switch (N->getOpcode()) {
2886 default:
2887 llvm_unreachable("Don't know how to legalize this operation");
2888 case ISD::ADD:
2889 case ISD::SUB:
2890 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2891 "Unexpected custom legalisation");
2892 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2893 break;
2894 case ISD::SDIV:
2895 case ISD::UDIV:
2896 case ISD::SREM:
2897 case ISD::UREM:
2898 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2899 "Unexpected custom legalisation");
2900 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2901 Subtarget.hasDiv32() && VT == MVT::i32
2902 ? ISD::ANY_EXTEND
2903 : ISD::SIGN_EXTEND));
2904 break;
2905 case ISD::SHL:
2906 case ISD::SRA:
2907 case ISD::SRL:
2908 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2909 "Unexpected custom legalisation");
2910 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2911 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2912 break;
2913 }
2914 break;
2915 case ISD::ROTL:
2916 case ISD::ROTR:
2917 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2918 "Unexpected custom legalisation");
2919 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2920 break;
2921 case ISD::FP_TO_SINT: {
2922 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2923 "Unexpected custom legalisation");
2924 SDValue Src = N->getOperand(0);
2925 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2926 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2927 TargetLowering::TypeSoftenFloat) {
2928 if (Src.getValueType() == MVT::f16)
2929 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2930 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2931 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2932 return;
2933 }
2934 // If the FP type needs to be softened, emit a library call using the 'si'
2935 // version. If we left it to default legalization we'd end up with 'di'.
2936 RTLIB::Libcall LC;
2937 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2938 MakeLibCallOptions CallOptions;
2939 EVT OpVT = Src.getValueType();
2940 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2941 SDValue Chain = SDValue();
2942 SDValue Result;
2943 std::tie(Result, Chain) =
2944 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2945 Results.push_back(Result);
2946 break;
2947 }
2948 case ISD::BITCAST: {
2949 SDValue Src = N->getOperand(0);
2950 EVT SrcVT = Src.getValueType();
2951 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2952 Subtarget.hasBasicF()) {
2953 SDValue Dst =
2954 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2955 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2956 }
2957 break;
2958 }
2959 case ISD::FP_TO_UINT: {
2960 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2961 "Unexpected custom legalisation");
2962 auto &TLI = DAG.getTargetLoweringInfo();
2963 SDValue Tmp1, Tmp2;
2964 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2965 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2966 break;
2967 }
2968 case ISD::BSWAP: {
2969 SDValue Src = N->getOperand(0);
2970 assert((VT == MVT::i16 || VT == MVT::i32) &&
2971 "Unexpected custom legalization");
2972 MVT GRLenVT = Subtarget.getGRLenVT();
2973 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2974 SDValue Tmp;
2975 switch (VT.getSizeInBits()) {
2976 default:
2977 llvm_unreachable("Unexpected operand width");
2978 case 16:
2979 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2980 break;
2981 case 32:
2982 // Only LA64 will get here due to the size mismatch between VT and GRLenVT;
2983 // the LA32 lowering is defined directly in LoongArchInstrInfo.
2984 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2985 break;
2986 }
2987 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2988 break;
2989 }
2990 case ISD::BITREVERSE: {
2991 SDValue Src = N->getOperand(0);
2992 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2993 "Unexpected custom legalization");
2994 MVT GRLenVT = Subtarget.getGRLenVT();
2995 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2996 SDValue Tmp;
2997 switch (VT.getSizeInBits()) {
2998 default:
2999 llvm_unreachable("Unexpected operand width");
3000 case 8:
3001 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3002 break;
3003 case 32:
3004 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3005 break;
3006 }
3007 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3008 break;
3009 }
3010 case ISD::CTLZ:
3011 case ISD::CTTZ: {
3012 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3013 "Unexpected custom legalisation");
3014 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3015 break;
3016 }
3017 case ISD::INTRINSIC_W_CHAIN: {
3018 SDValue Chain = N->getOperand(0);
3019 SDValue Op2 = N->getOperand(2);
3020 MVT GRLenVT = Subtarget.getGRLenVT();
3021 const StringRef ErrorMsgOOR = "argument out of range";
3022 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3023 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3024
3025 switch (N->getConstantOperandVal(1)) {
3026 default:
3027 llvm_unreachable("Unexpected Intrinsic.");
3028 case Intrinsic::loongarch_movfcsr2gr: {
3029 if (!Subtarget.hasBasicF()) {
3030 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3031 return;
3032 }
3033 unsigned Imm = Op2->getAsZExtVal();
3034 if (!isUInt<2>(Imm)) {
3035 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3036 return;
3037 }
3038 SDValue MOVFCSR2GRResults = DAG.getNode(
3039 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3040 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3041 Results.push_back(
3042 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3043 Results.push_back(MOVFCSR2GRResults.getValue(1));
3044 break;
3045 }
3046#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3047 case Intrinsic::loongarch_##NAME: { \
3048 SDValue NODE = DAG.getNode( \
3049 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3050 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3051 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3052 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3053 Results.push_back(NODE.getValue(1)); \
3054 break; \
3055 }
3056 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3057 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3058 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3059 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3060 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3061 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3062#undef CRC_CASE_EXT_BINARYOP
3063
3064#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3065 case Intrinsic::loongarch_##NAME: { \
3066 SDValue NODE = DAG.getNode( \
3067 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3068 {Chain, Op2, \
3069 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3070 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3071 Results.push_back(NODE.getValue(1)); \
3072 break; \
3073 }
3074 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3075 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3076#undef CRC_CASE_EXT_UNARYOP
3077#define CSR_CASE(ID) \
3078 case Intrinsic::loongarch_##ID: { \
3079 if (!Subtarget.is64Bit()) \
3080 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3081 break; \
3082 }
3083 CSR_CASE(csrrd_d);
3084 CSR_CASE(csrwr_d);
3085 CSR_CASE(csrxchg_d);
3086 CSR_CASE(iocsrrd_d);
3087#undef CSR_CASE
3088 case Intrinsic::loongarch_csrrd_w: {
3089 unsigned Imm = Op2->getAsZExtVal();
3090 if (!isUInt<14>(Imm)) {
3091 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3092 return;
3093 }
3094 SDValue CSRRDResults =
3095 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3096 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3097 Results.push_back(
3098 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3099 Results.push_back(CSRRDResults.getValue(1));
3100 break;
3101 }
3102 case Intrinsic::loongarch_csrwr_w: {
3103 unsigned Imm = N->getConstantOperandVal(3);
3104 if (!isUInt<14>(Imm)) {
3105 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3106 return;
3107 }
3108 SDValue CSRWRResults =
3109 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3110 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3111 DAG.getConstant(Imm, DL, GRLenVT)});
3112 Results.push_back(
3113 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3114 Results.push_back(CSRWRResults.getValue(1));
3115 break;
3116 }
3117 case Intrinsic::loongarch_csrxchg_w: {
3118 unsigned Imm = N->getConstantOperandVal(4);
3119 if (!isUInt<14>(Imm)) {
3120 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3121 return;
3122 }
3123 SDValue CSRXCHGResults = DAG.getNode(
3124 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3125 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3126 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3127 DAG.getConstant(Imm, DL, GRLenVT)});
3128 Results.push_back(
3129 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3130 Results.push_back(CSRXCHGResults.getValue(1));
3131 break;
3132 }
3133#define IOCSRRD_CASE(NAME, NODE) \
3134 case Intrinsic::loongarch_##NAME: { \
3135 SDValue IOCSRRDResults = \
3136 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3137 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3138 Results.push_back( \
3139 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3140 Results.push_back(IOCSRRDResults.getValue(1)); \
3141 break; \
3142 }
3143 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3144 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3145 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3146#undef IOCSRRD_CASE
3147 case Intrinsic::loongarch_cpucfg: {
3148 SDValue CPUCFGResults =
3149 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3150 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3151 Results.push_back(
3152 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3153 Results.push_back(CPUCFGResults.getValue(1));
3154 break;
3155 }
3156 case Intrinsic::loongarch_lddir_d: {
3157 if (!Subtarget.is64Bit()) {
3158 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3159 return;
3160 }
3161 break;
3162 }
3163 }
3164 break;
3165 }
3166 case ISD::READ_REGISTER: {
3167 if (Subtarget.is64Bit())
3168 DAG.getContext()->emitError(
3169 "On LA64, only 64-bit registers can be read.");
3170 else
3171 DAG.getContext()->emitError(
3172 "On LA32, only 32-bit registers can be read.");
3173 Results.push_back(DAG.getUNDEF(VT));
3174 Results.push_back(N->getOperand(0));
3175 break;
3176 }
3177 case ISD::INTRINSIC_WO_CHAIN: {
3178 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3179 break;
3180 }
3181 case ISD::LROUND: {
3182 SDValue Op0 = N->getOperand(0);
3183 EVT OpVT = Op0.getValueType();
3184 RTLIB::Libcall LC =
3185 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3186 MakeLibCallOptions CallOptions;
3187 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3188 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3189 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3190 Results.push_back(Result);
3191 break;
3192 }
3193 }
3194}
3195
3196 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3197 TargetLowering::DAGCombinerInfo &DCI,
3198 const LoongArchSubtarget &Subtarget) {
3199 if (DCI.isBeforeLegalizeOps())
3200 return SDValue();
3201
3202 SDValue FirstOperand = N->getOperand(0);
3203 SDValue SecondOperand = N->getOperand(1);
3204 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3205 EVT ValTy = N->getValueType(0);
3206 SDLoc DL(N);
3207 uint64_t lsb, msb;
3208 unsigned SMIdx, SMLen;
3209 ConstantSDNode *CN;
3210 SDValue NewOperand;
3211 MVT GRLenVT = Subtarget.getGRLenVT();
3212
3213 // Op's second operand must be a shifted mask.
3214 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3215 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3216 return SDValue();
3217
3218 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3219 // Pattern match BSTRPICK.
3220 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3221 // => BSTRPICK $dst, $src, msb, lsb
3222 // where msb = lsb + len - 1
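// For example, for an i64 value:
//   $dst = and (srl $src, 16), 255
// has lsb=16 and len=8, so msb = 16 + 8 - 1 = 23 and the node becomes
//   BSTRPICK $dst, $src, 23, 16
// which extracts bits [23:16] of $src.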
3223
3224 // The second operand of the shift must be an immediate.
3225 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3226 return SDValue();
3227
3228 lsb = CN->getZExtValue();
3229
3230 // Return if the shifted mask does not start at bit 0 or the sum of its
3231 // length and lsb exceeds the word's size.
3232 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3233 return SDValue();
3234
3235 NewOperand = FirstOperand.getOperand(0);
3236 } else {
3237 // Pattern match BSTRPICK.
3238 // $dst = and $src, (2**len - 1), if len > 12
3239 // => BSTRPICK $dst, $src, msb, lsb
3240 // where lsb = 0 and msb = len - 1
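// For example:
//   $dst = and $src, 0xffff
// has len=16 > 12 and becomes BSTRPICK $dst, $src, 15, 0, while
//   $dst = and $src, 0xff
// is left alone since a 12-bit ANDI immediate already covers it.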
3241
3242 // If the mask is <= 0xfff, andi can be used instead.
3243 if (CN->getZExtValue() <= 0xfff)
3244 return SDValue();
3245
3246 // Return if the msb would exceed the size of the value type.
3247 if (SMIdx + SMLen > ValTy.getSizeInBits())
3248 return SDValue();
3249
3250 if (SMIdx > 0) {
3251 // Omit if the constant has more than 2 uses. This is a conservative
3252 // decision. Whether it is a win depends on the HW microarchitecture.
3253 // However, it should always be better for 1 and 2 uses.
3254 if (CN->use_size() > 2)
3255 return SDValue();
3256 // Return if the constant can be composed by a single LU12I.W.
3257 if ((CN->getZExtValue() & 0xfff) == 0)
3258 return SDValue();
3259 // Return if the constant can be composed by a single ADDI with
3260 // the zero register.
3261 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3262 return SDValue();
3263 }
3264
3265 lsb = SMIdx;
3266 NewOperand = FirstOperand;
3267 }
3268
3269 msb = lsb + SMLen - 1;
3270 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3271 DAG.getConstant(msb, DL, GRLenVT),
3272 DAG.getConstant(lsb, DL, GRLenVT));
3273 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3274 return NR0;
3275 // Try to optimize to
3276 // bstrpick $Rd, $Rs, msb, lsb
3277 // slli $Rd, $Rd, lsb
3278 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3279 DAG.getConstant(lsb, DL, GRLenVT));
3280}
3281
3282 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3283 TargetLowering::DAGCombinerInfo &DCI,
3284 const LoongArchSubtarget &Subtarget) {
3285 if (DCI.isBeforeLegalizeOps())
3286 return SDValue();
3287
3288 // $dst = srl (and $src, Mask), Shamt
3289 // =>
3290 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3291 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3292 //
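// For example:
//   $dst = srl (and $src, 0xff00), 8
// has MaskIdx=8, MaskLen=8 and Shamt=8, so MaskIdx <= Shamt <= 15 holds and
// the node becomes BSTRPICK $dst, $src, 15, 8, which produces the same bits
// [15:8] of $src in the low byte of $dst.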
3293
3294 SDValue FirstOperand = N->getOperand(0);
3295 ConstantSDNode *CN;
3296 EVT ValTy = N->getValueType(0);
3297 SDLoc DL(N);
3298 MVT GRLenVT = Subtarget.getGRLenVT();
3299 unsigned MaskIdx, MaskLen;
3300 uint64_t Shamt;
3301
3302 // The first operand must be an AND and the second operand of the AND must be
3303 // a shifted mask.
3304 if (FirstOperand.getOpcode() != ISD::AND ||
3305 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3306 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3307 return SDValue();
3308
3309 // The second operand (shift amount) must be an immediate.
3310 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3311 return SDValue();
3312
3313 Shamt = CN->getZExtValue();
3314 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3315 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3316 FirstOperand->getOperand(0),
3317 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3318 DAG.getConstant(Shamt, DL, GRLenVT));
3319
3320 return SDValue();
3321}
3322
3323 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3324 TargetLowering::DAGCombinerInfo &DCI,
3325 const LoongArchSubtarget &Subtarget) {
3326 MVT GRLenVT = Subtarget.getGRLenVT();
3327 EVT ValTy = N->getValueType(0);
3328 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3329 ConstantSDNode *CN0, *CN1;
3330 SDLoc DL(N);
3331 unsigned ValBits = ValTy.getSizeInBits();
3332 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3333 unsigned Shamt;
3334 bool SwapAndRetried = false;
3335
3336 if (DCI.isBeforeLegalizeOps())
3337 return SDValue();
3338
3339 if (ValBits != 32 && ValBits != 64)
3340 return SDValue();
3341
3342Retry:
3343 // 1st pattern to match BSTRINS:
3344 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3345 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3346 // =>
3347 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
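// For example, with size=8 and lsb=16 (on i64):
//   R = or (and X, 0xffffffffff00ffff), (and (shl Y, 16), 0xff0000)
// becomes BSTRINS X, Y, 23, 16, replacing bits [23:16] of X with the low
// 8 bits of Y.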
3348 if (N0.getOpcode() == ISD::AND &&
3349 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3350 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3351 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3352 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3353 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3354 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3355 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3356 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3357 (MaskIdx0 + MaskLen0 <= ValBits)) {
3358 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3359 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3360 N1.getOperand(0).getOperand(0),
3361 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3362 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3363 }
3364
3365 // 2nd pattern to match BSTRINS:
3366 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3367 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3368 // =>
3369 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3370 if (N0.getOpcode() == ISD::AND &&
3371 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3372 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3373 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3374 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3375 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3376 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3377 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3378 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3379 (MaskIdx0 + MaskLen0 <= ValBits)) {
3380 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3381 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3382 N1.getOperand(0).getOperand(0),
3383 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3384 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3385 }
3386
3387 // 3rd pattern to match BSTRINS:
3388 // R = or (and X, mask0), (and Y, mask1)
3389 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3390 // =>
3391 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3392 // where msb = lsb + size - 1
3393 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3394 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3395 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3396 (MaskIdx0 + MaskLen0 <= 64) &&
3397 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3398 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3399 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3400 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3401 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3402 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3403 DAG.getConstant(ValBits == 32
3404 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3405 : (MaskIdx0 + MaskLen0 - 1),
3406 DL, GRLenVT),
3407 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3408 }
3409
3410 // 4th pattern to match BSTRINS:
3411 // R = or (and X, mask), (shl Y, shamt)
3412 // where mask = (2**shamt - 1)
3413 // =>
3414 // R = BSTRINS X, Y, ValBits - 1, shamt
3415 // where ValBits = 32 or 64
3416 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3417 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3418 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3419 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3420 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3421 (MaskIdx0 + MaskLen0 <= ValBits)) {
3422 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3423 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3424 N1.getOperand(0),
3425 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3426 DAG.getConstant(Shamt, DL, GRLenVT));
3427 }
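  // Illustrative example (not in the original source): with shamt = 4 on a
  // 32-bit value, mask = 0xF and
  //   R = (X & 0xF) | (Y << 4)
  // is rewritten to BSTRINS X, Y, 31, 4, i.e. Y provides bits [31:4] and the
  // low 4 bits of X are kept.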
3428
3429 // 5th pattern to match BSTRINS:
3430 // R = or (and X, mask), const
3431 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3432 // =>
3433 // R = BSTRINS X, (const >> lsb), msb, lsb
3434 // where msb = lsb + size - 1
3435 if (N0.getOpcode() == ISD::AND &&
3436 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3437 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3438 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3439 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3440 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3441 return DAG.getNode(
3442 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3443 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3444 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3445 : (MaskIdx0 + MaskLen0 - 1),
3446 DL, GRLenVT),
3447 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3448 }
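  // Illustrative example (not in the original source): with mask = ~0xFF0
  // and const = 0xAB0 (so mask & const == 0),
  //   R = (X & ~0xFF0) | 0xAB0
  // is rewritten to BSTRINS X, 0xAB, 11, 4, inserting the constant field 0xAB
  // into bits [11:4] of X.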
3449
3450 // 6th pattern.
3451 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3452 // by the incoming bits are known to be zero.
3453 // =>
3454 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3455 //
 3456 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3457 // pattern is more common than the 1st. So we put the 1st before the 6th in
3458 // order to match as many nodes as possible.
3459 ConstantSDNode *CNMask, *CNShamt;
3460 unsigned MaskIdx, MaskLen;
3461 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3462 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3463 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3464 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3465 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3466 Shamt = CNShamt->getZExtValue();
3467 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3468 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3469 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3470 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3471 N1.getOperand(0).getOperand(0),
3472 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3473 DAG.getConstant(Shamt, DL, GRLenVT));
3474 }
3475 }
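  // Illustrative example (not in the original source): if bits [15:8] of b
  // are known to be zero, then
  //   a = b | ((c & 0xFF) << 8)
  // matches this pattern with MaskLen = 8 and shamt = 8 and is rewritten to
  // BSTRINS b, c, 15, 8.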
3476
3477 // 7th pattern.
3478 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3479 // overwritten by the incoming bits are known to be zero.
3480 // =>
3481 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3482 //
3483 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3484 // before the 7th in order to match as many nodes as possible.
3485 if (N1.getOpcode() == ISD::AND &&
3486 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3487 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3488 N1.getOperand(0).getOpcode() == ISD::SHL &&
3489 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3490 CNShamt->getZExtValue() == MaskIdx) {
3491 APInt ShMask(ValBits, CNMask->getZExtValue());
3492 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3493 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3494 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3495 N1.getOperand(0).getOperand(0),
3496 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3497 DAG.getConstant(MaskIdx, DL, GRLenVT));
3498 }
3499 }
3500
3501 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3502 if (!SwapAndRetried) {
3503 std::swap(N0, N1);
3504 SwapAndRetried = true;
3505 goto Retry;
3506 }
3507
3508 SwapAndRetried = false;
3509Retry2:
3510 // 8th pattern.
3511 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3512 // the incoming bits are known to be zero.
3513 // =>
3514 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3515 //
3516 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
 3517 // we put it here in order to match as many nodes as possible or generate
 3518 // fewer instructions.
3519 if (N1.getOpcode() == ISD::AND &&
3520 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3521 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3522 APInt ShMask(ValBits, CNMask->getZExtValue());
3523 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3524 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3525 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3526 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3527 N1->getOperand(0),
3528 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3529 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3530 DAG.getConstant(MaskIdx, DL, GRLenVT));
3531 }
3532 }
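  // Illustrative example (not in the original source): if bits [23:16] of b
  // are known to be zero, then
  //   a = b | (c & 0xFF0000)
  // matches this pattern with MaskIdx = 16 and MaskLen = 8 and is rewritten
  // to BSTRINS b, (c >> 16), 23, 16.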
3533 // Swap N0/N1 and retry.
3534 if (!SwapAndRetried) {
3535 std::swap(N0, N1);
3536 SwapAndRetried = true;
3537 goto Retry2;
3538 }
3539
3540 return SDValue();
3541}
3542
3543static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3544 ExtType = ISD::NON_EXTLOAD;
3545
3546 switch (V.getNode()->getOpcode()) {
3547 case ISD::LOAD: {
3548 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3549 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3550 (LoadNode->getMemoryVT() == MVT::i16)) {
3551 ExtType = LoadNode->getExtensionType();
3552 return true;
3553 }
3554 return false;
3555 }
3556 case ISD::AssertSext: {
3557 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3558 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3559 ExtType = ISD::SEXTLOAD;
3560 return true;
3561 }
3562 return false;
3563 }
3564 case ISD::AssertZext: {
3565 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3566 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3567 ExtType = ISD::ZEXTLOAD;
3568 return true;
3569 }
3570 return false;
3571 }
3572 default:
3573 return false;
3574 }
3575
3576 return false;
3577}
3578
3579// Eliminate redundant truncation and zero-extension nodes.
3580// * Case 1:
3581// +------------+ +------------+ +------------+
3582// | Input1 | | Input2 | | CC |
3583// +------------+ +------------+ +------------+
3584// | | |
3585// V V +----+
3586// +------------+ +------------+ |
3587// | TRUNCATE | | TRUNCATE | |
3588// +------------+ +------------+ |
3589// | | |
3590// V V |
3591// +------------+ +------------+ |
3592// | ZERO_EXT | | ZERO_EXT | |
3593// +------------+ +------------+ |
3594// | | |
3595// | +-------------+ |
3596// V V | |
3597// +----------------+ | |
3598// | AND | | |
3599// +----------------+ | |
3600// | | |
3601// +---------------+ | |
3602// | | |
3603// V V V
3604// +-------------+
3605// | CMP |
3606// +-------------+
3607// * Case 2:
3608// +------------+ +------------+ +-------------+ +------------+ +------------+
3609// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3610// +------------+ +------------+ +-------------+ +------------+ +------------+
3611// | | | | |
3612// V | | | |
3613// +------------+ | | | |
3614// | XOR |<---------------------+ | |
3615// +------------+ | | |
3616// | | | |
3617// V V +---------------+ |
3618// +------------+ +------------+ | |
3619// | TRUNCATE | | TRUNCATE | | +-------------------------+
3620// +------------+ +------------+ | |
3621// | | | |
3622// V V | |
3623// +------------+ +------------+ | |
3624// | ZERO_EXT | | ZERO_EXT | | |
3625// +------------+ +------------+ | |
3626// | | | |
3627// V V | |
3628// +----------------+ | |
3629// | AND | | |
3630// +----------------+ | |
3631// | | |
3632// +---------------+ | |
3633// | | |
3634// V V V
3635// +-------------+
3636// | CMP |
3637// +-------------+
3638static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3639 TargetLowering::DAGCombinerInfo &DCI,
3640 const LoongArchSubtarget &Subtarget) {
3641 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3642
3643 SDNode *AndNode = N->getOperand(0).getNode();
3644 if (AndNode->getOpcode() != ISD::AND)
3645 return SDValue();
3646
3647 SDValue AndInputValue2 = AndNode->getOperand(1);
3648 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3649 return SDValue();
3650
3651 SDValue CmpInputValue = N->getOperand(1);
3652 SDValue AndInputValue1 = AndNode->getOperand(0);
3653 if (AndInputValue1.getOpcode() == ISD::XOR) {
3654 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3655 return SDValue();
3656 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3657 if (!CN || CN->getSExtValue() != -1)
3658 return SDValue();
3659 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3660 if (!CN || CN->getSExtValue() != 0)
3661 return SDValue();
3662 AndInputValue1 = AndInputValue1.getOperand(0);
3663 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3664 return SDValue();
3665 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3666 if (AndInputValue2 != CmpInputValue)
3667 return SDValue();
3668 } else {
3669 return SDValue();
3670 }
3671
3672 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3673 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3674 return SDValue();
3675
3676 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3677 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3678 return SDValue();
3679
3680 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3681 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3682 ISD::LoadExtType ExtType1;
3683 ISD::LoadExtType ExtType2;
3684
3685 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3686 !checkValueWidth(TruncInputValue2, ExtType2))
3687 return SDValue();
3688
3689 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3690 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3691 return SDValue();
3692
3693 if ((ExtType2 != ISD::ZEXTLOAD) &&
3694 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3695 return SDValue();
3696
3697 // These truncation and zero-extension nodes are not necessary, remove them.
3698 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3699 TruncInputValue1, TruncInputValue2);
3700 SDValue NewSetCC =
3701 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3702 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3703 return SDValue(N, 0);
3704}
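One source-level shape that can produce the Case 1 DAG shown above is sketched here; this is only an illustration under the assumption of i8 memory operands (containsAll is a name invented for the example):

#include <cstdint>
// (*Flags & *Wanted) == *Wanted on byte loads: the truncate/zero-extend pairs
// wrapped around the extended loads carry no information, so the combine lets
// the AND and the comparison use the load results directly.
bool containsAll(const uint8_t *Flags, const uint8_t *Wanted) {
  return (*Flags & *Wanted) == *Wanted;
}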
3705
3706// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3707static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3708 TargetLowering::DAGCombinerInfo &DCI,
3709 const LoongArchSubtarget &Subtarget) {
3710 if (DCI.isBeforeLegalizeOps())
3711 return SDValue();
3712
3713 SDValue Src = N->getOperand(0);
3714 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3715 return SDValue();
3716
3717 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3718 Src.getOperand(0));
3719}
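The soundness of this fold can be checked with a small scalar model. The helpers below (bswap32, bitrev8, bitrev32) are invented for the illustration and are not APIs used by this file:

#include <cstdint>
// Byte-swapping a 32-bit value and then reversing all 32 bits leaves every
// byte in its original position with its bits reversed, which is exactly the
// bitrev.4b semantics.
constexpr uint32_t bswap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0xFF00u) | ((v << 8) & 0xFF0000u) | (v << 24);
}
constexpr uint32_t bitrev8(uint32_t b) {
  uint32_t r = 0;
  for (int i = 0; i < 8; ++i)
    r = (r << 1) | ((b >> i) & 1);
  return r;
}
constexpr uint32_t bitrev32(uint32_t v) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r = (r << 1) | ((v >> i) & 1);
  return r;
}
static_assert(bitrev32(bswap32(0x12345678u)) ==
                  (bitrev8(0x12) << 24 | bitrev8(0x34) << 16 |
                   bitrev8(0x56) << 8 | bitrev8(0x78)),
              "bitrev.w(revb.2w(x)) == bitrev.4b(x) for this sample");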
3720
3721template <unsigned N>
3722static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3723 SelectionDAG &DAG,
3724 const LoongArchSubtarget &Subtarget,
3725 bool IsSigned = false) {
3726 SDLoc DL(Node);
3727 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3728 // Check the ImmArg.
3729 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3730 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3731 DAG.getContext()->emitError(Node->getOperationName(0) +
3732 ": argument out of range.");
3733 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3734 }
3735 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3736}
3737
3738template <unsigned N>
3739static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3740 SelectionDAG &DAG, bool IsSigned = false) {
3741 SDLoc DL(Node);
3742 EVT ResTy = Node->getValueType(0);
3743 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3744
3745 // Check the ImmArg.
3746 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3747 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3748 DAG.getContext()->emitError(Node->getOperationName(0) +
3749 ": argument out of range.");
3750 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3751 }
3752 return DAG.getConstant(
 3753 APInt(ResTy.getScalarType().getSizeInBits(),
 3754 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3755 DL, ResTy);
3756}
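The ImmArg checks in the two helpers above only verify that the constant fits the encodable field before it is splatted. A minimal sketch of that range test using llvm::isUInt / llvm::isInt (assuming only llvm/Support/MathExtras.h; the wrapper names are invented here):

#include "llvm/Support/MathExtras.h"
#include <cstdint>
// A ui5 field such as vaddi.bu's immediate accepts 0..31; a si5 field such as
// vmaxi.b's immediate accepts -16..15. Anything else triggers the
// "argument out of range" diagnostic seen above.
inline bool fitsUImm5(uint64_t Imm) { return llvm::isUInt<5>(Imm); }
inline bool fitsSImm5(int64_t Imm) { return llvm::isInt<5>(Imm); }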
3757
3758static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3759 SDLoc DL(Node);
3760 EVT ResTy = Node->getValueType(0);
3761 SDValue Vec = Node->getOperand(2);
3762 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3763 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3764}
3765
3766static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3767 SDLoc DL(Node);
3768 EVT ResTy = Node->getValueType(0);
3769 SDValue One = DAG.getConstant(1, DL, ResTy);
3770 SDValue Bit =
3771 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3772
3773 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3774 DAG.getNOT(DL, Bit, ResTy));
3775}
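Per lane, the lowering above computes x & ~(1 << (y mod lane-width)). A scalar model of a single 32-bit lane, for illustration only (the function name is invented):

#include <cstdint>
// One v4i32 lane of vbitclr.w: clear bit (Y mod 32) of X.
uint32_t bitclr_w_lane(uint32_t X, uint32_t Y) {
  return X & ~(1u << (Y & 31));
}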
3776
3777template <unsigned N>
3778static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3779 SDLoc DL(Node);
3780 EVT ResTy = Node->getValueType(0);
3781 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3782 // Check the unsigned ImmArg.
3783 if (!isUInt<N>(CImm->getZExtValue())) {
3784 DAG.getContext()->emitError(Node->getOperationName(0) +
3785 ": argument out of range.");
3786 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3787 }
3788
3789 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3790 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3791
3792 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3793}
3794
3795template <unsigned N>
3796static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3797 SDLoc DL(Node);
3798 EVT ResTy = Node->getValueType(0);
3799 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3800 // Check the unsigned ImmArg.
3801 if (!isUInt<N>(CImm->getZExtValue())) {
3802 DAG.getContext()->emitError(Node->getOperationName(0) +
3803 ": argument out of range.");
3804 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3805 }
3806
3807 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3808 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3809 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3810}
3811
3812template <unsigned N>
3813static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3814 SDLoc DL(Node);
3815 EVT ResTy = Node->getValueType(0);
3816 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3817 // Check the unsigned ImmArg.
3818 if (!isUInt<N>(CImm->getZExtValue())) {
3819 DAG.getContext()->emitError(Node->getOperationName(0) +
3820 ": argument out of range.");
3821 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3822 }
3823
3824 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3825 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3826 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3827}
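The three immediate forms above differ only in the final bitwise operator (AND-with-complement, OR, XOR). A scalar model of one 32-bit lane for the set and reverse cases, for illustration only (Imm is assumed to have already passed the uimm5 check; the function names are invented):

#include <cstdint>
// One v4i32 lane of vbitseti.w / vbitrevi.w with immediate Imm in 0..31.
uint32_t bitseti_w_lane(uint32_t X, unsigned Imm) { return X | (1u << Imm); }
uint32_t bitrevi_w_lane(uint32_t X, unsigned Imm) { return X ^ (1u << Imm); }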
3828
3829static SDValue
3830performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3831 TargetLowering::DAGCombinerInfo &DCI,
3832 const LoongArchSubtarget &Subtarget) {
3833 SDLoc DL(N);
3834 switch (N->getConstantOperandVal(0)) {
3835 default:
3836 break;
3837 case Intrinsic::loongarch_lsx_vadd_b:
3838 case Intrinsic::loongarch_lsx_vadd_h:
3839 case Intrinsic::loongarch_lsx_vadd_w:
3840 case Intrinsic::loongarch_lsx_vadd_d:
3841 case Intrinsic::loongarch_lasx_xvadd_b:
3842 case Intrinsic::loongarch_lasx_xvadd_h:
3843 case Intrinsic::loongarch_lasx_xvadd_w:
3844 case Intrinsic::loongarch_lasx_xvadd_d:
3845 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3846 N->getOperand(2));
3847 case Intrinsic::loongarch_lsx_vaddi_bu:
3848 case Intrinsic::loongarch_lsx_vaddi_hu:
3849 case Intrinsic::loongarch_lsx_vaddi_wu:
3850 case Intrinsic::loongarch_lsx_vaddi_du:
3851 case Intrinsic::loongarch_lasx_xvaddi_bu:
3852 case Intrinsic::loongarch_lasx_xvaddi_hu:
3853 case Intrinsic::loongarch_lasx_xvaddi_wu:
3854 case Intrinsic::loongarch_lasx_xvaddi_du:
3855 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3856 lowerVectorSplatImm<5>(N, 2, DAG));
3857 case Intrinsic::loongarch_lsx_vsub_b:
3858 case Intrinsic::loongarch_lsx_vsub_h:
3859 case Intrinsic::loongarch_lsx_vsub_w:
3860 case Intrinsic::loongarch_lsx_vsub_d:
3861 case Intrinsic::loongarch_lasx_xvsub_b:
3862 case Intrinsic::loongarch_lasx_xvsub_h:
3863 case Intrinsic::loongarch_lasx_xvsub_w:
3864 case Intrinsic::loongarch_lasx_xvsub_d:
3865 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3866 N->getOperand(2));
3867 case Intrinsic::loongarch_lsx_vsubi_bu:
3868 case Intrinsic::loongarch_lsx_vsubi_hu:
3869 case Intrinsic::loongarch_lsx_vsubi_wu:
3870 case Intrinsic::loongarch_lsx_vsubi_du:
3871 case Intrinsic::loongarch_lasx_xvsubi_bu:
3872 case Intrinsic::loongarch_lasx_xvsubi_hu:
3873 case Intrinsic::loongarch_lasx_xvsubi_wu:
3874 case Intrinsic::loongarch_lasx_xvsubi_du:
3875 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3876 lowerVectorSplatImm<5>(N, 2, DAG));
3877 case Intrinsic::loongarch_lsx_vneg_b:
3878 case Intrinsic::loongarch_lsx_vneg_h:
3879 case Intrinsic::loongarch_lsx_vneg_w:
3880 case Intrinsic::loongarch_lsx_vneg_d:
3881 case Intrinsic::loongarch_lasx_xvneg_b:
3882 case Intrinsic::loongarch_lasx_xvneg_h:
3883 case Intrinsic::loongarch_lasx_xvneg_w:
3884 case Intrinsic::loongarch_lasx_xvneg_d:
3885 return DAG.getNode(
3886 ISD::SUB, DL, N->getValueType(0),
3887 DAG.getConstant(
3888 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3889 /*isSigned=*/true),
3890 SDLoc(N), N->getValueType(0)),
3891 N->getOperand(1));
3892 case Intrinsic::loongarch_lsx_vmax_b:
3893 case Intrinsic::loongarch_lsx_vmax_h:
3894 case Intrinsic::loongarch_lsx_vmax_w:
3895 case Intrinsic::loongarch_lsx_vmax_d:
3896 case Intrinsic::loongarch_lasx_xvmax_b:
3897 case Intrinsic::loongarch_lasx_xvmax_h:
3898 case Intrinsic::loongarch_lasx_xvmax_w:
3899 case Intrinsic::loongarch_lasx_xvmax_d:
3900 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3901 N->getOperand(2));
3902 case Intrinsic::loongarch_lsx_vmax_bu:
3903 case Intrinsic::loongarch_lsx_vmax_hu:
3904 case Intrinsic::loongarch_lsx_vmax_wu:
3905 case Intrinsic::loongarch_lsx_vmax_du:
3906 case Intrinsic::loongarch_lasx_xvmax_bu:
3907 case Intrinsic::loongarch_lasx_xvmax_hu:
3908 case Intrinsic::loongarch_lasx_xvmax_wu:
3909 case Intrinsic::loongarch_lasx_xvmax_du:
3910 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3911 N->getOperand(2));
3912 case Intrinsic::loongarch_lsx_vmaxi_b:
3913 case Intrinsic::loongarch_lsx_vmaxi_h:
3914 case Intrinsic::loongarch_lsx_vmaxi_w:
3915 case Intrinsic::loongarch_lsx_vmaxi_d:
3916 case Intrinsic::loongarch_lasx_xvmaxi_b:
3917 case Intrinsic::loongarch_lasx_xvmaxi_h:
3918 case Intrinsic::loongarch_lasx_xvmaxi_w:
3919 case Intrinsic::loongarch_lasx_xvmaxi_d:
3920 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3921 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3922 case Intrinsic::loongarch_lsx_vmaxi_bu:
3923 case Intrinsic::loongarch_lsx_vmaxi_hu:
3924 case Intrinsic::loongarch_lsx_vmaxi_wu:
3925 case Intrinsic::loongarch_lsx_vmaxi_du:
3926 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3927 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3928 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3929 case Intrinsic::loongarch_lasx_xvmaxi_du:
3930 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3931 lowerVectorSplatImm<5>(N, 2, DAG));
3932 case Intrinsic::loongarch_lsx_vmin_b:
3933 case Intrinsic::loongarch_lsx_vmin_h:
3934 case Intrinsic::loongarch_lsx_vmin_w:
3935 case Intrinsic::loongarch_lsx_vmin_d:
3936 case Intrinsic::loongarch_lasx_xvmin_b:
3937 case Intrinsic::loongarch_lasx_xvmin_h:
3938 case Intrinsic::loongarch_lasx_xvmin_w:
3939 case Intrinsic::loongarch_lasx_xvmin_d:
3940 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3941 N->getOperand(2));
3942 case Intrinsic::loongarch_lsx_vmin_bu:
3943 case Intrinsic::loongarch_lsx_vmin_hu:
3944 case Intrinsic::loongarch_lsx_vmin_wu:
3945 case Intrinsic::loongarch_lsx_vmin_du:
3946 case Intrinsic::loongarch_lasx_xvmin_bu:
3947 case Intrinsic::loongarch_lasx_xvmin_hu:
3948 case Intrinsic::loongarch_lasx_xvmin_wu:
3949 case Intrinsic::loongarch_lasx_xvmin_du:
3950 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3951 N->getOperand(2));
3952 case Intrinsic::loongarch_lsx_vmini_b:
3953 case Intrinsic::loongarch_lsx_vmini_h:
3954 case Intrinsic::loongarch_lsx_vmini_w:
3955 case Intrinsic::loongarch_lsx_vmini_d:
3956 case Intrinsic::loongarch_lasx_xvmini_b:
3957 case Intrinsic::loongarch_lasx_xvmini_h:
3958 case Intrinsic::loongarch_lasx_xvmini_w:
3959 case Intrinsic::loongarch_lasx_xvmini_d:
3960 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3961 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3962 case Intrinsic::loongarch_lsx_vmini_bu:
3963 case Intrinsic::loongarch_lsx_vmini_hu:
3964 case Intrinsic::loongarch_lsx_vmini_wu:
3965 case Intrinsic::loongarch_lsx_vmini_du:
3966 case Intrinsic::loongarch_lasx_xvmini_bu:
3967 case Intrinsic::loongarch_lasx_xvmini_hu:
3968 case Intrinsic::loongarch_lasx_xvmini_wu:
3969 case Intrinsic::loongarch_lasx_xvmini_du:
3970 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3971 lowerVectorSplatImm<5>(N, 2, DAG));
3972 case Intrinsic::loongarch_lsx_vmul_b:
3973 case Intrinsic::loongarch_lsx_vmul_h:
3974 case Intrinsic::loongarch_lsx_vmul_w:
3975 case Intrinsic::loongarch_lsx_vmul_d:
3976 case Intrinsic::loongarch_lasx_xvmul_b:
3977 case Intrinsic::loongarch_lasx_xvmul_h:
3978 case Intrinsic::loongarch_lasx_xvmul_w:
3979 case Intrinsic::loongarch_lasx_xvmul_d:
3980 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3981 N->getOperand(2));
3982 case Intrinsic::loongarch_lsx_vmadd_b:
3983 case Intrinsic::loongarch_lsx_vmadd_h:
3984 case Intrinsic::loongarch_lsx_vmadd_w:
3985 case Intrinsic::loongarch_lsx_vmadd_d:
3986 case Intrinsic::loongarch_lasx_xvmadd_b:
3987 case Intrinsic::loongarch_lasx_xvmadd_h:
3988 case Intrinsic::loongarch_lasx_xvmadd_w:
3989 case Intrinsic::loongarch_lasx_xvmadd_d: {
3990 EVT ResTy = N->getValueType(0);
3991 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3992 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3993 N->getOperand(3)));
3994 }
3995 case Intrinsic::loongarch_lsx_vmsub_b:
3996 case Intrinsic::loongarch_lsx_vmsub_h:
3997 case Intrinsic::loongarch_lsx_vmsub_w:
3998 case Intrinsic::loongarch_lsx_vmsub_d:
3999 case Intrinsic::loongarch_lasx_xvmsub_b:
4000 case Intrinsic::loongarch_lasx_xvmsub_h:
4001 case Intrinsic::loongarch_lasx_xvmsub_w:
4002 case Intrinsic::loongarch_lasx_xvmsub_d: {
4003 EVT ResTy = N->getValueType(0);
4004 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4005 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4006 N->getOperand(3)));
4007 }
4008 case Intrinsic::loongarch_lsx_vdiv_b:
4009 case Intrinsic::loongarch_lsx_vdiv_h:
4010 case Intrinsic::loongarch_lsx_vdiv_w:
4011 case Intrinsic::loongarch_lsx_vdiv_d:
4012 case Intrinsic::loongarch_lasx_xvdiv_b:
4013 case Intrinsic::loongarch_lasx_xvdiv_h:
4014 case Intrinsic::loongarch_lasx_xvdiv_w:
4015 case Intrinsic::loongarch_lasx_xvdiv_d:
4016 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4017 N->getOperand(2));
4018 case Intrinsic::loongarch_lsx_vdiv_bu:
4019 case Intrinsic::loongarch_lsx_vdiv_hu:
4020 case Intrinsic::loongarch_lsx_vdiv_wu:
4021 case Intrinsic::loongarch_lsx_vdiv_du:
4022 case Intrinsic::loongarch_lasx_xvdiv_bu:
4023 case Intrinsic::loongarch_lasx_xvdiv_hu:
4024 case Intrinsic::loongarch_lasx_xvdiv_wu:
4025 case Intrinsic::loongarch_lasx_xvdiv_du:
4026 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4027 N->getOperand(2));
4028 case Intrinsic::loongarch_lsx_vmod_b:
4029 case Intrinsic::loongarch_lsx_vmod_h:
4030 case Intrinsic::loongarch_lsx_vmod_w:
4031 case Intrinsic::loongarch_lsx_vmod_d:
4032 case Intrinsic::loongarch_lasx_xvmod_b:
4033 case Intrinsic::loongarch_lasx_xvmod_h:
4034 case Intrinsic::loongarch_lasx_xvmod_w:
4035 case Intrinsic::loongarch_lasx_xvmod_d:
4036 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4037 N->getOperand(2));
4038 case Intrinsic::loongarch_lsx_vmod_bu:
4039 case Intrinsic::loongarch_lsx_vmod_hu:
4040 case Intrinsic::loongarch_lsx_vmod_wu:
4041 case Intrinsic::loongarch_lsx_vmod_du:
4042 case Intrinsic::loongarch_lasx_xvmod_bu:
4043 case Intrinsic::loongarch_lasx_xvmod_hu:
4044 case Intrinsic::loongarch_lasx_xvmod_wu:
4045 case Intrinsic::loongarch_lasx_xvmod_du:
4046 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4047 N->getOperand(2));
4048 case Intrinsic::loongarch_lsx_vand_v:
4049 case Intrinsic::loongarch_lasx_xvand_v:
4050 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4051 N->getOperand(2));
4052 case Intrinsic::loongarch_lsx_vor_v:
4053 case Intrinsic::loongarch_lasx_xvor_v:
4054 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vxor_v:
4057 case Intrinsic::loongarch_lasx_xvxor_v:
4058 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4059 N->getOperand(2));
4060 case Intrinsic::loongarch_lsx_vnor_v:
4061 case Intrinsic::loongarch_lasx_xvnor_v: {
4062 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4063 N->getOperand(2));
4064 return DAG.getNOT(DL, Res, Res->getValueType(0));
4065 }
4066 case Intrinsic::loongarch_lsx_vandi_b:
4067 case Intrinsic::loongarch_lasx_xvandi_b:
4068 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4069 lowerVectorSplatImm<8>(N, 2, DAG));
4070 case Intrinsic::loongarch_lsx_vori_b:
4071 case Intrinsic::loongarch_lasx_xvori_b:
4072 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4073 lowerVectorSplatImm<8>(N, 2, DAG));
4074 case Intrinsic::loongarch_lsx_vxori_b:
4075 case Intrinsic::loongarch_lasx_xvxori_b:
4076 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4077 lowerVectorSplatImm<8>(N, 2, DAG));
4078 case Intrinsic::loongarch_lsx_vsll_b:
4079 case Intrinsic::loongarch_lsx_vsll_h:
4080 case Intrinsic::loongarch_lsx_vsll_w:
4081 case Intrinsic::loongarch_lsx_vsll_d:
4082 case Intrinsic::loongarch_lasx_xvsll_b:
4083 case Intrinsic::loongarch_lasx_xvsll_h:
4084 case Intrinsic::loongarch_lasx_xvsll_w:
4085 case Intrinsic::loongarch_lasx_xvsll_d:
4086 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4087 truncateVecElts(N, DAG));
4088 case Intrinsic::loongarch_lsx_vslli_b:
4089 case Intrinsic::loongarch_lasx_xvslli_b:
4090 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4091 lowerVectorSplatImm<3>(N, 2, DAG));
4092 case Intrinsic::loongarch_lsx_vslli_h:
4093 case Intrinsic::loongarch_lasx_xvslli_h:
4094 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4095 lowerVectorSplatImm<4>(N, 2, DAG));
4096 case Intrinsic::loongarch_lsx_vslli_w:
4097 case Intrinsic::loongarch_lasx_xvslli_w:
4098 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4099 lowerVectorSplatImm<5>(N, 2, DAG));
4100 case Intrinsic::loongarch_lsx_vslli_d:
4101 case Intrinsic::loongarch_lasx_xvslli_d:
4102 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4103 lowerVectorSplatImm<6>(N, 2, DAG));
4104 case Intrinsic::loongarch_lsx_vsrl_b:
4105 case Intrinsic::loongarch_lsx_vsrl_h:
4106 case Intrinsic::loongarch_lsx_vsrl_w:
4107 case Intrinsic::loongarch_lsx_vsrl_d:
4108 case Intrinsic::loongarch_lasx_xvsrl_b:
4109 case Intrinsic::loongarch_lasx_xvsrl_h:
4110 case Intrinsic::loongarch_lasx_xvsrl_w:
4111 case Intrinsic::loongarch_lasx_xvsrl_d:
4112 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4113 truncateVecElts(N, DAG));
4114 case Intrinsic::loongarch_lsx_vsrli_b:
4115 case Intrinsic::loongarch_lasx_xvsrli_b:
4116 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4117 lowerVectorSplatImm<3>(N, 2, DAG));
4118 case Intrinsic::loongarch_lsx_vsrli_h:
4119 case Intrinsic::loongarch_lasx_xvsrli_h:
4120 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4121 lowerVectorSplatImm<4>(N, 2, DAG));
4122 case Intrinsic::loongarch_lsx_vsrli_w:
4123 case Intrinsic::loongarch_lasx_xvsrli_w:
4124 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4125 lowerVectorSplatImm<5>(N, 2, DAG));
4126 case Intrinsic::loongarch_lsx_vsrli_d:
4127 case Intrinsic::loongarch_lasx_xvsrli_d:
4128 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4129 lowerVectorSplatImm<6>(N, 2, DAG));
4130 case Intrinsic::loongarch_lsx_vsra_b:
4131 case Intrinsic::loongarch_lsx_vsra_h:
4132 case Intrinsic::loongarch_lsx_vsra_w:
4133 case Intrinsic::loongarch_lsx_vsra_d:
4134 case Intrinsic::loongarch_lasx_xvsra_b:
4135 case Intrinsic::loongarch_lasx_xvsra_h:
4136 case Intrinsic::loongarch_lasx_xvsra_w:
4137 case Intrinsic::loongarch_lasx_xvsra_d:
4138 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4139 truncateVecElts(N, DAG));
4140 case Intrinsic::loongarch_lsx_vsrai_b:
4141 case Intrinsic::loongarch_lasx_xvsrai_b:
4142 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4143 lowerVectorSplatImm<3>(N, 2, DAG));
4144 case Intrinsic::loongarch_lsx_vsrai_h:
4145 case Intrinsic::loongarch_lasx_xvsrai_h:
4146 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4147 lowerVectorSplatImm<4>(N, 2, DAG));
4148 case Intrinsic::loongarch_lsx_vsrai_w:
4149 case Intrinsic::loongarch_lasx_xvsrai_w:
4150 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4151 lowerVectorSplatImm<5>(N, 2, DAG));
4152 case Intrinsic::loongarch_lsx_vsrai_d:
4153 case Intrinsic::loongarch_lasx_xvsrai_d:
4154 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4155 lowerVectorSplatImm<6>(N, 2, DAG));
4156 case Intrinsic::loongarch_lsx_vclz_b:
4157 case Intrinsic::loongarch_lsx_vclz_h:
4158 case Intrinsic::loongarch_lsx_vclz_w:
4159 case Intrinsic::loongarch_lsx_vclz_d:
4160 case Intrinsic::loongarch_lasx_xvclz_b:
4161 case Intrinsic::loongarch_lasx_xvclz_h:
4162 case Intrinsic::loongarch_lasx_xvclz_w:
4163 case Intrinsic::loongarch_lasx_xvclz_d:
4164 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4165 case Intrinsic::loongarch_lsx_vpcnt_b:
4166 case Intrinsic::loongarch_lsx_vpcnt_h:
4167 case Intrinsic::loongarch_lsx_vpcnt_w:
4168 case Intrinsic::loongarch_lsx_vpcnt_d:
4169 case Intrinsic::loongarch_lasx_xvpcnt_b:
4170 case Intrinsic::loongarch_lasx_xvpcnt_h:
4171 case Intrinsic::loongarch_lasx_xvpcnt_w:
4172 case Intrinsic::loongarch_lasx_xvpcnt_d:
4173 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4174 case Intrinsic::loongarch_lsx_vbitclr_b:
4175 case Intrinsic::loongarch_lsx_vbitclr_h:
4176 case Intrinsic::loongarch_lsx_vbitclr_w:
4177 case Intrinsic::loongarch_lsx_vbitclr_d:
4178 case Intrinsic::loongarch_lasx_xvbitclr_b:
4179 case Intrinsic::loongarch_lasx_xvbitclr_h:
4180 case Intrinsic::loongarch_lasx_xvbitclr_w:
4181 case Intrinsic::loongarch_lasx_xvbitclr_d:
4182 return lowerVectorBitClear(N, DAG);
4183 case Intrinsic::loongarch_lsx_vbitclri_b:
4184 case Intrinsic::loongarch_lasx_xvbitclri_b:
4185 return lowerVectorBitClearImm<3>(N, DAG);
4186 case Intrinsic::loongarch_lsx_vbitclri_h:
4187 case Intrinsic::loongarch_lasx_xvbitclri_h:
4188 return lowerVectorBitClearImm<4>(N, DAG);
4189 case Intrinsic::loongarch_lsx_vbitclri_w:
4190 case Intrinsic::loongarch_lasx_xvbitclri_w:
4191 return lowerVectorBitClearImm<5>(N, DAG);
4192 case Intrinsic::loongarch_lsx_vbitclri_d:
4193 case Intrinsic::loongarch_lasx_xvbitclri_d:
4194 return lowerVectorBitClearImm<6>(N, DAG);
4195 case Intrinsic::loongarch_lsx_vbitset_b:
4196 case Intrinsic::loongarch_lsx_vbitset_h:
4197 case Intrinsic::loongarch_lsx_vbitset_w:
4198 case Intrinsic::loongarch_lsx_vbitset_d:
4199 case Intrinsic::loongarch_lasx_xvbitset_b:
4200 case Intrinsic::loongarch_lasx_xvbitset_h:
4201 case Intrinsic::loongarch_lasx_xvbitset_w:
4202 case Intrinsic::loongarch_lasx_xvbitset_d: {
4203 EVT VecTy = N->getValueType(0);
4204 SDValue One = DAG.getConstant(1, DL, VecTy);
4205 return DAG.getNode(
4206 ISD::OR, DL, VecTy, N->getOperand(1),
4207 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4208 }
4209 case Intrinsic::loongarch_lsx_vbitseti_b:
4210 case Intrinsic::loongarch_lasx_xvbitseti_b:
4211 return lowerVectorBitSetImm<3>(N, DAG);
4212 case Intrinsic::loongarch_lsx_vbitseti_h:
4213 case Intrinsic::loongarch_lasx_xvbitseti_h:
4214 return lowerVectorBitSetImm<4>(N, DAG);
4215 case Intrinsic::loongarch_lsx_vbitseti_w:
4216 case Intrinsic::loongarch_lasx_xvbitseti_w:
4217 return lowerVectorBitSetImm<5>(N, DAG);
4218 case Intrinsic::loongarch_lsx_vbitseti_d:
4219 case Intrinsic::loongarch_lasx_xvbitseti_d:
4220 return lowerVectorBitSetImm<6>(N, DAG);
4221 case Intrinsic::loongarch_lsx_vbitrev_b:
4222 case Intrinsic::loongarch_lsx_vbitrev_h:
4223 case Intrinsic::loongarch_lsx_vbitrev_w:
4224 case Intrinsic::loongarch_lsx_vbitrev_d:
4225 case Intrinsic::loongarch_lasx_xvbitrev_b:
4226 case Intrinsic::loongarch_lasx_xvbitrev_h:
4227 case Intrinsic::loongarch_lasx_xvbitrev_w:
4228 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4229 EVT VecTy = N->getValueType(0);
4230 SDValue One = DAG.getConstant(1, DL, VecTy);
4231 return DAG.getNode(
4232 ISD::XOR, DL, VecTy, N->getOperand(1),
4233 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4234 }
4235 case Intrinsic::loongarch_lsx_vbitrevi_b:
4236 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4237 return lowerVectorBitRevImm<3>(N, DAG);
4238 case Intrinsic::loongarch_lsx_vbitrevi_h:
4239 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4240 return lowerVectorBitRevImm<4>(N, DAG);
4241 case Intrinsic::loongarch_lsx_vbitrevi_w:
4242 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4243 return lowerVectorBitRevImm<5>(N, DAG);
4244 case Intrinsic::loongarch_lsx_vbitrevi_d:
4245 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4246 return lowerVectorBitRevImm<6>(N, DAG);
4247 case Intrinsic::loongarch_lsx_vfadd_s:
4248 case Intrinsic::loongarch_lsx_vfadd_d:
4249 case Intrinsic::loongarch_lasx_xvfadd_s:
4250 case Intrinsic::loongarch_lasx_xvfadd_d:
4251 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4252 N->getOperand(2));
4253 case Intrinsic::loongarch_lsx_vfsub_s:
4254 case Intrinsic::loongarch_lsx_vfsub_d:
4255 case Intrinsic::loongarch_lasx_xvfsub_s:
4256 case Intrinsic::loongarch_lasx_xvfsub_d:
4257 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4258 N->getOperand(2));
4259 case Intrinsic::loongarch_lsx_vfmul_s:
4260 case Intrinsic::loongarch_lsx_vfmul_d:
4261 case Intrinsic::loongarch_lasx_xvfmul_s:
4262 case Intrinsic::loongarch_lasx_xvfmul_d:
4263 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4264 N->getOperand(2));
4265 case Intrinsic::loongarch_lsx_vfdiv_s:
4266 case Intrinsic::loongarch_lsx_vfdiv_d:
4267 case Intrinsic::loongarch_lasx_xvfdiv_s:
4268 case Intrinsic::loongarch_lasx_xvfdiv_d:
4269 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4270 N->getOperand(2));
4271 case Intrinsic::loongarch_lsx_vfmadd_s:
4272 case Intrinsic::loongarch_lsx_vfmadd_d: