LLVM 20.0.0git
LoongArchISelLowering.cpp
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
103
104 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
105 // we know which of sll and revb.2h is faster.
108
109 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
110 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
111 // and i32 could still be byte-swapped relatively cheaply.
113
119
122
123 // Set operations for LA64 only.
124
125 if (Subtarget.is64Bit()) {
143
147 Custom);
149 }
150
151 // Set operations for LA32 only.
152
153 if (!Subtarget.is64Bit()) {
159 }
160
162
163 static const ISD::CondCode FPCCToExpand[] = {
166
167 // Set operations for 'F' feature.
168
169 if (Subtarget.hasBasicF()) {
170 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
171 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
172 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
173
189
190 if (Subtarget.is64Bit())
192
193 if (!Subtarget.hasBasicD()) {
195 if (Subtarget.is64Bit()) {
198 }
199 }
200 }
201
202 // Set operations for 'D' feature.
203
204 if (Subtarget.hasBasicD()) {
205 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
206 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
207 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
208 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
209 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
210
226
227 if (Subtarget.is64Bit())
229 }
230
231 // Set operations for 'LSX' feature.
232
233 if (Subtarget.hasExtLSX()) {
235 // Expand all truncating stores and extending loads.
236 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
237 setTruncStoreAction(VT, InnerVT, Expand);
240 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
241 }
242 // By default everything must be expanded. Then we will selectively turn
243 // on ones that can be effectively codegen'd.
244 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
246 }
247
248 for (MVT VT : LSXVTs) {
252
256
260 }
261 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
264 Legal);
266 VT, Legal);
273 Expand);
275 }
276 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
278 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
280 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
283 }
284 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
292 VT, Expand);
294 }
296 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
297 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
298 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
299 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
300 }
301
302 // Set operations for 'LASX' feature.
303
304 if (Subtarget.hasExtLASX()) {
305 for (MVT VT : LASXVTs) {
309
314
318 }
319 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
322 Legal);
324 VT, Legal);
331 Expand);
333 }
334 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
336 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
338 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
341 }
342 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
350 VT, Expand);
352 }
353 }
354
355 // Set DAG combine for LA32 and LA64.
356
361
362 // Set DAG combine for 'LSX' feature.
363
364 if (Subtarget.hasExtLSX())
366
367 // Compute derived properties from the register classes.
369
371
374
376
378
379 // Function alignments.
381 // Set preferred alignments.
385
386 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
387 if (Subtarget.hasLAMCAS())
389}
390
392 const GlobalAddressSDNode *GA) const {
393 // In order to maximise the opportunity for common subexpression elimination,
394 // keep a separate ADD node for the global address offset instead of folding
395 // it into the global address node. Later peephole optimisations may choose to
396 // fold it back in when profitable.
397 return false;
398}
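// Illustrative example (an added sketch, not part of the upstream file): with
// offset folding disabled, an access such as `&g + 8` keeps the DAG shaped as
//   (add (GlobalAddress @g), 8)
// rather than a single (GlobalAddress @g, offset=8) node, so several accesses
// to `g` with different offsets can share (CSE) the one GlobalAddress node.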
399
401 SelectionDAG &DAG) const {
402 switch (Op.getOpcode()) {
404 return lowerATOMIC_FENCE(Op, DAG);
406 return lowerEH_DWARF_CFA(Op, DAG);
408 return lowerGlobalAddress(Op, DAG);
410 return lowerGlobalTLSAddress(Op, DAG);
412 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
414 return lowerINTRINSIC_W_CHAIN(Op, DAG);
416 return lowerINTRINSIC_VOID(Op, DAG);
418 return lowerBlockAddress(Op, DAG);
419 case ISD::JumpTable:
420 return lowerJumpTable(Op, DAG);
421 case ISD::SHL_PARTS:
422 return lowerShiftLeftParts(Op, DAG);
423 case ISD::SRA_PARTS:
424 return lowerShiftRightParts(Op, DAG, true);
425 case ISD::SRL_PARTS:
426 return lowerShiftRightParts(Op, DAG, false);
428 return lowerConstantPool(Op, DAG);
429 case ISD::FP_TO_SINT:
430 return lowerFP_TO_SINT(Op, DAG);
431 case ISD::BITCAST:
432 return lowerBITCAST(Op, DAG);
433 case ISD::UINT_TO_FP:
434 return lowerUINT_TO_FP(Op, DAG);
435 case ISD::SINT_TO_FP:
436 return lowerSINT_TO_FP(Op, DAG);
437 case ISD::VASTART:
438 return lowerVASTART(Op, DAG);
439 case ISD::FRAMEADDR:
440 return lowerFRAMEADDR(Op, DAG);
441 case ISD::RETURNADDR:
442 return lowerRETURNADDR(Op, DAG);
444 return lowerWRITE_REGISTER(Op, DAG);
446 return lowerINSERT_VECTOR_ELT(Op, DAG);
448 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
450 return lowerBUILD_VECTOR(Op, DAG);
452 return lowerVECTOR_SHUFFLE(Op, DAG);
453 case ISD::BITREVERSE:
454 return lowerBITREVERSE(Op, DAG);
456 return lowerSCALAR_TO_VECTOR(Op, DAG);
457 }
458 return SDValue();
459}
460
462LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
463 SelectionDAG &DAG) const {
464 SDLoc DL(Op);
465 MVT OpVT = Op.getSimpleValueType();
466
467 SDValue Vector = DAG.getUNDEF(OpVT);
468 SDValue Val = Op.getOperand(0);
469 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
470
471 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
472}
473
474SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
475 SelectionDAG &DAG) const {
476 EVT ResTy = Op->getValueType(0);
477 SDValue Src = Op->getOperand(0);
478 SDLoc DL(Op);
479
480 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
481 unsigned int OrigEltNum = ResTy.getVectorNumElements();
482 unsigned int NewEltNum = NewVT.getVectorNumElements();
483
484 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
485
487 for (unsigned int i = 0; i < NewEltNum; i++) {
488 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
489 DAG.getConstant(i, DL, MVT::i64));
490 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
491 ? (unsigned)LoongArchISD::BITREV_8B
493 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
494 }
495 SDValue Res =
496 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
497
498 switch (ResTy.getSimpleVT().SimpleTy) {
499 default:
500 return SDValue();
501 case MVT::v16i8:
502 case MVT::v32i8:
503 return Res;
504 case MVT::v8i16:
505 case MVT::v16i16:
506 case MVT::v4i32:
507 case MVT::v8i32: {
509 for (unsigned int i = 0; i < NewEltNum; i++)
510 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
511 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
512 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
513 }
514 }
515}
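// Illustrative sketch (an added, hypothetical helper, not part of the upstream
// file): the element-order fix-up mask built above, recomputed standalone.
// Reversing all bits of each i64 lane also reverses the order of the narrower
// elements packed inside it, so the shuffle swaps them back.
static void buildBitrevFixupMaskExample(unsigned OrigEltNum, unsigned NewEltNum,
                                        int *Mask /* OrigEltNum entries */) {
  unsigned Pos = 0;
  for (unsigned I = 0; I < NewEltNum; ++I)
    for (int J = OrigEltNum / NewEltNum - 1; J >= 0; --J)
      Mask[Pos++] = J + (OrigEltNum / NewEltNum) * I;
  // For a v8i16 bitreverse (OrigEltNum = 8, NewEltNum = 2 via v2i64) this
  // produces {3, 2, 1, 0, 7, 6, 5, 4}.
}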
516
517/// Determine whether a range fits a regular pattern of values.
518/// This function accounts for the possibility of jumping over the End iterator.
519template <typename ValType>
520static bool
522 unsigned CheckStride,
524 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
525 auto &I = Begin;
526
527 while (I != End) {
528 if (*I != -1 && *I != ExpectedIndex)
529 return false;
530 ExpectedIndex += ExpectedIndexStride;
531
532 // Incrementing past End is undefined behaviour so we must increment one
533 // step at a time and check for End at each step.
534 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
535 ; // Empty loop body.
536 }
537 return true;
538}
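// Illustrative usage sketch (an added, hypothetical helper, not part of the
// upstream file): how the shuffle lowerings below typically query the helper.
// For the interleaving mask {0, 4, 1, 5}, the even positions hold the sequence
// 0, 1 and the odd positions hold 4, 5; -1 (undef) entries would also pass.
static bool fitsRegularPatternExample() {
  SmallVector<int, 4> Mask = {0, 4, 1, 5};
  bool EvensOk = fitsRegularPattern<int>(Mask.begin(), /*CheckStride=*/2,
                                         Mask.end(), /*ExpectedIndex=*/0,
                                         /*ExpectedIndexStride=*/1);
  bool OddsOk = fitsRegularPattern<int>(Mask.begin() + 1, /*CheckStride=*/2,
                                        Mask.end(), /*ExpectedIndex=*/4,
                                        /*ExpectedIndexStride=*/1);
  return EvensOk && OddsOk; // Both checks succeed for this mask.
}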
539
540/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
541///
542/// VREPLVEI performs vector broadcast based on an element specified by an
543/// integer immediate, with its mask being similar to:
544/// <x, x, x, ...>
545/// where x is any valid index.
546///
547 /// When undefs appear in the mask they are treated as if they were whatever
548/// value is necessary in order to fit the above form.
550 MVT VT, SDValue V1, SDValue V2,
551 SelectionDAG &DAG) {
552 int SplatIndex = -1;
553 for (const auto &M : Mask) {
554 if (M != -1) {
555 SplatIndex = M;
556 break;
557 }
558 }
559
560 if (SplatIndex == -1)
561 return DAG.getUNDEF(VT);
562
563 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
564 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
565 APInt Imm(64, SplatIndex);
566 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
567 DAG.getConstant(Imm, DL, MVT::i64));
568 }
569
570 return SDValue();
571}
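// Illustrative example (an addition, not part of the upstream file): a v4i32
// shuffle with mask <2, u, 2, 2> (u = undef) has SplatIndex = 2, fits the
// constant pattern (stride 0), and becomes (VREPLVEI $v0, 2), which is
// expected to select to vreplvei.w, broadcasting element 2 to every lane.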
572
573/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
574///
575/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
576/// elements according to a <4 x i2> constant (encoded as an integer immediate).
577///
578/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
579/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
580 /// When undefs appear they are treated as if they were whatever value is
581/// necessary in order to fit the above forms.
582///
583/// For example:
584/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
585/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
586/// i32 7, i32 6, i32 5, i32 4>
587/// is lowered to:
588/// (VSHUF4I_H $v0, $v1, 27)
589/// where the 27 comes from:
590/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
592 MVT VT, SDValue V1, SDValue V2,
593 SelectionDAG &DAG) {
594
595 // When the size is less than 4, lower-cost instructions may be used.
596 if (Mask.size() < 4)
597 return SDValue();
598
599 int SubMask[4] = {-1, -1, -1, -1};
600 for (unsigned i = 0; i < 4; ++i) {
601 for (unsigned j = i; j < Mask.size(); j += 4) {
602 int Idx = Mask[j];
603
604 // Convert from vector index to 4-element subvector index
605 // If an index refers to an element outside of the subvector then give up
606 if (Idx != -1) {
607 Idx -= 4 * (j / 4);
608 if (Idx < 0 || Idx >= 4)
609 return SDValue();
610 }
611
612 // If the mask has an undef, replace it with the current index.
613 // Note that it might still be undef if the current index is also undef
614 if (SubMask[i] == -1)
615 SubMask[i] = Idx;
616 // Check that non-undef values are the same as in the mask. If they
617 // aren't then give up
618 else if (Idx != -1 && Idx != SubMask[i])
619 return SDValue();
620 }
621 }
622
623 // Calculate the immediate. Replace any remaining undefs with zero
624 APInt Imm(64, 0);
625 for (int i = 3; i >= 0; --i) {
626 int Idx = SubMask[i];
627
628 if (Idx == -1)
629 Idx = 0;
630
631 Imm <<= 2;
632 Imm |= Idx & 0x3;
633 }
634
635 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
636 DAG.getConstant(Imm, DL, MVT::i64));
637}
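// Illustrative sketch (an added, hypothetical helper, not part of the upstream
// file): the immediate computation above, standalone. For the documented
// SubMask {3, 2, 1, 0} it yields 27, matching (VSHUF4I_H $v0, $v1, 27).
static unsigned computeVShuf4iImmExample(const int SubMask[4]) {
  unsigned Imm = 0;
  for (int I = 3; I >= 0; --I) {
    int Idx = SubMask[I] == -1 ? 0 : SubMask[I]; // Remaining undefs become 0.
    Imm <<= 2;
    Imm |= Idx & 0x3;
  }
  return Imm; // {3, 2, 1, 0} -> 0b00011011 = 27.
}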
638
639/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
640///
641/// VPACKEV interleaves the even elements from each vector.
642///
643/// It is possible to lower into VPACKEV when the mask consists of two of the
644/// following forms interleaved:
645/// <0, 2, 4, ...>
646/// <n, n+2, n+4, ...>
647/// where n is the number of elements in the vector.
648/// For example:
649/// <0, 0, 2, 2, 4, 4, ...>
650/// <0, n, 2, n+2, 4, n+4, ...>
651///
652 /// When undefs appear in the mask they are treated as if they were whatever
653/// value is necessary in order to fit the above forms.
655 MVT VT, SDValue V1, SDValue V2,
656 SelectionDAG &DAG) {
657
658 const auto &Begin = Mask.begin();
659 const auto &End = Mask.end();
660 SDValue OriV1 = V1, OriV2 = V2;
661
662 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
663 V1 = OriV1;
664 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
665 V1 = OriV2;
666 else
667 return SDValue();
668
669 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
670 V2 = OriV1;
671 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
672 V2 = OriV2;
673 else
674 return SDValue();
675
676 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
677}
678
679/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
680///
681/// VPACKOD interleaves the odd elements from each vector.
682///
683/// It is possible to lower into VPACKOD when the mask consists of two of the
684/// following forms interleaved:
685/// <1, 3, 5, ...>
686/// <n+1, n+3, n+5, ...>
687/// where n is the number of elements in the vector.
688/// For example:
689/// <1, 1, 3, 3, 5, 5, ...>
690/// <1, n+1, 3, n+3, 5, n+5, ...>
691///
692 /// When undefs appear in the mask they are treated as if they were whatever
693/// value is necessary in order to fit the above forms.
695 MVT VT, SDValue V1, SDValue V2,
696 SelectionDAG &DAG) {
697
698 const auto &Begin = Mask.begin();
699 const auto &End = Mask.end();
700 SDValue OriV1 = V1, OriV2 = V2;
701
702 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
703 V1 = OriV1;
704 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
705 V1 = OriV2;
706 else
707 return SDValue();
708
709 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
710 V2 = OriV1;
711 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
712 V2 = OriV2;
713 else
714 return SDValue();
715
716 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
717}
718
719/// Lower VECTOR_SHUFFLE into VILVH (if possible).
720///
721/// VILVH interleaves consecutive elements from the left (highest-indexed) half
722/// of each vector.
723///
724/// It is possible to lower into VILVH when the mask consists of two of the
725/// following forms interleaved:
726/// <x, x+1, x+2, ...>
727/// <n+x, n+x+1, n+x+2, ...>
728/// where n is the number of elements in the vector and x is half n.
729/// For example:
730/// <x, x, x+1, x+1, x+2, x+2, ...>
731/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
732///
733 /// When undefs appear in the mask they are treated as if they were whatever
734/// value is necessary in order to fit the above forms.
736 MVT VT, SDValue V1, SDValue V2,
737 SelectionDAG &DAG) {
738
739 const auto &Begin = Mask.begin();
740 const auto &End = Mask.end();
741 unsigned HalfSize = Mask.size() / 2;
742 SDValue OriV1 = V1, OriV2 = V2;
743
744 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
745 V1 = OriV1;
746 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
747 V1 = OriV2;
748 else
749 return SDValue();
750
751 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
752 V2 = OriV1;
753 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
754 1))
755 V2 = OriV2;
756 else
757 return SDValue();
758
759 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
760}
761
762/// Lower VECTOR_SHUFFLE into VILVL (if possible).
763///
764/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
765/// of each vector.
766///
767/// It is possible to lower into VILVL when the mask consists of two of the
768/// following forms interleaved:
769/// <0, 1, 2, ...>
770/// <n, n+1, n+2, ...>
771/// where n is the number of elements in the vector.
772/// For example:
773/// <0, 0, 1, 1, 2, 2, ...>
774/// <0, n, 1, n+1, 2, n+2, ...>
775///
776 /// When undefs appear in the mask they are treated as if they were whatever
777/// value is necessary in order to fit the above forms.
779 MVT VT, SDValue V1, SDValue V2,
780 SelectionDAG &DAG) {
781
782 const auto &Begin = Mask.begin();
783 const auto &End = Mask.end();
784 SDValue OriV1 = V1, OriV2 = V2;
785
786 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
787 V1 = OriV1;
788 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
789 V1 = OriV2;
790 else
791 return SDValue();
792
793 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
794 V2 = OriV1;
795 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
796 V2 = OriV2;
797 else
798 return SDValue();
799
800 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
801}
802
803/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
804///
805/// VPICKEV copies the even elements of each vector into the result vector.
806///
807/// It is possible to lower into VPICKEV when the mask consists of two of the
808/// following forms concatenated:
809/// <0, 2, 4, ...>
810/// <n, n+2, n+4, ...>
811/// where n is the number of elements in the vector.
812/// For example:
813/// <0, 2, 4, ..., 0, 2, 4, ...>
814/// <0, 2, 4, ..., n, n+2, n+4, ...>
815///
816 /// When undefs appear in the mask they are treated as if they were whatever
817/// value is necessary in order to fit the above forms.
819 MVT VT, SDValue V1, SDValue V2,
820 SelectionDAG &DAG) {
821
822 const auto &Begin = Mask.begin();
823 const auto &Mid = Mask.begin() + Mask.size() / 2;
824 const auto &End = Mask.end();
825 SDValue OriV1 = V1, OriV2 = V2;
826
827 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
828 V1 = OriV1;
829 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
830 V1 = OriV2;
831 else
832 return SDValue();
833
834 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
835 V2 = OriV1;
836 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
837 V2 = OriV2;
838
839 else
840 return SDValue();
841
842 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
843}
844
845/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
846///
847/// VPICKOD copies the odd elements of each vector into the result vector.
848///
849/// It is possible to lower into VPICKOD when the mask consists of two of the
850/// following forms concatenated:
851/// <1, 3, 5, ...>
852/// <n+1, n+3, n+5, ...>
853/// where n is the number of elements in the vector.
854/// For example:
855/// <1, 3, 5, ..., 1, 3, 5, ...>
856/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
857///
858 /// When undefs appear in the mask they are treated as if they were whatever
859/// value is necessary in order to fit the above forms.
861 MVT VT, SDValue V1, SDValue V2,
862 SelectionDAG &DAG) {
863
864 const auto &Begin = Mask.begin();
865 const auto &Mid = Mask.begin() + Mask.size() / 2;
866 const auto &End = Mask.end();
867 SDValue OriV1 = V1, OriV2 = V2;
868
869 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
870 V1 = OriV1;
871 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
872 V1 = OriV2;
873 else
874 return SDValue();
875
876 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
877 V2 = OriV1;
878 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
879 V2 = OriV2;
880 else
881 return SDValue();
882
883 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
884}
885
886/// Lower VECTOR_SHUFFLE into VSHUF.
887///
888/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
889/// adding it as an operand to the resulting VSHUF.
891 MVT VT, SDValue V1, SDValue V2,
892 SelectionDAG &DAG) {
893
895 for (auto M : Mask)
896 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
897
898 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
899 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
900
901 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
902 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
903 // VSHF concatenates the vectors in a bitwise fashion:
904 // <0b00, 0b01> + <0b10, 0b11> ->
905 // 0b0100 + 0b1110 -> 0b01001110
906 // <0b10, 0b11, 0b00, 0b01>
907 // We must therefore swap the operands to get the correct result.
908 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
909}
910
911/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
912///
913/// This routine breaks down the specific type of 128-bit shuffle and
914/// dispatches to the lowering routines accordingly.
916 SDValue V1, SDValue V2, SelectionDAG &DAG) {
917 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
918 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
919 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
920 "Vector type is unsupported for lsx!");
921 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
922 "Two operands have different types!");
923 assert(VT.getVectorNumElements() == Mask.size() &&
924 "Unexpected mask size for shuffle!");
925 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
926
927 SDValue Result;
928 // TODO: Add more comparison patterns.
929 if (V2.isUndef()) {
930 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
931 return Result;
932 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
933 return Result;
934
935 // TODO: This comment may be enabled in the future to better match the
936 // pattern for instruction selection.
937 /* V2 = V1; */
938 }
939
940 // Keep this pattern comparison order as-is; it was chosen for better
941 // performance.
942 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
943 return Result;
944 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
945 return Result;
946 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
947 return Result;
948 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
949 return Result;
950 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
951 return Result;
952 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
953 return Result;
954 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
955 return Result;
956
957 return SDValue();
958}
959
960/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
961///
962/// It is an XVREPLVEI when the mask is:
963/// <x, x, x, ..., x+n, x+n, x+n, ...>
964/// where x appears n times and n is half the length of the vector.
965///
966/// When undefs appear in the mask they are treated as if they were whatever
967/// value is necessary in order to fit the above form.
969 ArrayRef<int> Mask, MVT VT,
970 SDValue V1, SDValue V2,
971 SelectionDAG &DAG) {
972 int SplatIndex = -1;
973 for (const auto &M : Mask) {
974 if (M != -1) {
975 SplatIndex = M;
976 break;
977 }
978 }
979
980 if (SplatIndex == -1)
981 return DAG.getUNDEF(VT);
982
983 const auto &Begin = Mask.begin();
984 const auto &End = Mask.end();
985 unsigned HalfSize = Mask.size() / 2;
986
987 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
988 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
989 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
990 0)) {
991 APInt Imm(64, SplatIndex);
992 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
993 DAG.getConstant(Imm, DL, MVT::i64));
994 }
995
996 return SDValue();
997}
998
999/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1001 MVT VT, SDValue V1, SDValue V2,
1002 SelectionDAG &DAG) {
1003 // When the size is less than or equal to 4, lower-cost instructions may be
1004 // used.
1005 if (Mask.size() <= 4)
1006 return SDValue();
1007 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
1008}
1009
1010/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
1012 MVT VT, SDValue V1, SDValue V2,
1013 SelectionDAG &DAG) {
1014 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
1015}
1016
1017/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
1019 MVT VT, SDValue V1, SDValue V2,
1020 SelectionDAG &DAG) {
1021 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1022}
1023
1024/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1026 MVT VT, SDValue V1, SDValue V2,
1027 SelectionDAG &DAG) {
1028
1029 const auto &Begin = Mask.begin();
1030 const auto &End = Mask.end();
1031 unsigned HalfSize = Mask.size() / 2;
1032 unsigned LeftSize = HalfSize / 2;
1033 SDValue OriV1 = V1, OriV2 = V2;
1034
1035 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1036 1) &&
1037 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1038 V1 = OriV1;
1039 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1040 Mask.size() + HalfSize - LeftSize, 1) &&
1041 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1042 Mask.size() + HalfSize + LeftSize, 1))
1043 V1 = OriV2;
1044 else
1045 return SDValue();
1046
1047 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1048 1) &&
1049 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1050 1))
1051 V2 = OriV1;
1052 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1053 Mask.size() + HalfSize - LeftSize, 1) &&
1054 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1055 Mask.size() + HalfSize + LeftSize, 1))
1056 V2 = OriV2;
1057 else
1058 return SDValue();
1059
1060 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1061}
1062
1063/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1065 MVT VT, SDValue V1, SDValue V2,
1066 SelectionDAG &DAG) {
1067
1068 const auto &Begin = Mask.begin();
1069 const auto &End = Mask.end();
1070 unsigned HalfSize = Mask.size() / 2;
1071 SDValue OriV1 = V1, OriV2 = V2;
1072
1073 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1074 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1075 V1 = OriV1;
1076 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1077 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1078 Mask.size() + HalfSize, 1))
1079 V1 = OriV2;
1080 else
1081 return SDValue();
1082
1083 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1084 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1085 V2 = OriV1;
1086 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1087 1) &&
1088 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1089 Mask.size() + HalfSize, 1))
1090 V2 = OriV2;
1091 else
1092 return SDValue();
1093
1094 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1095}
1096
1097/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1099 MVT VT, SDValue V1, SDValue V2,
1100 SelectionDAG &DAG) {
1101
1102 const auto &Begin = Mask.begin();
1103 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1104 const auto &Mid = Mask.begin() + Mask.size() / 2;
1105 const auto &RightMid = Mask.end() - Mask.size() / 4;
1106 const auto &End = Mask.end();
1107 unsigned HalfSize = Mask.size() / 2;
1108 SDValue OriV1 = V1, OriV2 = V2;
1109
1110 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1111 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1112 V1 = OriV1;
1113 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1114 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1115 V1 = OriV2;
1116 else
1117 return SDValue();
1118
1119 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1120 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1121 V2 = OriV1;
1122 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1123 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1124 V2 = OriV2;
1125
1126 else
1127 return SDValue();
1128
1129 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1130}
1131
1132/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1134 MVT VT, SDValue V1, SDValue V2,
1135 SelectionDAG &DAG) {
1136
1137 const auto &Begin = Mask.begin();
1138 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1139 const auto &Mid = Mask.begin() + Mask.size() / 2;
1140 const auto &RightMid = Mask.end() - Mask.size() / 4;
1141 const auto &End = Mask.end();
1142 unsigned HalfSize = Mask.size() / 2;
1143 SDValue OriV1 = V1, OriV2 = V2;
1144
1145 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1146 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1147 V1 = OriV1;
1148 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1149 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1150 2))
1151 V1 = OriV2;
1152 else
1153 return SDValue();
1154
1155 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1156 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1157 V2 = OriV1;
1158 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1159 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1160 2))
1161 V2 = OriV2;
1162 else
1163 return SDValue();
1164
1165 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1166}
1167
1168/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1170 MVT VT, SDValue V1, SDValue V2,
1171 SelectionDAG &DAG) {
1172
1173 int MaskSize = Mask.size();
1174 int HalfSize = Mask.size() / 2;
1175 const auto &Begin = Mask.begin();
1176 const auto &Mid = Mask.begin() + HalfSize;
1177 const auto &End = Mask.end();
1178
1179 // VECTOR_SHUFFLE concatenates the vectors:
1180 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1181 // shuffling ->
1182 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1183 //
1184 // XVSHUF concatenates the vectors:
1185 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1186 // shuffling ->
1187 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1188 SmallVector<SDValue, 8> MaskAlloc;
1189 for (auto it = Begin; it < Mid; it++) {
1190 if (*it < 0) // UNDEF
1191 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1192 else if ((*it >= 0 && *it < HalfSize) ||
1193 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1194 int M = *it < HalfSize ? *it : *it - HalfSize;
1195 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1196 } else
1197 return SDValue();
1198 }
1199 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1200
1201 for (auto it = Mid; it < End; it++) {
1202 if (*it < 0) // UNDEF
1203 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1204 else if ((*it >= HalfSize && *it < MaskSize) ||
1205 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1206 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1207 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1208 } else
1209 return SDValue();
1210 }
1211 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1212
1213 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1214 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1215 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1216}
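// Illustrative worked example (an addition, not part of the upstream file):
// for a v8i32 shuffle (MaskSize = 8, HalfSize = 4) with VECTOR_SHUFFLE mask
//   <0, 8, 1, 9, 4, 12, 5, 13>
// the first-half entries rewrite as {0, 8, 1, 9} -> {0, 8-4, 1, 9-4}
// = {0, 4, 1, 5} and the second-half entries as {4, 12, 5, 13} ->
// {4-4, 12-8, 5-4, 13-8} = {0, 4, 1, 5}, giving the per-lane XVSHUF selector
// <0, 4, 1, 5, 0, 4, 1, 5>. Any entry referring to the wrong half for its
// position makes the routine give up and return SDValue().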
1217
1218/// Shuffle vectors by lane to generate more optimized instructions.
1219/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1220///
1221/// Therefore, except for the following four cases, all other cases are regarded
1222/// as cross-lane shuffles, for which optimization is relatively limited.
1223///
1224/// - Shuffle high, low lanes of the two input vectors
1225/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1226/// - Shuffle low, high lanes of the two input vectors
1227/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1228/// - Shuffle low, low lanes of the two input vectors
1229/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1230/// - Shuffle high, high lanes of the two input vectors
1231/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1232///
1233/// The first case is the closest to LoongArch instructions and the other
1234/// cases need to be converted to it for processing.
1235///
1236/// This function may modify V1, V2 and Mask
1238 MutableArrayRef<int> Mask, MVT VT,
1239 SDValue &V1, SDValue &V2,
1240 SelectionDAG &DAG) {
1241
1242 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1243
1244 int MaskSize = Mask.size();
1245 int HalfSize = Mask.size() / 2;
1246
1247 HalfMaskType preMask = None, postMask = None;
1248
1249 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1250 return M < 0 || (M >= 0 && M < HalfSize) ||
1251 (M >= MaskSize && M < MaskSize + HalfSize);
1252 }))
1253 preMask = HighLaneTy;
1254 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1255 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1256 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1257 }))
1258 preMask = LowLaneTy;
1259
1260 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1261 return M < 0 || (M >= 0 && M < HalfSize) ||
1262 (M >= MaskSize && M < MaskSize + HalfSize);
1263 }))
1264 postMask = HighLaneTy;
1265 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1266 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1267 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1268 }))
1269 postMask = LowLaneTy;
1270
1271 // The first half of the mask is high-lane type and the second half is
1272 // low-lane type, which is the form closest to the LoongArch instructions.
1273 //
1274 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1275 // to the lower 128 bits of the vector register, and the low lane of the mask
1276 // corresponds to the higher 128 bits of the vector register.
1277 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1278 return;
1279 }
1280 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1281 V1 = DAG.getBitcast(MVT::v4i64, V1);
1282 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1283 DAG.getConstant(0b01001110, DL, MVT::i64));
1284 V1 = DAG.getBitcast(VT, V1);
1285
1286 if (!V2.isUndef()) {
1287 V2 = DAG.getBitcast(MVT::v4i64, V2);
1288 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1289 DAG.getConstant(0b01001110, DL, MVT::i64));
1290 V2 = DAG.getBitcast(VT, V2);
1291 }
1292
1293 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1294 *it = *it < 0 ? *it : *it - HalfSize;
1295 }
1296 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1297 *it = *it < 0 ? *it : *it + HalfSize;
1298 }
1299 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1300 V1 = DAG.getBitcast(MVT::v4i64, V1);
1301 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1302 DAG.getConstant(0b11101110, DL, MVT::i64));
1303 V1 = DAG.getBitcast(VT, V1);
1304
1305 if (!V2.isUndef()) {
1306 V2 = DAG.getBitcast(MVT::v4i64, V2);
1307 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1308 DAG.getConstant(0b11101110, DL, MVT::i64));
1309 V2 = DAG.getBitcast(VT, V2);
1310 }
1311
1312 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1313 *it = *it < 0 ? *it : *it - HalfSize;
1314 }
1315 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1316 V1 = DAG.getBitcast(MVT::v4i64, V1);
1317 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1318 DAG.getConstant(0b01000100, DL, MVT::i64));
1319 V1 = DAG.getBitcast(VT, V1);
1320
1321 if (!V2.isUndef()) {
1322 V2 = DAG.getBitcast(MVT::v4i64, V2);
1323 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1324 DAG.getConstant(0b01000100, DL, MVT::i64));
1325 V2 = DAG.getBitcast(VT, V2);
1326 }
1327
1328 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1329 *it = *it < 0 ? *it : *it + HalfSize;
1330 }
1331 } else { // cross-lane
1332 return;
1333 }
1334}
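// Illustrative note (an addition, not part of the upstream file): the XVPERMI
// immediates above act as xvpermi.d-style selectors, two bits per destination
// i64 element (result element k takes source element (Imm >> 2*k) & 3):
//   0b01001110 -> <2, 3, 0, 1>  swap the two 128-bit halves,
//   0b11101110 -> <2, 3, 2, 3>  duplicate the upper 128 bits into both halves,
//   0b01000100 -> <0, 1, 0, 1>  duplicate the lower 128 bits into both halves.
// After the permute, the mask is rewritten so the remaining shuffle stays
// within a single 128-bit lane.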
1335
1336/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1337///
1338/// This routine breaks down the specific type of 256-bit shuffle and
1339/// dispatches to the lowering routines accordingly.
1341 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1342 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1343 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1344 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1345 "Vector type is unsupported for lasx!");
1346 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1347 "Two operands have different types!");
1348 assert(VT.getVectorNumElements() == Mask.size() &&
1349 "Unexpected mask size for shuffle!");
1350 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1351 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1352
1353 // Canonicalize non-cross-lane shuffle vectors.
1354 SmallVector<int> NewMask(Mask);
1355 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1356
1357 SDValue Result;
1358 // TODO: Add more comparison patterns.
1359 if (V2.isUndef()) {
1360 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1361 return Result;
1362 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1363 return Result;
1364
1365 // TODO: This comment may be enabled in the future to better match the
1366 // pattern for instruction selection.
1367 /* V2 = V1; */
1368 }
1369
1370 // Keep this pattern comparison order as-is; it was chosen for better
1371 // performance.
1372 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1373 return Result;
1374 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1375 return Result;
1376 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1377 return Result;
1378 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1379 return Result;
1380 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1381 return Result;
1382 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1383 return Result;
1384 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1385 return Result;
1386
1387 return SDValue();
1388}
1389
1390SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1391 SelectionDAG &DAG) const {
1392 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1393 ArrayRef<int> OrigMask = SVOp->getMask();
1394 SDValue V1 = Op.getOperand(0);
1395 SDValue V2 = Op.getOperand(1);
1396 MVT VT = Op.getSimpleValueType();
1397 int NumElements = VT.getVectorNumElements();
1398 SDLoc DL(Op);
1399
1400 bool V1IsUndef = V1.isUndef();
1401 bool V2IsUndef = V2.isUndef();
1402 if (V1IsUndef && V2IsUndef)
1403 return DAG.getUNDEF(VT);
1404
1405 // When we create a shuffle node we put the UNDEF node in the second operand,
1406 // but in some cases the first operand may be transformed to UNDEF.
1407 // In this case we should just commute the node.
1408 if (V1IsUndef)
1409 return DAG.getCommutedVectorShuffle(*SVOp);
1410
1411 // Check for non-undef masks pointing at an undef vector and make the masks
1412 // undef as well. This makes it easier to match the shuffle based solely on
1413 // the mask.
1414 if (V2IsUndef &&
1415 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1416 SmallVector<int, 8> NewMask(OrigMask);
1417 for (int &M : NewMask)
1418 if (M >= NumElements)
1419 M = -1;
1420 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1421 }
1422
1423 // Check for illegal shuffle mask element index values.
1424 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1425 (void)MaskUpperLimit;
1426 assert(llvm::all_of(OrigMask,
1427 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1428 "Out of bounds shuffle index");
1429
1430 // For each vector width, delegate to a specialized lowering routine.
1431 if (VT.is128BitVector())
1432 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1433
1434 if (VT.is256BitVector())
1435 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1436
1437 return SDValue();
1438}
1439
1440static bool isConstantOrUndef(const SDValue Op) {
1441 if (Op->isUndef())
1442 return true;
1443 if (isa<ConstantSDNode>(Op))
1444 return true;
1445 if (isa<ConstantFPSDNode>(Op))
1446 return true;
1447 return false;
1448}
1449
1451 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1452 if (isConstantOrUndef(Op->getOperand(i)))
1453 return true;
1454 return false;
1455}
1456
1457SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1458 SelectionDAG &DAG) const {
1459 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1460 EVT ResTy = Op->getValueType(0);
1461 SDLoc DL(Op);
1462 APInt SplatValue, SplatUndef;
1463 unsigned SplatBitSize;
1464 bool HasAnyUndefs;
1465 bool Is128Vec = ResTy.is128BitVector();
1466 bool Is256Vec = ResTy.is256BitVector();
1467
1468 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1469 (!Subtarget.hasExtLASX() || !Is256Vec))
1470 return SDValue();
1471
1472 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1473 /*MinSplatBits=*/8) &&
1474 SplatBitSize <= 64) {
1475 // We can only cope with 8, 16, 32, or 64-bit elements.
1476 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1477 SplatBitSize != 64)
1478 return SDValue();
1479
1480 EVT ViaVecTy;
1481
1482 switch (SplatBitSize) {
1483 default:
1484 return SDValue();
1485 case 8:
1486 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1487 break;
1488 case 16:
1489 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1490 break;
1491 case 32:
1492 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1493 break;
1494 case 64:
1495 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1496 break;
1497 }
1498
1499 // SelectionDAG::getConstant will promote SplatValue appropriately.
1500 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1501
1502 // Bitcast to the type we originally wanted.
1503 if (ViaVecTy != ResTy)
1504 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1505
1506 return Result;
1507 }
1508
1509 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1510 return Op;
1511
1513 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1514 // The resulting code is the same length as the expansion, but it doesn't
1515 // use memory operations.
1516 EVT ResTy = Node->getValueType(0);
1517
1518 assert(ResTy.isVector());
1519
1520 unsigned NumElts = ResTy.getVectorNumElements();
1521 SDValue Vector = DAG.getUNDEF(ResTy);
1522 for (unsigned i = 0; i < NumElts; ++i) {
1524 Node->getOperand(i),
1525 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1526 }
1527 return Vector;
1528 }
1529
1530 return SDValue();
1531}
1532
1533SDValue
1534LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1535 SelectionDAG &DAG) const {
1536 EVT VecTy = Op->getOperand(0)->getValueType(0);
1537 SDValue Idx = Op->getOperand(1);
1538 EVT EltTy = VecTy.getVectorElementType();
1539 unsigned NumElts = VecTy.getVectorNumElements();
1540
1541 if (isa<ConstantSDNode>(Idx) &&
1542 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1543 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1544 return Op;
1545
1546 return SDValue();
1547}
1548
1549SDValue
1550LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1551 SelectionDAG &DAG) const {
1552 if (isa<ConstantSDNode>(Op->getOperand(2)))
1553 return Op;
1554 return SDValue();
1555}
1556
1557SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1558 SelectionDAG &DAG) const {
1559 SDLoc DL(Op);
1560 SyncScope::ID FenceSSID =
1561 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1562
1563 // singlethread fences only synchronize with signal handlers on the same
1564 // thread and thus only need to preserve instruction order, not actually
1565 // enforce memory ordering.
1566 if (FenceSSID == SyncScope::SingleThread)
1567 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1568 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1569
1570 return Op;
1571}
1572
1573SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1574 SelectionDAG &DAG) const {
1575
1576 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1577 DAG.getContext()->emitError(
1578 "On LA64, only 64-bit registers can be written.");
1579 return Op.getOperand(0);
1580 }
1581
1582 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1583 DAG.getContext()->emitError(
1584 "On LA32, only 32-bit registers can be written.");
1585 return Op.getOperand(0);
1586 }
1587
1588 return Op;
1589}
1590
1591SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1592 SelectionDAG &DAG) const {
1593 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1594 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1595 "be a constant integer");
1596 return SDValue();
1597 }
1598
1601 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1602 EVT VT = Op.getValueType();
1603 SDLoc DL(Op);
1604 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1605 unsigned Depth = Op.getConstantOperandVal(0);
1606 int GRLenInBytes = Subtarget.getGRLen() / 8;
1607
1608 while (Depth--) {
1609 int Offset = -(GRLenInBytes * 2);
1610 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1611 DAG.getSignedConstant(Offset, DL, VT));
1612 FrameAddr =
1613 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1614 }
1615 return FrameAddr;
1616}
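// Illustrative example (an addition, not part of the upstream file): on LA64
// (GRLen = 64), __builtin_frame_address(1) copies the frame register and then
// performs one load from $fp - 16, skipping the two GRLen-sized slots at the
// top of the frame (typically the saved $ra and the saved $fp) to reach the
// caller's frame address. Each additional level of depth adds one such load.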
1617
1618SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1619 SelectionDAG &DAG) const {
1621 return SDValue();
1622
1623 // Currently only support lowering return address for current frame.
1624 if (Op.getConstantOperandVal(0) != 0) {
1625 DAG.getContext()->emitError(
1626 "return address can only be determined for the current frame");
1627 return SDValue();
1628 }
1629
1632 MVT GRLenVT = Subtarget.getGRLenVT();
1633
1634 // Return the value of the return address register, marking it an implicit
1635 // live-in.
1636 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1637 getRegClassFor(GRLenVT));
1638 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1639}
1640
1641SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1642 SelectionDAG &DAG) const {
1644 auto Size = Subtarget.getGRLen() / 8;
1645 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1646 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1647}
1648
1649SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1650 SelectionDAG &DAG) const {
1652 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1653
1654 SDLoc DL(Op);
1655 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1657
1658 // vastart just stores the address of the VarArgsFrameIndex slot into the
1659 // memory location argument.
1660 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1661 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1662 MachinePointerInfo(SV));
1663}
1664
1665SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1666 SelectionDAG &DAG) const {
1667 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1668 !Subtarget.hasBasicD() && "unexpected target features");
1669
1670 SDLoc DL(Op);
1671 SDValue Op0 = Op.getOperand(0);
1672 if (Op0->getOpcode() == ISD::AND) {
1673 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1674 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1675 return Op;
1676 }
1677
1678 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1679 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1680 Op0.getConstantOperandVal(2) == UINT64_C(0))
1681 return Op;
1682
1683 if (Op0.getOpcode() == ISD::AssertZext &&
1684 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1685 return Op;
1686
1687 EVT OpVT = Op0.getValueType();
1688 EVT RetVT = Op.getValueType();
1689 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1690 MakeLibCallOptions CallOptions;
1691 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1692 SDValue Chain = SDValue();
1694 std::tie(Result, Chain) =
1695 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1696 return Result;
1697}
1698
1699SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1700 SelectionDAG &DAG) const {
1701 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1702 !Subtarget.hasBasicD() && "unexpected target features");
1703
1704 SDLoc DL(Op);
1705 SDValue Op0 = Op.getOperand(0);
1706
1707 if ((Op0.getOpcode() == ISD::AssertSext ||
1709 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1710 return Op;
1711
1712 EVT OpVT = Op0.getValueType();
1713 EVT RetVT = Op.getValueType();
1714 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1715 MakeLibCallOptions CallOptions;
1716 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1717 SDValue Chain = SDValue();
1719 std::tie(Result, Chain) =
1720 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1721 return Result;
1722}
1723
1724SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1725 SelectionDAG &DAG) const {
1726
1727 SDLoc DL(Op);
1728 SDValue Op0 = Op.getOperand(0);
1729
1730 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1731 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1732 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1733 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1734 }
1735 return Op;
1736}
1737
1738SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1739 SelectionDAG &DAG) const {
1740
1741 SDLoc DL(Op);
1742 SDValue Op0 = Op.getOperand(0);
1743
1744 if (Op0.getValueType() == MVT::f16)
1745 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1746
1747 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1748 !Subtarget.hasBasicD()) {
1749 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1750 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1751 }
1752
1753 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1754 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1755 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1756}
1757
1759 SelectionDAG &DAG, unsigned Flags) {
1760 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1761}
1762
1764 SelectionDAG &DAG, unsigned Flags) {
1765 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1766 Flags);
1767}
1768
1770 SelectionDAG &DAG, unsigned Flags) {
1771 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1772 N->getOffset(), Flags);
1773}
1774
1776 SelectionDAG &DAG, unsigned Flags) {
1777 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1778}
1779
1780template <class NodeTy>
1781SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1783 bool IsLocal) const {
1784 SDLoc DL(N);
1785 EVT Ty = getPointerTy(DAG.getDataLayout());
1786 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1787 SDValue Load;
1788
1789 switch (M) {
1790 default:
1791 report_fatal_error("Unsupported code model");
1792
1793 case CodeModel::Large: {
1794 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1795
1796 // This is not actually used, but is necessary for successfully matching
1797 // the PseudoLA_*_LARGE nodes.
1798 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1799 if (IsLocal) {
1800 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1801 // eventually becomes the desired 5-insn code sequence.
1802 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1803 Tmp, Addr),
1804 0);
1805 } else {
1806 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1807 // eventually becomes the desired 5-insn code sequence.
1808 Load = SDValue(
1809 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1810 0);
1811 }
1812 break;
1813 }
1814
1815 case CodeModel::Small:
1816 case CodeModel::Medium:
1817 if (IsLocal) {
1818 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1819 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1820 Load = SDValue(
1821 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1822 } else {
1823 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1824 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1825 Load =
1826 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1827 }
1828 }
1829
1830 if (!IsLocal) {
1831 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1837 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1838 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1839 }
1840
1841 return Load;
1842}
1843
1844SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1845 SelectionDAG &DAG) const {
1846 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1847 DAG.getTarget().getCodeModel());
1848}
1849
1850SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1851 SelectionDAG &DAG) const {
1852 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1853 DAG.getTarget().getCodeModel());
1854}
1855
1856SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1857 SelectionDAG &DAG) const {
1858 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1859 DAG.getTarget().getCodeModel());
1860}
1861
1862SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1863 SelectionDAG &DAG) const {
1864 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1865 assert(N->getOffset() == 0 && "unexpected offset in global node");
1866 auto CM = DAG.getTarget().getCodeModel();
1867 const GlobalValue *GV = N->getGlobal();
1868
1869 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1870 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1871 CM = *GCM;
1872 }
1873
1874 return getAddr(N, DAG, CM, GV->isDSOLocal());
1875}
1876
1877SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1878 SelectionDAG &DAG,
1879 unsigned Opc, bool UseGOT,
1880 bool Large) const {
1881 SDLoc DL(N);
1882 EVT Ty = getPointerTy(DAG.getDataLayout());
1883 MVT GRLenVT = Subtarget.getGRLenVT();
1884
1885 // This is not actually used, but is necessary for successfully matching the
1886 // PseudoLA_*_LARGE nodes.
1887 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1888 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1889
1890 // Only IE needs an extra argument for large code model.
1891 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
1892 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1893 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1894
1895 // If it is LE under the normal/medium code model, the thread-pointer add
1896 // occurs during pseudo-instruction expansion.
1897 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
1898 return Offset;
1899
1900 if (UseGOT) {
1901 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1907 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1908 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1909 }
1910
1911 // Add the thread pointer.
1912 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1913 DAG.getRegister(LoongArch::R2, GRLenVT));
1914}
1915
1916SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1917 SelectionDAG &DAG,
1918 unsigned Opc,
1919 bool Large) const {
1920 SDLoc DL(N);
1921 EVT Ty = getPointerTy(DAG.getDataLayout());
1922 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1923
1924 // This is not actually used, but is necessary for successfully matching the
1925 // PseudoLA_*_LARGE nodes.
1926 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1927
1928 // Use a PC-relative addressing mode to access the dynamic GOT address.
1929 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1930 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1931 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1932
1933 // Prepare argument list to generate call.
1934   ArgListTy Args;
1935   ArgListEntry Entry;
1936 Entry.Node = Load;
1937 Entry.Ty = CallTy;
1938 Args.push_back(Entry);
1939
1940 // Setup call to __tls_get_addr.
1941   TargetLowering::CallLoweringInfo CLI(DAG);
1942   CLI.setDebugLoc(DL)
1943 .setChain(DAG.getEntryNode())
1944 .setLibCallee(CallingConv::C, CallTy,
1945 DAG.getExternalSymbol("__tls_get_addr", Ty),
1946 std::move(Args));
1947
1948 return LowerCallTo(CLI).first;
1949}
1950
1951SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1952 SelectionDAG &DAG, unsigned Opc,
1953 bool Large) const {
1954 SDLoc DL(N);
1955 EVT Ty = getPointerTy(DAG.getDataLayout());
1956 const GlobalValue *GV = N->getGlobal();
1957
1958 // This is not actually used, but is necessary for successfully matching the
1959 // PseudoLA_*_LARGE nodes.
1960 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1961
1962 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1963 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1964 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1965 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1966 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1967}
1968
1969SDValue
1970LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1971 SelectionDAG &DAG) const {
1972   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1973       CallingConv::GHC)
1974     report_fatal_error("In GHC calling convention TLS is not supported");
1975
1976 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1977 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1978
1979 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1980 assert(N->getOffset() == 0 && "unexpected offset in global node");
1981
1982 if (DAG.getTarget().useEmulatedTLS())
1983 report_fatal_error("the emulated TLS is prohibited",
1984 /*GenCrashDiag=*/false);
1985
1986 bool IsDesc = DAG.getTarget().useTLSDESC();
1987
1988 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1989   case TLSModel::GeneralDynamic:
1990     // In this model, application code calls the dynamic linker function
1991 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1992 // runtime.
1993 if (!IsDesc)
1994 return getDynamicTLSAddr(N, DAG,
1995 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1996 : LoongArch::PseudoLA_TLS_GD,
1997 Large);
1998 break;
1999   case TLSModel::LocalDynamic:
2000     // Same as GeneralDynamic, except for assembly modifiers and relocation
2001 // records.
2002 if (!IsDesc)
2003 return getDynamicTLSAddr(N, DAG,
2004 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
2005 : LoongArch::PseudoLA_TLS_LD,
2006 Large);
2007 break;
2008   case TLSModel::InitialExec:
2009     // This model uses the GOT to resolve TLS offsets.
2010 return getStaticTLSAddr(N, DAG,
2011 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
2012 : LoongArch::PseudoLA_TLS_IE,
2013 /*UseGOT=*/true, Large);
2014   case TLSModel::LocalExec:
2015     // This model is used when statically linking, as the TLS offsets are resolved
2016 // during program linking.
2017 //
2018 // This node doesn't need an extra argument for the large code model.
2019 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2020 /*UseGOT=*/false, Large);
2021 }
2022
2023 return getTLSDescAddr(N, DAG,
2024 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2025 : LoongArch::PseudoLA_TLS_DESC,
2026 Large);
2027}
2028
2029template <unsigned N>
2030 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2031                                     SelectionDAG &DAG, bool IsSigned = false) {
2032 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2033 // Check the ImmArg.
2034 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2035 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2036 DAG.getContext()->emitError(Op->getOperationName(0) +
2037 ": argument out of range.");
2038 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2039 }
2040 return SDValue();
2041}
2042
2043SDValue
2044LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2045 SelectionDAG &DAG) const {
2046 SDLoc DL(Op);
2047 switch (Op.getConstantOperandVal(0)) {
2048 default:
2049 return SDValue(); // Don't custom lower most intrinsics.
2050 case Intrinsic::thread_pointer: {
2051 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2052 return DAG.getRegister(LoongArch::R2, PtrVT);
2053 }
2054 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2055 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2056 case Intrinsic::loongarch_lsx_vreplvei_d:
2057 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2058 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2059 case Intrinsic::loongarch_lsx_vreplvei_w:
2060 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2061 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2062 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2063 case Intrinsic::loongarch_lasx_xvpickve_d:
2064 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2065 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2066 case Intrinsic::loongarch_lasx_xvinsve0_d:
2067 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2068 case Intrinsic::loongarch_lsx_vsat_b:
2069 case Intrinsic::loongarch_lsx_vsat_bu:
2070 case Intrinsic::loongarch_lsx_vrotri_b:
2071 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2072 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2073 case Intrinsic::loongarch_lsx_vsrlri_b:
2074 case Intrinsic::loongarch_lsx_vsrari_b:
2075 case Intrinsic::loongarch_lsx_vreplvei_h:
2076 case Intrinsic::loongarch_lasx_xvsat_b:
2077 case Intrinsic::loongarch_lasx_xvsat_bu:
2078 case Intrinsic::loongarch_lasx_xvrotri_b:
2079 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2080 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2081 case Intrinsic::loongarch_lasx_xvsrlri_b:
2082 case Intrinsic::loongarch_lasx_xvsrari_b:
2083 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2084 case Intrinsic::loongarch_lasx_xvpickve_w:
2085 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2086 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2087 case Intrinsic::loongarch_lasx_xvinsve0_w:
2088 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2089 case Intrinsic::loongarch_lsx_vsat_h:
2090 case Intrinsic::loongarch_lsx_vsat_hu:
2091 case Intrinsic::loongarch_lsx_vrotri_h:
2092 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2093 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2094 case Intrinsic::loongarch_lsx_vsrlri_h:
2095 case Intrinsic::loongarch_lsx_vsrari_h:
2096 case Intrinsic::loongarch_lsx_vreplvei_b:
2097 case Intrinsic::loongarch_lasx_xvsat_h:
2098 case Intrinsic::loongarch_lasx_xvsat_hu:
2099 case Intrinsic::loongarch_lasx_xvrotri_h:
2100 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2101 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2102 case Intrinsic::loongarch_lasx_xvsrlri_h:
2103 case Intrinsic::loongarch_lasx_xvsrari_h:
2104 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2105 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2106 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2107 case Intrinsic::loongarch_lsx_vsrani_b_h:
2108 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2109 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2110 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2111 case Intrinsic::loongarch_lsx_vssrani_b_h:
2112 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2113 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2114 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2115 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2116 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2117 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2118 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2119 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2120 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2121 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2122 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2123 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2124 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2125 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2126 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2127 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2128 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2129 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2130 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2131 case Intrinsic::loongarch_lsx_vsat_w:
2132 case Intrinsic::loongarch_lsx_vsat_wu:
2133 case Intrinsic::loongarch_lsx_vrotri_w:
2134 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2135 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2136 case Intrinsic::loongarch_lsx_vsrlri_w:
2137 case Intrinsic::loongarch_lsx_vsrari_w:
2138 case Intrinsic::loongarch_lsx_vslei_bu:
2139 case Intrinsic::loongarch_lsx_vslei_hu:
2140 case Intrinsic::loongarch_lsx_vslei_wu:
2141 case Intrinsic::loongarch_lsx_vslei_du:
2142 case Intrinsic::loongarch_lsx_vslti_bu:
2143 case Intrinsic::loongarch_lsx_vslti_hu:
2144 case Intrinsic::loongarch_lsx_vslti_wu:
2145 case Intrinsic::loongarch_lsx_vslti_du:
2146 case Intrinsic::loongarch_lsx_vbsll_v:
2147 case Intrinsic::loongarch_lsx_vbsrl_v:
2148 case Intrinsic::loongarch_lasx_xvsat_w:
2149 case Intrinsic::loongarch_lasx_xvsat_wu:
2150 case Intrinsic::loongarch_lasx_xvrotri_w:
2151 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2152 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2153 case Intrinsic::loongarch_lasx_xvsrlri_w:
2154 case Intrinsic::loongarch_lasx_xvsrari_w:
2155 case Intrinsic::loongarch_lasx_xvslei_bu:
2156 case Intrinsic::loongarch_lasx_xvslei_hu:
2157 case Intrinsic::loongarch_lasx_xvslei_wu:
2158 case Intrinsic::loongarch_lasx_xvslei_du:
2159 case Intrinsic::loongarch_lasx_xvslti_bu:
2160 case Intrinsic::loongarch_lasx_xvslti_hu:
2161 case Intrinsic::loongarch_lasx_xvslti_wu:
2162 case Intrinsic::loongarch_lasx_xvslti_du:
2163 case Intrinsic::loongarch_lasx_xvbsll_v:
2164 case Intrinsic::loongarch_lasx_xvbsrl_v:
2165 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2166 case Intrinsic::loongarch_lsx_vseqi_b:
2167 case Intrinsic::loongarch_lsx_vseqi_h:
2168 case Intrinsic::loongarch_lsx_vseqi_w:
2169 case Intrinsic::loongarch_lsx_vseqi_d:
2170 case Intrinsic::loongarch_lsx_vslei_b:
2171 case Intrinsic::loongarch_lsx_vslei_h:
2172 case Intrinsic::loongarch_lsx_vslei_w:
2173 case Intrinsic::loongarch_lsx_vslei_d:
2174 case Intrinsic::loongarch_lsx_vslti_b:
2175 case Intrinsic::loongarch_lsx_vslti_h:
2176 case Intrinsic::loongarch_lsx_vslti_w:
2177 case Intrinsic::loongarch_lsx_vslti_d:
2178 case Intrinsic::loongarch_lasx_xvseqi_b:
2179 case Intrinsic::loongarch_lasx_xvseqi_h:
2180 case Intrinsic::loongarch_lasx_xvseqi_w:
2181 case Intrinsic::loongarch_lasx_xvseqi_d:
2182 case Intrinsic::loongarch_lasx_xvslei_b:
2183 case Intrinsic::loongarch_lasx_xvslei_h:
2184 case Intrinsic::loongarch_lasx_xvslei_w:
2185 case Intrinsic::loongarch_lasx_xvslei_d:
2186 case Intrinsic::loongarch_lasx_xvslti_b:
2187 case Intrinsic::loongarch_lasx_xvslti_h:
2188 case Intrinsic::loongarch_lasx_xvslti_w:
2189 case Intrinsic::loongarch_lasx_xvslti_d:
2190 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2191 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2192 case Intrinsic::loongarch_lsx_vsrani_h_w:
2193 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2194 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2195 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2196 case Intrinsic::loongarch_lsx_vssrani_h_w:
2197 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2198 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2199 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2200 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2201 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2202 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2203 case Intrinsic::loongarch_lsx_vfrstpi_b:
2204 case Intrinsic::loongarch_lsx_vfrstpi_h:
2205 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2206 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2207 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2208 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2209 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2210 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2211 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2212 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2213 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2214 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2215 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2216 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2217 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2218 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2219 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2220 case Intrinsic::loongarch_lsx_vsat_d:
2221 case Intrinsic::loongarch_lsx_vsat_du:
2222 case Intrinsic::loongarch_lsx_vrotri_d:
2223 case Intrinsic::loongarch_lsx_vsrlri_d:
2224 case Intrinsic::loongarch_lsx_vsrari_d:
2225 case Intrinsic::loongarch_lasx_xvsat_d:
2226 case Intrinsic::loongarch_lasx_xvsat_du:
2227 case Intrinsic::loongarch_lasx_xvrotri_d:
2228 case Intrinsic::loongarch_lasx_xvsrlri_d:
2229 case Intrinsic::loongarch_lasx_xvsrari_d:
2230 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2231 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2232 case Intrinsic::loongarch_lsx_vsrani_w_d:
2233 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2234 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2235 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2236 case Intrinsic::loongarch_lsx_vssrani_w_d:
2237 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2238 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2239 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2240 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2241 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2242 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2243 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2244 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2245 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2246 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2247 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2248 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2249 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2250 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2251 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2252 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2253 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2254 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2255 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2256 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2257 case Intrinsic::loongarch_lsx_vsrani_d_q:
2258 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2259 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2260 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2261 case Intrinsic::loongarch_lsx_vssrani_d_q:
2262 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2263 case Intrinsic::loongarch_lsx_vssrani_du_q:
2264 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2265 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2266 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2267 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2268 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2269 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2270 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2271 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2272 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2273 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2274 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2275 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2276 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2277 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2278 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2279 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2280 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2281 case Intrinsic::loongarch_lsx_vnori_b:
2282 case Intrinsic::loongarch_lsx_vshuf4i_b:
2283 case Intrinsic::loongarch_lsx_vshuf4i_h:
2284 case Intrinsic::loongarch_lsx_vshuf4i_w:
2285 case Intrinsic::loongarch_lasx_xvnori_b:
2286 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2287 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2288 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2289 case Intrinsic::loongarch_lasx_xvpermi_d:
2290 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2291 case Intrinsic::loongarch_lsx_vshuf4i_d:
2292 case Intrinsic::loongarch_lsx_vpermi_w:
2293 case Intrinsic::loongarch_lsx_vbitseli_b:
2294 case Intrinsic::loongarch_lsx_vextrins_b:
2295 case Intrinsic::loongarch_lsx_vextrins_h:
2296 case Intrinsic::loongarch_lsx_vextrins_w:
2297 case Intrinsic::loongarch_lsx_vextrins_d:
2298 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2299 case Intrinsic::loongarch_lasx_xvpermi_w:
2300 case Intrinsic::loongarch_lasx_xvpermi_q:
2301 case Intrinsic::loongarch_lasx_xvbitseli_b:
2302 case Intrinsic::loongarch_lasx_xvextrins_b:
2303 case Intrinsic::loongarch_lasx_xvextrins_h:
2304 case Intrinsic::loongarch_lasx_xvextrins_w:
2305 case Intrinsic::loongarch_lasx_xvextrins_d:
2306 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2307 case Intrinsic::loongarch_lsx_vrepli_b:
2308 case Intrinsic::loongarch_lsx_vrepli_h:
2309 case Intrinsic::loongarch_lsx_vrepli_w:
2310 case Intrinsic::loongarch_lsx_vrepli_d:
2311 case Intrinsic::loongarch_lasx_xvrepli_b:
2312 case Intrinsic::loongarch_lasx_xvrepli_h:
2313 case Intrinsic::loongarch_lasx_xvrepli_w:
2314 case Intrinsic::loongarch_lasx_xvrepli_d:
2315 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2316 case Intrinsic::loongarch_lsx_vldi:
2317 case Intrinsic::loongarch_lasx_xvldi:
2318 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2319 }
2320}
2321
2322 // Helper function that emits an error message for intrinsics with a chain and
2323 // returns the merge values of an UNDEF and the chain.
2324 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2325                                                   StringRef ErrorMsg,
2326 SelectionDAG &DAG) {
2327 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2328 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2329 SDLoc(Op));
2330}
2331
2332SDValue
2333LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2334 SelectionDAG &DAG) const {
2335 SDLoc DL(Op);
2336 MVT GRLenVT = Subtarget.getGRLenVT();
2337 EVT VT = Op.getValueType();
2338 SDValue Chain = Op.getOperand(0);
2339 const StringRef ErrorMsgOOR = "argument out of range";
2340 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2341 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2342
2343 switch (Op.getConstantOperandVal(1)) {
2344 default:
2345 return Op;
2346 case Intrinsic::loongarch_crc_w_b_w:
2347 case Intrinsic::loongarch_crc_w_h_w:
2348 case Intrinsic::loongarch_crc_w_w_w:
2349 case Intrinsic::loongarch_crc_w_d_w:
2350 case Intrinsic::loongarch_crcc_w_b_w:
2351 case Intrinsic::loongarch_crcc_w_h_w:
2352 case Intrinsic::loongarch_crcc_w_w_w:
2353 case Intrinsic::loongarch_crcc_w_d_w:
2354 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2355 case Intrinsic::loongarch_csrrd_w:
2356 case Intrinsic::loongarch_csrrd_d: {
2357 unsigned Imm = Op.getConstantOperandVal(2);
2358 return !isUInt<14>(Imm)
2359 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2360 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2361 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2362 }
2363 case Intrinsic::loongarch_csrwr_w:
2364 case Intrinsic::loongarch_csrwr_d: {
2365 unsigned Imm = Op.getConstantOperandVal(3);
2366 return !isUInt<14>(Imm)
2367 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2368 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2369 {Chain, Op.getOperand(2),
2370 DAG.getConstant(Imm, DL, GRLenVT)});
2371 }
2372 case Intrinsic::loongarch_csrxchg_w:
2373 case Intrinsic::loongarch_csrxchg_d: {
2374 unsigned Imm = Op.getConstantOperandVal(4);
2375 return !isUInt<14>(Imm)
2376 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2377 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2378 {Chain, Op.getOperand(2), Op.getOperand(3),
2379 DAG.getConstant(Imm, DL, GRLenVT)});
2380 }
2381 case Intrinsic::loongarch_iocsrrd_d: {
2382 return DAG.getNode(
2383 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2384 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2385 }
2386#define IOCSRRD_CASE(NAME, NODE) \
2387 case Intrinsic::loongarch_##NAME: { \
2388 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2389 {Chain, Op.getOperand(2)}); \
2390 }
2391 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2392 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2393 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2394#undef IOCSRRD_CASE
2395 case Intrinsic::loongarch_cpucfg: {
2396 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2397 {Chain, Op.getOperand(2)});
2398 }
2399 case Intrinsic::loongarch_lddir_d: {
2400 unsigned Imm = Op.getConstantOperandVal(3);
2401 return !isUInt<8>(Imm)
2402 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2403 : Op;
2404 }
2405 case Intrinsic::loongarch_movfcsr2gr: {
2406 if (!Subtarget.hasBasicF())
2407 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2408 unsigned Imm = Op.getConstantOperandVal(2);
2409 return !isUInt<2>(Imm)
2410 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2411 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2412 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2413 }
2414 case Intrinsic::loongarch_lsx_vld:
2415 case Intrinsic::loongarch_lsx_vldrepl_b:
2416 case Intrinsic::loongarch_lasx_xvld:
2417 case Intrinsic::loongarch_lasx_xvldrepl_b:
2418 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2419 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2420 : SDValue();
2421 case Intrinsic::loongarch_lsx_vldrepl_h:
2422 case Intrinsic::loongarch_lasx_xvldrepl_h:
2423 return !isShiftedInt<11, 1>(
2424 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2425            ? emitIntrinsicWithChainErrorMessage(
2426                  Op, "argument out of range or not a multiple of 2", DAG)
2427 : SDValue();
2428 case Intrinsic::loongarch_lsx_vldrepl_w:
2429 case Intrinsic::loongarch_lasx_xvldrepl_w:
2430 return !isShiftedInt<10, 2>(
2431 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2432            ? emitIntrinsicWithChainErrorMessage(
2433                  Op, "argument out of range or not a multiple of 4", DAG)
2434 : SDValue();
2435 case Intrinsic::loongarch_lsx_vldrepl_d:
2436 case Intrinsic::loongarch_lasx_xvldrepl_d:
2437 return !isShiftedInt<9, 3>(
2438 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2439            ? emitIntrinsicWithChainErrorMessage(
2440                  Op, "argument out of range or not a multiple of 8", DAG)
2441 : SDValue();
2442 }
2443}
2444
2445 // Helper function that emits an error message for intrinsics with a void return
2446 // value and returns the chain.
2447 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2448                                          SelectionDAG &DAG) {
2449
2450 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2451 return Op.getOperand(0);
2452}
2453
2454SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2455 SelectionDAG &DAG) const {
2456 SDLoc DL(Op);
2457 MVT GRLenVT = Subtarget.getGRLenVT();
2458 SDValue Chain = Op.getOperand(0);
2459 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2460 SDValue Op2 = Op.getOperand(2);
2461 const StringRef ErrorMsgOOR = "argument out of range";
2462 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2463 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2464 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2465
2466 switch (IntrinsicEnum) {
2467 default:
2468 // TODO: Add more Intrinsics.
2469 return SDValue();
2470 case Intrinsic::loongarch_cacop_d:
2471 case Intrinsic::loongarch_cacop_w: {
2472 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2473 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2474 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2475 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2476 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2477 unsigned Imm1 = Op2->getAsZExtVal();
2478 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2479 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2480 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2481 return Op;
2482 }
2483 case Intrinsic::loongarch_dbar: {
2484 unsigned Imm = Op2->getAsZExtVal();
2485 return !isUInt<15>(Imm)
2486 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2487 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2488 DAG.getConstant(Imm, DL, GRLenVT));
2489 }
2490 case Intrinsic::loongarch_ibar: {
2491 unsigned Imm = Op2->getAsZExtVal();
2492 return !isUInt<15>(Imm)
2493 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2494 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2495 DAG.getConstant(Imm, DL, GRLenVT));
2496 }
2497 case Intrinsic::loongarch_break: {
2498 unsigned Imm = Op2->getAsZExtVal();
2499 return !isUInt<15>(Imm)
2500 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2501 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2502 DAG.getConstant(Imm, DL, GRLenVT));
2503 }
2504 case Intrinsic::loongarch_movgr2fcsr: {
2505 if (!Subtarget.hasBasicF())
2506 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2507 unsigned Imm = Op2->getAsZExtVal();
2508 return !isUInt<2>(Imm)
2509 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2510 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2511 DAG.getConstant(Imm, DL, GRLenVT),
2512 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2513 Op.getOperand(3)));
2514 }
2515 case Intrinsic::loongarch_syscall: {
2516 unsigned Imm = Op2->getAsZExtVal();
2517 return !isUInt<15>(Imm)
2518 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2519 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2520 DAG.getConstant(Imm, DL, GRLenVT));
2521 }
2522#define IOCSRWR_CASE(NAME, NODE) \
2523 case Intrinsic::loongarch_##NAME: { \
2524 SDValue Op3 = Op.getOperand(3); \
2525 return Subtarget.is64Bit() \
2526 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2527 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2528 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2529 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2530 Op3); \
2531 }
2532 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2533 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2534 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2535#undef IOCSRWR_CASE
2536 case Intrinsic::loongarch_iocsrwr_d: {
2537 return !Subtarget.is64Bit()
2538 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2539 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2540 Op2,
2541 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2542 Op.getOperand(3)));
2543 }
2544#define ASRT_LE_GT_CASE(NAME) \
2545 case Intrinsic::loongarch_##NAME: { \
2546 return !Subtarget.is64Bit() \
2547 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2548 : Op; \
2549 }
2550 ASRT_LE_GT_CASE(asrtle_d)
2551 ASRT_LE_GT_CASE(asrtgt_d)
2552#undef ASRT_LE_GT_CASE
2553 case Intrinsic::loongarch_ldpte_d: {
2554 unsigned Imm = Op.getConstantOperandVal(3);
2555 return !Subtarget.is64Bit()
2556 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2557 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2558 : Op;
2559 }
2560 case Intrinsic::loongarch_lsx_vst:
2561 case Intrinsic::loongarch_lasx_xvst:
2562 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2563 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2564 : SDValue();
2565 case Intrinsic::loongarch_lasx_xvstelm_b:
2566 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2567 !isUInt<5>(Op.getConstantOperandVal(5)))
2568 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2569 : SDValue();
2570 case Intrinsic::loongarch_lsx_vstelm_b:
2571 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2572 !isUInt<4>(Op.getConstantOperandVal(5)))
2573 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2574 : SDValue();
2575 case Intrinsic::loongarch_lasx_xvstelm_h:
2576 return (!isShiftedInt<8, 1>(
2577 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2578 !isUInt<4>(Op.getConstantOperandVal(5)))
2579            ? emitIntrinsicErrorMessage(
2580                  Op, "argument out of range or not a multiple of 2", DAG)
2581 : SDValue();
2582 case Intrinsic::loongarch_lsx_vstelm_h:
2583 return (!isShiftedInt<8, 1>(
2584 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2585 !isUInt<3>(Op.getConstantOperandVal(5)))
2586            ? emitIntrinsicErrorMessage(
2587                  Op, "argument out of range or not a multiple of 2", DAG)
2588 : SDValue();
2589 case Intrinsic::loongarch_lasx_xvstelm_w:
2590 return (!isShiftedInt<8, 2>(
2591 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2592 !isUInt<3>(Op.getConstantOperandVal(5)))
2593            ? emitIntrinsicErrorMessage(
2594                  Op, "argument out of range or not a multiple of 4", DAG)
2595 : SDValue();
2596 case Intrinsic::loongarch_lsx_vstelm_w:
2597 return (!isShiftedInt<8, 2>(
2598 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2599 !isUInt<2>(Op.getConstantOperandVal(5)))
2600            ? emitIntrinsicErrorMessage(
2601                  Op, "argument out of range or not a multiple of 4", DAG)
2602 : SDValue();
2603 case Intrinsic::loongarch_lasx_xvstelm_d:
2604 return (!isShiftedInt<8, 3>(
2605 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2606 !isUInt<2>(Op.getConstantOperandVal(5)))
2607            ? emitIntrinsicErrorMessage(
2608                  Op, "argument out of range or not a multiple of 8", DAG)
2609 : SDValue();
2610 case Intrinsic::loongarch_lsx_vstelm_d:
2611 return (!isShiftedInt<8, 3>(
2612 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2613 !isUInt<1>(Op.getConstantOperandVal(5)))
2614            ? emitIntrinsicErrorMessage(
2615                  Op, "argument out of range or not a multiple of 8", DAG)
2616 : SDValue();
2617 }
2618}
2619
2620SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2621 SelectionDAG &DAG) const {
2622 SDLoc DL(Op);
2623 SDValue Lo = Op.getOperand(0);
2624 SDValue Hi = Op.getOperand(1);
2625 SDValue Shamt = Op.getOperand(2);
2626 EVT VT = Lo.getValueType();
2627
2628 // if Shamt-GRLen < 0: // Shamt < GRLen
2629 // Lo = Lo << Shamt
2630 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2631 // else:
2632 // Lo = 0
2633 // Hi = Lo << (Shamt-GRLen)
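  // Illustrative example (not part of the original source): with GRLen == 32
  // and Shamt == 8, the first branch applies and the expansion computes
  //   Lo = Lo << 8
  //   Hi = (Hi << 8) | ((Lo >>u 1) >>u (31 ^ 8)) == (Hi << 8) | (Lo >>u 24)
  // so the 8 bits shifted out of Lo are moved into the low bits of Hi.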
2634
2635 SDValue Zero = DAG.getConstant(0, DL, VT);
2636 SDValue One = DAG.getConstant(1, DL, VT);
2637 SDValue MinusGRLen =
2638 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2639 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2640 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2641 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2642
2643 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2644 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2645 SDValue ShiftRightLo =
2646 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2647 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2648 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2649 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2650
2651 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2652
2653 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2654 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2655
2656 SDValue Parts[2] = {Lo, Hi};
2657 return DAG.getMergeValues(Parts, DL);
2658}
2659
2660SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2661 SelectionDAG &DAG,
2662 bool IsSRA) const {
2663 SDLoc DL(Op);
2664 SDValue Lo = Op.getOperand(0);
2665 SDValue Hi = Op.getOperand(1);
2666 SDValue Shamt = Op.getOperand(2);
2667 EVT VT = Lo.getValueType();
2668
2669 // SRA expansion:
2670 // if Shamt-GRLen < 0: // Shamt < GRLen
2671 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2672 // Hi = Hi >>s Shamt
2673 // else:
2674 // Lo = Hi >>s (Shamt-GRLen);
2675 // Hi = Hi >>s (GRLen-1)
2676 //
2677 // SRL expansion:
2678 // if Shamt-GRLen < 0: // Shamt < GRLen
2679 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2680 // Hi = Hi >>u Shamt
2681 // else:
2682 // Lo = Hi >>u (Shamt-GRLen);
2683 // Hi = 0;
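  // Illustrative example (not part of the original source): for SRL with
  // GRLen == 32 and Shamt == 8, the first branch applies and the expansion
  // computes
  //   Lo = (Lo >>u 8) | ((Hi << 1) << (8 ^ 31)) == (Lo >>u 8) | (Hi << 24)
  //   Hi = Hi >>u 8
  // so the 8 bits shifted out of Hi are moved into the high bits of Lo.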
2684
2685 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2686
2687 SDValue Zero = DAG.getConstant(0, DL, VT);
2688 SDValue One = DAG.getConstant(1, DL, VT);
2689 SDValue MinusGRLen =
2690 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2691 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2692 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2693 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2694
2695 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2696 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2697 SDValue ShiftLeftHi =
2698 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2699 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2700 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2701 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2702 SDValue HiFalse =
2703 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2704
2705 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2706
2707 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2708 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2709
2710 SDValue Parts[2] = {Lo, Hi};
2711 return DAG.getMergeValues(Parts, DL);
2712}
2713
2714// Returns the opcode of the target-specific SDNode that implements the 32-bit
2715// form of the given Opcode.
2716 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2717   switch (Opcode) {
2718 default:
2719 llvm_unreachable("Unexpected opcode");
2720 case ISD::SDIV:
2721 return LoongArchISD::DIV_W;
2722 case ISD::UDIV:
2723 return LoongArchISD::DIV_WU;
2724 case ISD::SREM:
2725 return LoongArchISD::MOD_W;
2726 case ISD::UREM:
2727 return LoongArchISD::MOD_WU;
2728 case ISD::SHL:
2729 return LoongArchISD::SLL_W;
2730 case ISD::SRA:
2731 return LoongArchISD::SRA_W;
2732 case ISD::SRL:
2733 return LoongArchISD::SRL_W;
2734 case ISD::ROTL:
2735 case ISD::ROTR:
2736 return LoongArchISD::ROTR_W;
2737 case ISD::CTTZ:
2738 return LoongArchISD::CTZ_W;
2739 case ISD::CTLZ:
2740 return LoongArchISD::CLZ_W;
2741 }
2742}
2743
2744// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2745// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2746// otherwise be promoted to i64, making it difficult to select the
2747 // SLL_W/.../*W later on, because the fact that the operation was originally
2748 // of type i8/i16/i32 is lost.
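// Illustrative example (not part of the original source): with the default
// ExtOpc, an i32 'shl a, b' handled here becomes
//   (trunc i32 (SLL_W (any_ext i64 a), (any_ext i64 b)))
// i.e. both operands are extended to i64, the *_W node is emitted, and the
// result is truncated back to the original type.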
2749 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2750                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
2751 SDLoc DL(N);
2752 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2753 SDValue NewOp0, NewRes;
2754
2755 switch (NumOp) {
2756 default:
2757 llvm_unreachable("Unexpected NumOp");
2758 case 1: {
2759 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2760 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2761 break;
2762 }
2763 case 2: {
2764 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2765 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2766 if (N->getOpcode() == ISD::ROTL) {
2767 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2768 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2769 }
2770 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2771 break;
2772 }
2773   // TODO: Handle more NumOp.
2774 }
2775
2776 // ReplaceNodeResults requires we maintain the same type for the return
2777 // value.
2778 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2779}
2780
2781 // Converts the given 32-bit operation to an i64 operation with sign-extension
2782 // semantics in order to reduce the number of sign-extension instructions.
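// Illustrative example (not part of the original source): an i32 'add a, b'
// becomes
//   (trunc i32 (sext_inreg (add (any_ext i64 a), (any_ext i64 b)), i32))
// which records that the upper 32 bits of the i64 result are the sign
// extension of the low 32-bit result.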
2783 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2784   SDLoc DL(N);
2785 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2786 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2787 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2788 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2789 DAG.getValueType(MVT::i32));
2790 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2791}
2792
2793 // Helper function that emits an error message for intrinsics with or without a
2794 // chain and returns an UNDEF and, when WithChain is set, the chain as results.
2795 static void emitErrorAndReplaceIntrinsicResults(
2796     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2797     StringRef ErrorMsg, bool WithChain = true) {
2798 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2799 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2800 if (!WithChain)
2801 return;
2802 Results.push_back(N->getOperand(0));
2803}
2804
2805template <unsigned N>
2806static void
2807 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2808                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2809 unsigned ResOp) {
2810 const StringRef ErrorMsgOOR = "argument out of range";
2811 unsigned Imm = Node->getConstantOperandVal(2);
2812 if (!isUInt<N>(Imm)) {
2813     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2814                                         /*WithChain=*/false);
2815 return;
2816 }
2817 SDLoc DL(Node);
2818 SDValue Vec = Node->getOperand(1);
2819
2820 SDValue PickElt =
2821 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2822 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2823                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
2824   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2825 PickElt.getValue(0)));
2826}
2827
2828 static void replaceVecCondBranchResults(SDNode *N,
2829                                         SmallVectorImpl<SDValue> &Results,
2830                                         SelectionDAG &DAG,
2831 const LoongArchSubtarget &Subtarget,
2832 unsigned ResOp) {
2833 SDLoc DL(N);
2834 SDValue Vec = N->getOperand(1);
2835
2836 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2837 Results.push_back(
2838 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2839}
2840
2841static void
2842 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2843                                  SelectionDAG &DAG,
2844 const LoongArchSubtarget &Subtarget) {
2845 switch (N->getConstantOperandVal(0)) {
2846 default:
2847 llvm_unreachable("Unexpected Intrinsic.");
2848 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2849 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2850                                 LoongArchISD::VPICK_SEXT_ELT);
2851     break;
2852 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2853 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2854 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2855                                 LoongArchISD::VPICK_SEXT_ELT);
2856     break;
2857 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2858 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2859                                 LoongArchISD::VPICK_SEXT_ELT);
2860     break;
2861 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2862 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2863                                 LoongArchISD::VPICK_ZEXT_ELT);
2864     break;
2865 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2866 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2867 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2868                                 LoongArchISD::VPICK_ZEXT_ELT);
2869     break;
2870 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2871 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2872                                 LoongArchISD::VPICK_ZEXT_ELT);
2873     break;
2874 case Intrinsic::loongarch_lsx_bz_b:
2875 case Intrinsic::loongarch_lsx_bz_h:
2876 case Intrinsic::loongarch_lsx_bz_w:
2877 case Intrinsic::loongarch_lsx_bz_d:
2878 case Intrinsic::loongarch_lasx_xbz_b:
2879 case Intrinsic::loongarch_lasx_xbz_h:
2880 case Intrinsic::loongarch_lasx_xbz_w:
2881 case Intrinsic::loongarch_lasx_xbz_d:
2882 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2884 break;
2885 case Intrinsic::loongarch_lsx_bz_v:
2886 case Intrinsic::loongarch_lasx_xbz_v:
2887 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2889 break;
2890 case Intrinsic::loongarch_lsx_bnz_b:
2891 case Intrinsic::loongarch_lsx_bnz_h:
2892 case Intrinsic::loongarch_lsx_bnz_w:
2893 case Intrinsic::loongarch_lsx_bnz_d:
2894 case Intrinsic::loongarch_lasx_xbnz_b:
2895 case Intrinsic::loongarch_lasx_xbnz_h:
2896 case Intrinsic::loongarch_lasx_xbnz_w:
2897 case Intrinsic::loongarch_lasx_xbnz_d:
2898 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2900 break;
2901 case Intrinsic::loongarch_lsx_bnz_v:
2902 case Intrinsic::loongarch_lasx_xbnz_v:
2903 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2905 break;
2906 }
2907}
2908
2909 void LoongArchTargetLowering::ReplaceNodeResults(
2910     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2911   SDLoc DL(N);
2912 EVT VT = N->getValueType(0);
2913 switch (N->getOpcode()) {
2914 default:
2915 llvm_unreachable("Don't know how to legalize this operation");
2916 case ISD::ADD:
2917 case ISD::SUB:
2918 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2919 "Unexpected custom legalisation");
2920 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2921 break;
2922 case ISD::SDIV:
2923 case ISD::UDIV:
2924 case ISD::SREM:
2925 case ISD::UREM:
2926 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2927 "Unexpected custom legalisation");
2928 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2929 Subtarget.hasDiv32() && VT == MVT::i32
2930                                               ? ISD::ANY_EXTEND
2931                                               : ISD::SIGN_EXTEND));
2932 break;
2933 case ISD::SHL:
2934 case ISD::SRA:
2935 case ISD::SRL:
2936 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2937 "Unexpected custom legalisation");
2938 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2939 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2940 break;
2941 }
2942 break;
2943 case ISD::ROTL:
2944 case ISD::ROTR:
2945 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2946 "Unexpected custom legalisation");
2947 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2948 break;
2949 case ISD::FP_TO_SINT: {
2950 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2951 "Unexpected custom legalisation");
2952 SDValue Src = N->getOperand(0);
2953 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2954 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2955         TargetLowering::TypeSoftenFloat) {
2956       if (Src.getValueType() == MVT::f16)
2957 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2958 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2959 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2960 return;
2961 }
2962 // If the FP type needs to be softened, emit a library call using the 'si'
2963 // version. If we left it to default legalization we'd end up with 'di'.
2964 RTLIB::Libcall LC;
2965 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2966 MakeLibCallOptions CallOptions;
2967 EVT OpVT = Src.getValueType();
2968 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2969 SDValue Chain = SDValue();
2970 SDValue Result;
2971 std::tie(Result, Chain) =
2972 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2973 Results.push_back(Result);
2974 break;
2975 }
2976 case ISD::BITCAST: {
2977 SDValue Src = N->getOperand(0);
2978 EVT SrcVT = Src.getValueType();
2979 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2980 Subtarget.hasBasicF()) {
2981 SDValue Dst =
2982 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2983 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2984 }
2985 break;
2986 }
2987 case ISD::FP_TO_UINT: {
2988 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2989 "Unexpected custom legalisation");
2990 auto &TLI = DAG.getTargetLoweringInfo();
2991 SDValue Tmp1, Tmp2;
2992 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2993 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2994 break;
2995 }
2996 case ISD::BSWAP: {
2997 SDValue Src = N->getOperand(0);
2998 assert((VT == MVT::i16 || VT == MVT::i32) &&
2999 "Unexpected custom legalization");
3000 MVT GRLenVT = Subtarget.getGRLenVT();
3001 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3002 SDValue Tmp;
3003 switch (VT.getSizeInBits()) {
3004 default:
3005 llvm_unreachable("Unexpected operand width");
3006 case 16:
3007 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
3008 break;
3009 case 32:
3010       // Only LA64 will get here due to the size mismatch between VT and
3011       // GRLenVT; the LA32 lowering is defined directly in LoongArchInstrInfo.
3012 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
3013 break;
3014 }
3015 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3016 break;
3017 }
3018 case ISD::BITREVERSE: {
3019 SDValue Src = N->getOperand(0);
3020 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3021 "Unexpected custom legalization");
3022 MVT GRLenVT = Subtarget.getGRLenVT();
3023 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3024 SDValue Tmp;
3025 switch (VT.getSizeInBits()) {
3026 default:
3027 llvm_unreachable("Unexpected operand width");
3028 case 8:
3029 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3030 break;
3031 case 32:
3032 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3033 break;
3034 }
3035 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3036 break;
3037 }
3038 case ISD::CTLZ:
3039 case ISD::CTTZ: {
3040 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3041 "Unexpected custom legalisation");
3042 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3043 break;
3044 }
3045   case ISD::INTRINSIC_W_CHAIN: {
3046     SDValue Chain = N->getOperand(0);
3047 SDValue Op2 = N->getOperand(2);
3048 MVT GRLenVT = Subtarget.getGRLenVT();
3049 const StringRef ErrorMsgOOR = "argument out of range";
3050 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3051 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3052
3053 switch (N->getConstantOperandVal(1)) {
3054 default:
3055 llvm_unreachable("Unexpected Intrinsic.");
3056 case Intrinsic::loongarch_movfcsr2gr: {
3057 if (!Subtarget.hasBasicF()) {
3058 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3059 return;
3060 }
3061 unsigned Imm = Op2->getAsZExtVal();
3062 if (!isUInt<2>(Imm)) {
3063 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3064 return;
3065 }
3066 SDValue MOVFCSR2GRResults = DAG.getNode(
3067 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3068 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3069 Results.push_back(
3070 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3071 Results.push_back(MOVFCSR2GRResults.getValue(1));
3072 break;
3073 }
3074#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3075 case Intrinsic::loongarch_##NAME: { \
3076 SDValue NODE = DAG.getNode( \
3077 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3078 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3079 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3080 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3081 Results.push_back(NODE.getValue(1)); \
3082 break; \
3083 }
3084 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3085 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3086 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3087 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3088 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3089 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3090#undef CRC_CASE_EXT_BINARYOP
3091
3092#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3093 case Intrinsic::loongarch_##NAME: { \
3094 SDValue NODE = DAG.getNode( \
3095 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3096 {Chain, Op2, \
3097 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3098 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3099 Results.push_back(NODE.getValue(1)); \
3100 break; \
3101 }
3102 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3103 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3104#undef CRC_CASE_EXT_UNARYOP
3105#define CSR_CASE(ID) \
3106 case Intrinsic::loongarch_##ID: { \
3107 if (!Subtarget.is64Bit()) \
3108 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3109 break; \
3110 }
3111 CSR_CASE(csrrd_d);
3112 CSR_CASE(csrwr_d);
3113 CSR_CASE(csrxchg_d);
3114 CSR_CASE(iocsrrd_d);
3115#undef CSR_CASE
3116 case Intrinsic::loongarch_csrrd_w: {
3117 unsigned Imm = Op2->getAsZExtVal();
3118 if (!isUInt<14>(Imm)) {
3119 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3120 return;
3121 }
3122 SDValue CSRRDResults =
3123 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3124 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3125 Results.push_back(
3126 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3127 Results.push_back(CSRRDResults.getValue(1));
3128 break;
3129 }
3130 case Intrinsic::loongarch_csrwr_w: {
3131 unsigned Imm = N->getConstantOperandVal(3);
3132 if (!isUInt<14>(Imm)) {
3133 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3134 return;
3135 }
3136 SDValue CSRWRResults =
3137 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3138 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3139 DAG.getConstant(Imm, DL, GRLenVT)});
3140 Results.push_back(
3141 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3142 Results.push_back(CSRWRResults.getValue(1));
3143 break;
3144 }
3145 case Intrinsic::loongarch_csrxchg_w: {
3146 unsigned Imm = N->getConstantOperandVal(4);
3147 if (!isUInt<14>(Imm)) {
3148 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3149 return;
3150 }
3151 SDValue CSRXCHGResults = DAG.getNode(
3152 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3153 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3154 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3155 DAG.getConstant(Imm, DL, GRLenVT)});
3156 Results.push_back(
3157 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3158 Results.push_back(CSRXCHGResults.getValue(1));
3159 break;
3160 }
3161#define IOCSRRD_CASE(NAME, NODE) \
3162 case Intrinsic::loongarch_##NAME: { \
3163 SDValue IOCSRRDResults = \
3164 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3165 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3166 Results.push_back( \
3167 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3168 Results.push_back(IOCSRRDResults.getValue(1)); \
3169 break; \
3170 }
3171 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3172 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3173 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3174#undef IOCSRRD_CASE
3175 case Intrinsic::loongarch_cpucfg: {
3176 SDValue CPUCFGResults =
3177 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3178 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3179 Results.push_back(
3180 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3181 Results.push_back(CPUCFGResults.getValue(1));
3182 break;
3183 }
3184 case Intrinsic::loongarch_lddir_d: {
3185 if (!Subtarget.is64Bit()) {
3186 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3187 return;
3188 }
3189 break;
3190 }
3191 }
3192 break;
3193 }
3194 case ISD::READ_REGISTER: {
3195 if (Subtarget.is64Bit())
3196 DAG.getContext()->emitError(
3197 "On LA64, only 64-bit registers can be read.");
3198 else
3199 DAG.getContext()->emitError(
3200 "On LA32, only 32-bit registers can be read.");
3201 Results.push_back(DAG.getUNDEF(VT));
3202 Results.push_back(N->getOperand(0));
3203 break;
3204 }
3205   case ISD::INTRINSIC_WO_CHAIN: {
3206     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3207 break;
3208 }
3209 case ISD::LROUND: {
3210 SDValue Op0 = N->getOperand(0);
3211 EVT OpVT = Op0.getValueType();
3212 RTLIB::Libcall LC =
3213 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3214 MakeLibCallOptions CallOptions;
3215 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3216 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3217 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3218 Results.push_back(Result);
3219 break;
3220 }
3221 }
3222}
3223
3224 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3225                                  TargetLowering::DAGCombinerInfo &DCI,
3226                                  const LoongArchSubtarget &Subtarget) {
3227 if (DCI.isBeforeLegalizeOps())
3228 return SDValue();
3229
3230 SDValue FirstOperand = N->getOperand(0);
3231 SDValue SecondOperand = N->getOperand(1);
3232 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3233 EVT ValTy = N->getValueType(0);
3234 SDLoc DL(N);
3235 uint64_t lsb, msb;
3236 unsigned SMIdx, SMLen;
3237 ConstantSDNode *CN;
3238 SDValue NewOperand;
3239 MVT GRLenVT = Subtarget.getGRLenVT();
3240
3241 // Op's second operand must be a shifted mask.
3242 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3243 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3244 return SDValue();
3245
3246 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3247 // Pattern match BSTRPICK.
3248 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3249 // => BSTRPICK $dst, $src, msb, lsb
3250 // where msb = lsb + len - 1
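    // Illustrative example (not part of the original source):
    //   and (srl $src, 8), 0xffff   (lsb = 8, len = 16)
    //   => BSTRPICK $dst, $src, 23, 8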
3251
3252 // The second operand of the shift must be an immediate.
3253 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3254 return SDValue();
3255
3256 lsb = CN->getZExtValue();
3257
3258 // Return if the shifted mask does not start at bit 0 or the sum of its
3259 // length and lsb exceeds the word's size.
3260 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3261 return SDValue();
3262
3263 NewOperand = FirstOperand.getOperand(0);
3264 } else {
3265 // Pattern match BSTRPICK.
3266     // $dst = and $src, (2**len - 1), if len > 12
3267 // => BSTRPICK $dst, $src, msb, lsb
3268 // where lsb = 0 and msb = len - 1
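    // Illustrative example (not part of the original source):
    //   and $src, 0xfffff           (len = 20 > 12)
    //   => BSTRPICK $dst, $src, 19, 0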
3269
3270 // If the mask is <= 0xfff, andi can be used instead.
3271 if (CN->getZExtValue() <= 0xfff)
3272 return SDValue();
3273
3274     // Return if the MSB exceeds the word's size.
3275 if (SMIdx + SMLen > ValTy.getSizeInBits())
3276 return SDValue();
3277
3278 if (SMIdx > 0) {
3279       // Omit if the constant has more than 2 uses. This is a conservative
3280       // decision. Whether it is a win depends on the HW microarchitecture.
3281       // However, it should always be better for 1 and 2 uses.
3282 if (CN->use_size() > 2)
3283 return SDValue();
3284 // Return if the constant can be composed by a single LU12I.W.
3285 if ((CN->getZExtValue() & 0xfff) == 0)
3286 return SDValue();
3287       // Return if the constant can be composed by a single ADDI with
3288 // the zero register.
3289 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3290 return SDValue();
3291 }
3292
3293 lsb = SMIdx;
3294 NewOperand = FirstOperand;
3295 }
3296
3297 msb = lsb + SMLen - 1;
3298 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3299 DAG.getConstant(msb, DL, GRLenVT),
3300 DAG.getConstant(lsb, DL, GRLenVT));
3301 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3302 return NR0;
3303 // Try to optimize to
3304 // bstrpick $Rd, $Rs, msb, lsb
3305 // slli $Rd, $Rd, lsb
3306 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3307 DAG.getConstant(lsb, DL, GRLenVT));
3308}
3309
3310 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3311                                  TargetLowering::DAGCombinerInfo &DCI,
3312                                  const LoongArchSubtarget &Subtarget) {
3313 if (DCI.isBeforeLegalizeOps())
3314 return SDValue();
3315
3316 // $dst = srl (and $src, Mask), Shamt
3317 // =>
3318 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3319 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3320 //
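  // Illustrative example (not part of the original source):
  //   srl (and $src, 0xff00), 8    (MaskIdx = 8, MaskLen = 8, Shamt = 8)
  //   => BSTRPICK $dst, $src, 15, 8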
3321
3322 SDValue FirstOperand = N->getOperand(0);
3323 ConstantSDNode *CN;
3324 EVT ValTy = N->getValueType(0);
3325 SDLoc DL(N);
3326 MVT GRLenVT = Subtarget.getGRLenVT();
3327 unsigned MaskIdx, MaskLen;
3328 uint64_t Shamt;
3329
3330 // The first operand must be an AND and the second operand of the AND must be
3331 // a shifted mask.
3332 if (FirstOperand.getOpcode() != ISD::AND ||
3333 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3334 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3335 return SDValue();
3336
3337 // The second operand (shift amount) must be an immediate.
3338 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3339 return SDValue();
3340
3341 Shamt = CN->getZExtValue();
3342 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3343 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3344 FirstOperand->getOperand(0),
3345 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3346 DAG.getConstant(Shamt, DL, GRLenVT));
3347
3348 return SDValue();
3349}
3350
3351 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3352                                 TargetLowering::DAGCombinerInfo &DCI,
3353                                 const LoongArchSubtarget &Subtarget) {
3354 MVT GRLenVT = Subtarget.getGRLenVT();
3355 EVT ValTy = N->getValueType(0);
3356 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3357 ConstantSDNode *CN0, *CN1;
3358 SDLoc DL(N);
3359 unsigned ValBits = ValTy.getSizeInBits();
3360 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3361 unsigned Shamt;
3362 bool SwapAndRetried = false;
3363
3364 if (DCI.isBeforeLegalizeOps())
3365 return SDValue();
3366
3367 if (ValBits != 32 && ValBits != 64)
3368 return SDValue();
3369
3370Retry:
3371 // 1st pattern to match BSTRINS:
3372 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3373 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3374 // =>
3375 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
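  // Illustrative example (not part of the original source): with size = 8 and
  // lsb = 8, mask1 = 0xff00 and mask0 = ~0xff00, so
  //   or (and X, ~0xff00), (and (shl Y, 8), 0xff00)
  //   => BSTRINS X, Y, 15, 8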
3376 if (N0.getOpcode() == ISD::AND &&
3377 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3378 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3379 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3380 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3381 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3382 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3383 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3384 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3385 (MaskIdx0 + MaskLen0 <= ValBits)) {
3386 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3387 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3388 N1.getOperand(0).getOperand(0),
3389 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3390 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3391 }
3392
3393 // 2nd pattern to match BSTRINS:
3394 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3395 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3396 // =>
3397 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3398 if (N0.getOpcode() == ISD::AND &&
3399 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3400 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3401 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3402 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3403 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3404 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3405 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3406 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3407 (MaskIdx0 + MaskLen0 <= ValBits)) {
3408 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3409 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3410 N1.getOperand(0).getOperand(0),
3411 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3412 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3413 }
3414
3415 // 3rd pattern to match BSTRINS:
3416 // R = or (and X, mask0), (and Y, mask1)
3417 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3418 // =>
3419 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3420 // where msb = lsb + size - 1
3421 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3422 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3423 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3424 (MaskIdx0 + MaskLen0 <= 64) &&
3425 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3426 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3427 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3428 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3429 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3430 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3431 DAG.getConstant(ValBits == 32
3432 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3433 : (MaskIdx0 + MaskLen0 - 1),
3434 DL, GRLenVT),
3435 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3436 }
3437
3438 // 4th pattern to match BSTRINS:
3439 // R = or (and X, mask), (shl Y, shamt)
3440 // where mask = (2**shamt - 1)
3441 // =>
3442 // R = BSTRINS X, Y, ValBits - 1, shamt
3443 // where ValBits = 32 or 64
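// For example, on LA64 with shamt = 8 (mask = 0xff),
//   R = or (and X, 0xff), (shl Y, 8)
// becomes
//   R = BSTRINS X, Y, 63, 8
// i.e. bits [55:0] of Y replace bits [63:8] of X while the low byte of X is
// kept.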
3444 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3445 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3446 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3447 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3448 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3449 (MaskIdx0 + MaskLen0 <= ValBits)) {
3450 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3451 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3452 N1.getOperand(0),
3453 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3454 DAG.getConstant(Shamt, DL, GRLenVT));
3455 }
3456
3457 // 5th pattern to match BSTRINS:
3458 // R = or (and X, mask), const
3459 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3460 // =>
3461 // R = BSTRINS X, (const >> lsb), msb, lsb
3462 // where msb = lsb + size - 1
3463 if (N0.getOpcode() == ISD::AND &&
3464 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3465 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3466 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3467 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3468 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3469 return DAG.getNode(
3470 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3471 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3472 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3473 : (MaskIdx0 + MaskLen0 - 1),
3474 DL, GRLenVT),
3475 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3476 }
3477
3478 // 6th pattern.
3479 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3480 // by the incoming bits are known to be zero.
3481 // =>
3482 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3483 //
3484 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3485 // pattern is more common than the 1st. So we put the 1st before the 6th in
3486 // order to match as many nodes as possible.
3487 ConstantSDNode *CNMask, *CNShamt;
3488 unsigned MaskIdx, MaskLen;
3489 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3490 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3491 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3492 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3493 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3494 Shamt = CNShamt->getZExtValue();
3495 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3496 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3497 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3498 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3499 N1.getOperand(0).getOperand(0),
3500 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3501 DAG.getConstant(Shamt, DL, GRLenVT));
3502 }
3503 }
3504
3505 // 7th pattern.
3506 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3507 // overwritten by the incoming bits are known to be zero.
3508 // =>
3509 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3510 //
3511 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3512 // before the 7th in order to match as many nodes as possible.
3513 if (N1.getOpcode() == ISD::AND &&
3514 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3515 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3516 N1.getOperand(0).getOpcode() == ISD::SHL &&
3517 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3518 CNShamt->getZExtValue() == MaskIdx) {
3519 APInt ShMask(ValBits, CNMask->getZExtValue());
3520 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3521 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3522 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3523 N1.getOperand(0).getOperand(0),
3524 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3525 DAG.getConstant(MaskIdx, DL, GRLenVT));
3526 }
3527 }
3528
3529 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3530 if (!SwapAndRetried) {
3531 std::swap(N0, N1);
3532 SwapAndRetried = true;
3533 goto Retry;
3534 }
3535
3536 SwapAndRetried = false;
3537Retry2:
3538 // 8th pattern.
3539 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3540 // the incoming bits are known to be zero.
3541 // =>
3542 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3543 //
3544 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3545 // we put it here in order to match as many nodes as possible or generate
3546 // fewer instructions.
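// For example, with shifted_mask = 0xff00 (MaskIdx = 8, MaskLen = 8) and
// bits [15:8] of b known to be zero,
//   a = b | (c & 0xff00)
// becomes
//   a = BSTRINS b, (c >> 8), 15, 8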
3547 if (N1.getOpcode() == ISD::AND &&
3548 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3549 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3550 APInt ShMask(ValBits, CNMask->getZExtValue());
3551 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3552 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3553 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3554 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3555 N1->getOperand(0),
3556 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3557 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3558 DAG.getConstant(MaskIdx, DL, GRLenVT));
3559 }
3560 }
3561 // Swap N0/N1 and retry.
3562 if (!SwapAndRetried) {
3563 std::swap(N0, N1);
3564 SwapAndRetried = true;
3565 goto Retry2;
3566 }
3567
3568 return SDValue();
3569}
3570
3571static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3572 ExtType = ISD::NON_EXTLOAD;
3573
3574 switch (V.getNode()->getOpcode()) {
3575 case ISD::LOAD: {
3576 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3577 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3578 (LoadNode->getMemoryVT() == MVT::i16)) {
3579 ExtType = LoadNode->getExtensionType();
3580 return true;
3581 }
3582 return false;
3583 }
3584 case ISD::AssertSext: {
3585 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3586 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3587 ExtType = ISD::SEXTLOAD;
3588 return true;
3589 }
3590 return false;
3591 }
3592 case ISD::AssertZext: {
3593 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3594 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3595 ExtType = ISD::ZEXTLOAD;
3596 return true;
3597 }
3598 return false;
3599 }
3600 default:
3601 return false;
3602 }
3603
3604 return false;
3605}
3606
3607// Eliminate redundant truncation and zero-extension nodes.
3608// * Case 1:
3609// +------------+ +------------+ +------------+
3610// | Input1 | | Input2 | | CC |
3611// +------------+ +------------+ +------------+
3612// | | |
3613// V V +----+
3614// +------------+ +------------+ |
3615// | TRUNCATE | | TRUNCATE | |
3616// +------------+ +------------+ |
3617// | | |
3618// V V |
3619// +------------+ +------------+ |
3620// | ZERO_EXT | | ZERO_EXT | |
3621// +------------+ +------------+ |
3622// | | |
3623// | +-------------+ |
3624// V V | |
3625// +----------------+ | |
3626// | AND | | |
3627// +----------------+ | |
3628// | | |
3629// +---------------+ | |
3630// | | |
3631// V V V
3632// +-------------+
3633// | CMP |
3634// +-------------+
3635// * Case 2:
3636// +------------+ +------------+ +-------------+ +------------+ +------------+
3637// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3638// +------------+ +------------+ +-------------+ +------------+ +------------+
3639// | | | | |
3640// V | | | |
3641// +------------+ | | | |
3642// | XOR |<---------------------+ | |
3643// +------------+ | | |
3644// | | | |
3645// V V +---------------+ |
3646// +------------+ +------------+ | |
3647// | TRUNCATE | | TRUNCATE | | +-------------------------+
3648// +------------+ +------------+ | |
3649// | | | |
3650// V V | |
3651// +------------+ +------------+ | |
3652// | ZERO_EXT | | ZERO_EXT | | |
3653// +------------+ +------------+ | |
3654// | | | |
3655// V V | |
3656// +----------------+ | |
3657// | AND | | |
3658// +----------------+ | |
3659// | | |
3660// +---------------+ | |
3661// | | |
3662// V V V
3663// +-------------+
3664// | CMP |
3665// +-------------+
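// In Case 1, once both wide inputs are known to be properly extended narrow
// (i8/i16) values, the TRUNCATE/ZERO_EXT pairs are redundant and the compare
// can be rebuilt directly on the wide inputs, e.g.
//   (setcc (and (zext (trunc X)), (zext (trunc Y))), (zext (trunc Y)), cc)
// becomes
//   (setcc (and X, Y), Y, cc)
// Case 2 is handled the same way after peeling off the XOR with -1 and the
// compare against 0.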
3666 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3667 TargetLowering::DAGCombinerInfo &DCI,
3668 const LoongArchSubtarget &Subtarget) {
3669 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3670
3671 SDNode *AndNode = N->getOperand(0).getNode();
3672 if (AndNode->getOpcode() != ISD::AND)
3673 return SDValue();
3674
3675 SDValue AndInputValue2 = AndNode->getOperand(1);
3676 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3677 return SDValue();
3678
3679 SDValue CmpInputValue = N->getOperand(1);
3680 SDValue AndInputValue1 = AndNode->getOperand(0);
3681 if (AndInputValue1.getOpcode() == ISD::XOR) {
3682 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3683 return SDValue();
3684 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3685 if (!CN || CN->getSExtValue() != -1)
3686 return SDValue();
3687 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3688 if (!CN || CN->getSExtValue() != 0)
3689 return SDValue();
3690 AndInputValue1 = AndInputValue1.getOperand(0);
3691 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3692 return SDValue();
3693 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3694 if (AndInputValue2 != CmpInputValue)
3695 return SDValue();
3696 } else {
3697 return SDValue();
3698 }
3699
3700 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3701 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3702 return SDValue();
3703
3704 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3705 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3706 return SDValue();
3707
3708 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3709 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3710 ISD::LoadExtType ExtType1;
3711 ISD::LoadExtType ExtType2;
3712
3713 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3714 !checkValueWidth(TruncInputValue2, ExtType2))
3715 return SDValue();
3716
3717 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3718 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3719 return SDValue();
3720
3721 if ((ExtType2 != ISD::ZEXTLOAD) &&
3722 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3723 return SDValue();
3724
3725 // These truncation and zero-extension nodes are not necessary, remove them.
3726 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3727 TruncInputValue1, TruncInputValue2);
3728 SDValue NewSetCC =
3729 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3730 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3731 return SDValue(N, 0);
3732}
3733
3734// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
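// (Bit-reversing a byte-swapped 32-bit value is equivalent to reversing the
// bits within each byte in place, which is exactly BITREV_4B.)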
3735 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3736 TargetLowering::DAGCombinerInfo &DCI,
3737 const LoongArchSubtarget &Subtarget) {
3738 if (DCI.isBeforeLegalizeOps())
3739 return SDValue();
3740
3741 SDValue Src = N->getOperand(0);
3742 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3743 return SDValue();
3744
3745 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3746 Src.getOperand(0));
3747}
3748
3749template <unsigned N>
3750 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3751 SelectionDAG &DAG,
3752 const LoongArchSubtarget &Subtarget,
3753 bool IsSigned = false) {
3754 SDLoc DL(Node);
3755 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3756 // Check the ImmArg.
3757 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3758 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3759 DAG.getContext()->emitError(Node->getOperationName(0) +
3760 ": argument out of range.");
3761 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3762 }
3763 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3764}
3765
3766template <unsigned N>
3767static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3768 SelectionDAG &DAG, bool IsSigned = false) {
3769 SDLoc DL(Node);
3770 EVT ResTy = Node->getValueType(0);
3771 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3772
3773 // Check the ImmArg.
3774 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3775 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3776 DAG.getContext()->emitError(Node->getOperationName(0) +
3777 ": argument out of range.");
3778 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3779 }
3780 return DAG.getConstant(
3781 APInt(ResTy.getScalarType().getSizeInBits(),
3782 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3783 DL, ResTy);
3784}
3785
3786 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3787 SDLoc DL(Node);
3788 EVT ResTy = Node->getValueType(0);
3789 SDValue Vec = Node->getOperand(2);
3790 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3791 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3792}
3793
3794 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3795 SDLoc DL(Node);
3796 EVT ResTy = Node->getValueType(0);
3797 SDValue One = DAG.getConstant(1, DL, ResTy);
3798 SDValue Bit =
3799 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3800
3801 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3802 DAG.getNOT(DL, Bit, ResTy));
3803}
3804
3805template <unsigned N>
3806 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3807 SDLoc DL(Node);
3808 EVT ResTy = Node->getValueType(0);
3809 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3810 // Check the unsigned ImmArg.
3811 if (!isUInt<N>(CImm->getZExtValue())) {
3812 DAG.getContext()->emitError(Node->getOperationName(0) +
3813 ": argument out of range.");
3814 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3815 }
3816
3817 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3818 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3819
3820 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3821}
3822
3823template <unsigned N>
3824 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3825 SDLoc DL(Node);
3826 EVT ResTy = Node->getValueType(0);
3827 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3828 // Check the unsigned ImmArg.
3829 if (!isUInt<N>(CImm->getZExtValue())) {
3830 DAG.getContext()->emitError(Node->getOperationName(0) +
3831 ": argument out of range.");
3832 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3833 }
3834
3835 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3836 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3837 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3838}
3839
3840template <unsigned N>
3841 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3842 SDLoc DL(Node);
3843 EVT ResTy = Node->getValueType(0);
3844 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3845 // Check the unsigned ImmArg.
3846 if (!isUInt<N>(CImm->getZExtValue())) {
3847 DAG.getContext()->emitError(Node->getOperationName(0) +
3848 ": argument out of range.");
3849 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3850 }
3851
3852 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3853 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3854 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3855}
3856
3857 static SDValue
3858 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3859 TargetLowering::DAGCombinerInfo &DCI,
3860 const LoongArchSubtarget &Subtarget) {
3861 SDLoc DL(N);
3862 switch (N->getConstantOperandVal(0)) {
3863 default:
3864 break;
3865 case Intrinsic::loongarch_lsx_vadd_b:
3866 case Intrinsic::loongarch_lsx_vadd_h:
3867 case Intrinsic::loongarch_lsx_vadd_w:
3868 case Intrinsic::loongarch_lsx_vadd_d:
3869 case Intrinsic::loongarch_lasx_xvadd_b:
3870 case Intrinsic::loongarch_lasx_xvadd_h:
3871 case Intrinsic::loongarch_lasx_xvadd_w:
3872 case Intrinsic::loongarch_lasx_xvadd_d:
3873 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3874 N->getOperand(2));
3875 case Intrinsic::loongarch_lsx_vaddi_bu:
3876 case Intrinsic::loongarch_lsx_vaddi_hu:
3877 case Intrinsic::loongarch_lsx_vaddi_wu:
3878 case Intrinsic::loongarch_lsx_vaddi_du:
3879 case Intrinsic::loongarch_lasx_xvaddi_bu:
3880 case Intrinsic::loongarch_lasx_xvaddi_hu:
3881 case Intrinsic::loongarch_lasx_xvaddi_wu:
3882 case Intrinsic::loongarch_lasx_xvaddi_du:
3883 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3884 lowerVectorSplatImm<5>(N, 2, DAG));
3885 case Intrinsic::loongarch_lsx_vsub_b:
3886 case Intrinsic::loongarch_lsx_vsub_h:
3887 case Intrinsic::loongarch_lsx_vsub_w:
3888 case Intrinsic::loongarch_lsx_vsub_d:
3889 case Intrinsic::loongarch_lasx_xvsub_b:
3890 case Intrinsic::loongarch_lasx_xvsub_h:
3891 case Intrinsic::loongarch_lasx_xvsub_w:
3892 case Intrinsic::loongarch_lasx_xvsub_d:
3893 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3894 N->getOperand(2));
3895 case Intrinsic::loongarch_lsx_vsubi_bu:
3896 case Intrinsic::loongarch_lsx_vsubi_hu:
3897 case Intrinsic::loongarch_lsx_vsubi_wu:
3898 case Intrinsic::loongarch_lsx_vsubi_du:
3899 case Intrinsic::loongarch_lasx_xvsubi_bu:
3900 case Intrinsic::loongarch_lasx_xvsubi_hu:
3901 case Intrinsic::loongarch_lasx_xvsubi_wu:
3902 case Intrinsic::loongarch_lasx_xvsubi_du:
3903 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3904 lowerVectorSplatImm<5>(N, 2, DAG));
3905 case Intrinsic::loongarch_lsx_vneg_b:
3906 case Intrinsic::loongarch_lsx_vneg_h:
3907 case Intrinsic::loongarch_lsx_vneg_w:
3908 case Intrinsic::loongarch_lsx_vneg_d:
3909 case Intrinsic::loongarch_lasx_xvneg_b:
3910 case Intrinsic::loongarch_lasx_xvneg_h:
3911 case Intrinsic::loongarch_lasx_xvneg_w:
3912 case Intrinsic::loongarch_lasx_xvneg_d:
3913 return DAG.getNode(
3914 ISD::SUB, DL, N->getValueType(0),
3915 DAG.getConstant(
3916 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3917 /*isSigned=*/true),
3918 SDLoc(N), N->getValueType(0)),
3919 N->getOperand(1));
3920 case Intrinsic::loongarch_lsx_vmax_b:
3921 case Intrinsic::loongarch_lsx_vmax_h:
3922 case Intrinsic::loongarch_lsx_vmax_w:
3923 case Intrinsic::loongarch_lsx_vmax_d:
3924 case Intrinsic::loongarch_lasx_xvmax_b:
3925 case Intrinsic::loongarch_lasx_xvmax_h:
3926 case Intrinsic::loongarch_lasx_xvmax_w:
3927 case Intrinsic::loongarch_lasx_xvmax_d:
3928 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3929 N->getOperand(2));
3930 case Intrinsic::loongarch_lsx_vmax_bu:
3931 case Intrinsic::loongarch_lsx_vmax_hu:
3932 case Intrinsic::loongarch_lsx_vmax_wu:
3933 case Intrinsic::loongarch_lsx_vmax_du:
3934 case Intrinsic::loongarch_lasx_xvmax_bu:
3935 case Intrinsic::loongarch_lasx_xvmax_hu:
3936 case Intrinsic::loongarch_lasx_xvmax_wu:
3937 case Intrinsic::loongarch_lasx_xvmax_du:
3938 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3939 N->getOperand(2));
3940 case Intrinsic::loongarch_lsx_vmaxi_b:
3941 case Intrinsic::loongarch_lsx_vmaxi_h:
3942 case Intrinsic::loongarch_lsx_vmaxi_w:
3943 case Intrinsic::loongarch_lsx_vmaxi_d:
3944 case Intrinsic::loongarch_lasx_xvmaxi_b:
3945 case Intrinsic::loongarch_lasx_xvmaxi_h:
3946 case Intrinsic::loongarch_lasx_xvmaxi_w:
3947 case Intrinsic::loongarch_lasx_xvmaxi_d:
3948 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3949 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3950 case Intrinsic::loongarch_lsx_vmaxi_bu:
3951 case Intrinsic::loongarch_lsx_vmaxi_hu:
3952 case Intrinsic::loongarch_lsx_vmaxi_wu:
3953 case Intrinsic::loongarch_lsx_vmaxi_du:
3954 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3955 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3956 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3957 case Intrinsic::loongarch_lasx_xvmaxi_du:
3958 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3959 lowerVectorSplatImm<5>(N, 2, DAG));
3960 case Intrinsic::loongarch_lsx_vmin_b:
3961 case Intrinsic::loongarch_lsx_vmin_h:
3962 case Intrinsic::loongarch_lsx_vmin_w:
3963 case Intrinsic::loongarch_lsx_vmin_d:
3964 case Intrinsic::loongarch_lasx_xvmin_b:
3965 case Intrinsic::loongarch_lasx_xvmin_h:
3966 case Intrinsic::loongarch_lasx_xvmin_w:
3967 case Intrinsic::loongarch_lasx_xvmin_d:
3968 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3969 N->getOperand(2));
3970 case Intrinsic::loongarch_lsx_vmin_bu:
3971 case Intrinsic::loongarch_lsx_vmin_hu:
3972 case Intrinsic::loongarch_lsx_vmin_wu:
3973 case Intrinsic::loongarch_lsx_vmin_du:
3974 case Intrinsic::loongarch_lasx_xvmin_bu:
3975 case Intrinsic::loongarch_lasx_xvmin_hu:
3976 case Intrinsic::loongarch_lasx_xvmin_wu:
3977 case Intrinsic::loongarch_lasx_xvmin_du:
3978 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3979 N->getOperand(2));
3980 case Intrinsic::loongarch_lsx_vmini_b:
3981 case Intrinsic::loongarch_lsx_vmini_h:
3982 case Intrinsic::loongarch_lsx_vmini_w:
3983 case Intrinsic::loongarch_lsx_vmini_d:
3984 case Intrinsic::loongarch_lasx_xvmini_b:
3985 case Intrinsic::loongarch_lasx_xvmini_h:
3986 case Intrinsic::loongarch_lasx_xvmini_w:
3987 case Intrinsic::loongarch_lasx_xvmini_d:
3988 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3989 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3990 case Intrinsic::loongarch_lsx_vmini_bu:
3991 case Intrinsic::loongarch_lsx_vmini_hu:
3992 case Intrinsic::loongarch_lsx_vmini_wu:
3993 case Intrinsic::loongarch_lsx_vmini_du:
3994 case Intrinsic::loongarch_lasx_xvmini_bu:
3995 case Intrinsic::loongarch_lasx_xvmini_hu:
3996 case Intrinsic::loongarch_lasx_xvmini_wu:
3997 case Intrinsic::loongarch_lasx_xvmini_du:
3998 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3999 lowerVectorSplatImm<5>(N, 2, DAG));
4000 case Intrinsic::loongarch_lsx_vmul_b:
4001 case Intrinsic::loongarch_lsx_vmul_h:
4002 case Intrinsic::loongarch_lsx_vmul_w:
4003 case Intrinsic::loongarch_lsx_vmul_d:
4004 case Intrinsic::loongarch_lasx_xvmul_b:
4005 case Intrinsic::loongarch_lasx_xvmul_h:
4006 case Intrinsic::loongarch_lasx_xvmul_w:
4007 case Intrinsic::loongarch_lasx_xvmul_d:
4008 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
4009 N->getOperand(2));
4010 case Intrinsic::loongarch_lsx_vmadd_b:
4011 case Intrinsic::loongarch_lsx_vmadd_h:
4012 case Intrinsic::loongarch_lsx_vmadd_w:
4013 case Intrinsic::loongarch_lsx_vmadd_d:
4014 case Intrinsic::loongarch_lasx_xvmadd_b:
4015 case Intrinsic::loongarch_lasx_xvmadd_h:
4016 case Intrinsic::loongarch_lasx_xvmadd_w:
4017 case Intrinsic::loongarch_lasx_xvmadd_d: {
4018 EVT ResTy = N->getValueType(0);
4019 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4020 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4021 N->getOperand(3)));
4022 }
4023 case Intrinsic::loongarch_lsx_vmsub_b:
4024 case Intrinsic::loongarch_lsx_vmsub_h:
4025 case Intrinsic::loongarch_lsx_vmsub_w:
4026 case Intrinsic::loongarch_lsx_vmsub_d:
4027 case Intrinsic::loongarch_lasx_xvmsub_b:
4028 case Intrinsic::loongarch_lasx_xvmsub_h:
4029 case Intrinsic::loongarch_lasx_xvmsub_w:
4030 case Intrinsic::loongarch_lasx_xvmsub_d: {
4031 EVT ResTy = N->getValueType(0);
4032 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4033 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4034 N->getOperand(3)));
4035 }
4036 case Intrinsic::loongarch_lsx_vdiv_b:
4037 case Intrinsic::loongarch_lsx_vdiv_h:
4038 case Intrinsic::loongarch_lsx_vdiv_w:
4039 case Intrinsic::loongarch_lsx_vdiv_d:
4040 case Intrinsic::loongarch_lasx_xvdiv_b:
4041 case Intrinsic::loongarch_lasx_xvdiv_h:
4042 case Intrinsic::loongarch_lasx_xvdiv_w:
4043 case Intrinsic::loongarch_lasx_xvdiv_d:
4044 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4045 N->getOperand(2));
4046 case Intrinsic::loongarch_lsx_vdiv_bu:
4047 case Intrinsic::loongarch_lsx_vdiv_hu:
4048 case Intrinsic::loongarch_lsx_vdiv_wu:
4049 case Intrinsic::loongarch_lsx_vdiv_du:
4050 case Intrinsic::loongarch_lasx_xvdiv_bu:
4051 case Intrinsic::loongarch_lasx_xvdiv_hu:
4052 case Intrinsic::loongarch_lasx_xvdiv_wu:
4053 case Intrinsic::loongarch_lasx_xvdiv_du:
4054 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vmod_b:
4057 case Intrinsic::loongarch_lsx_vmod_h:
4058 case Intrinsic::loongarch_lsx_vmod_w:
4059 case Intrinsic::loongarch_lsx_vmod_d:
4060 case Intrinsic::loongarch_lasx_xvmod_b:
4061 case Intrinsic::loongarch_lasx_xvmod_h:
4062 case Intrinsic::loongarch_lasx_xvmod_w:
4063 case Intrinsic::loongarch_lasx_xvmod_d:
4064 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4065 N->getOperand(2));
4066 case Intrinsic::loongarch_lsx_vmod_bu:
4067 case Intrinsic::loongarch_lsx_vmod_hu:
4068 case Intrinsic::loongarch_lsx_vmod_wu:
4069 case Intrinsic::loongarch_lsx_vmod_du:
4070 case Intrinsic::loongarch_lasx_xvmod_bu:
4071 case Intrinsic::loongarch_lasx_xvmod_hu:
4072 case Intrinsic::loongarch_lasx_xvmod_wu:
4073 case Intrinsic::loongarch_lasx_xvmod_du:
4074 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4075 N->getOperand(2));
4076 case Intrinsic::loongarch_lsx_vand_v:
4077 case Intrinsic::loongarch_lasx_xvand_v:
4078 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4079 N->getOperand(2));
4080 case Intrinsic::loongarch_lsx_vor_v:
4081 case Intrinsic::loongarch_lasx_xvor_v:
4082 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4083 N->getOperand(2));
4084 case Intrinsic::loongarch_lsx_vxor_v:
4085 case Intrinsic::loongarch_lasx_xvxor_v:
4086 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4087 N->getOperand(2));
4088 case Intrinsic::loongarch_lsx_vnor_v:
4089 case Intrinsic::loongarch_lasx_xvnor_v: {
4090 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4091 N->getOperand(2));
4092 return DAG.getNOT(DL, Res, Res->getValueType(0));
4093 }
4094 case Intrinsic::loongarch_lsx_vandi_b:
4095 case Intrinsic::loongarch_lasx_xvandi_b:
4096 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4097 lowerVectorSplatImm<8>(N, 2, DAG));
4098 case Intrinsic::loongarch_lsx_vori_b:
4099 case Intrinsic::loongarch_lasx_xvori_b:
4100 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4101 lowerVectorSplatImm<8>(N, 2, DAG));
4102 case Intrinsic::loongarch_lsx_vxori_b:
4103 case Intrinsic::loongarch_lasx_xvxori_b:
4104 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4105 lowerVectorSplatImm<8>(N, 2, DAG));
4106 case Intrinsic::loongarch_lsx_vsll_b:
4107 case Intrinsic::loongarch_lsx_vsll_h:
4108 case Intrinsic::loongarch_lsx_vsll_w:
4109 case Intrinsic::loongarch_lsx_vsll_d:
4110 case Intrinsic::loongarch_lasx_xvsll_b:
4111 case Intrinsic::loongarch_lasx_xvsll_h:
4112 case Intrinsic::loongarch_lasx_xvsll_w:
4113 case Intrinsic::loongarch_lasx_xvsll_d:
4114 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4115 truncateVecElts(N, DAG));
4116 case Intrinsic::loongarch_lsx_vslli_b:
4117 case Intrinsic::loongarch_lasx_xvslli_b:
4118 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4119 lowerVectorSplatImm<3>(N, 2, DAG));
4120 case Intrinsic::loongarch_lsx_vslli_h:
4121 case Intrinsic::loongarch_lasx_xvslli_h:
4122 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4123 lowerVectorSplatImm<4>(N, 2, DAG));
4124 case Intrinsic::loongarch_lsx_vslli_w:
4125 case Intrinsic::loongarch_lasx_xvslli_w:
4126 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4127 lowerVectorSplatImm<5>(N, 2, DAG));
4128 case Intrinsic::loongarch_lsx_vslli_d:
4129 case Intrinsic::loongarch_lasx_xvslli_d:
4130 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4131 lowerVectorSplatImm<6>(N, 2, DAG));
4132 case Intrinsic::loongarch_lsx_vsrl_b:
4133 case Intrinsic::loongarch_lsx_vsrl_h:
4134 case Intrinsic::loongarch_lsx_vsrl_w:
4135 case Intrinsic::loongarch_lsx_vsrl_d:
4136 case Intrinsic::loongarch_lasx_xvsrl_b:
4137 case Intrinsic::loongarch_lasx_xvsrl_h:
4138 case Intrinsic::loongarch_lasx_xvsrl_w:
4139 case Intrinsic::loongarch_lasx_xvsrl_d:
4140 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4141 truncateVecElts(N, DAG));
4142 case Intrinsic::loongarch_lsx_vsrli_b:
4143 case Intrinsic::loongarch_lasx_xvsrli_b:
4144 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4145 lowerVectorSplatImm<3>(N, 2, DAG));
4146 case Intrinsic::loongarch_lsx_vsrli_h:
4147 case Intrinsic::loongarch_lasx_xvsrli_h:
4148 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4149 lowerVectorSplatImm<4>(N, 2, DAG));
4150 case Intrinsic::loongarch_lsx_vsrli_w:
4151 case Intrinsic::loongarch_lasx_xvsrli_w:
4152 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4153 lowerVectorSplatImm<5>(N, 2, DAG));
4154 case Intrinsic::loongarch_lsx_vsrli_d:
4155 case Intrinsic::loongarch_lasx_xvsrli_d:
4156 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4157 lowerVectorSplatImm<6>(N, 2, DAG));
4158 case Intrinsic::loongarch_lsx_vsra_b:
4159 case Intrinsic::loongarch_lsx_vsra_h:
4160 case Intrinsic::loongarch_lsx_vsra_w:
4161 case Intrinsic::loongarch_lsx_vsra_d:
4162 case Intrinsic::loongarch_lasx_xvsra_b:
4163 case Intrinsic::loongarch_lasx_xvsra_h:
4164 case Intrinsic::loongarch_lasx_xvsra_w:
4165 case Intrinsic::loongarch_lasx_xvsra_d:
4166 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4167 truncateVecElts(N, DAG));
4168 case Intrinsic::loongarch_lsx_vsrai_b:
4169 case Intrinsic::loongarch_lasx_xvsrai_b:
4170 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4171 lowerVectorSplatImm<3>(N, 2, DAG));
4172 case Intrinsic::loongarch_lsx_vsrai_h:
4173 case Intrinsic::loongarch_lasx_xvsrai_h:
4174 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4175 lowerVectorSplatImm<4>(N, 2, DAG));
4176 case Intrinsic::loongarch_lsx_vsrai_w:
4177 case Intrinsic::loongarch_lasx_xvsrai_w:
4178 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4179 lowerVectorSplatImm<5>(N, 2, DAG));
4180 case Intrinsic::loongarch_lsx_vsrai_d:
4181 case Intrinsic::loongarch_lasx_xvsrai_d:
4182 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4183 lowerVectorSplatImm<6>(N, 2, DAG));
4184 case Intrinsic::loongarch_lsx_vclz_b:
4185 case Intrinsic::loongarch_lsx_vclz_h:
4186 case Intrinsic::loongarch_lsx_vclz_w:
4187 case Intrinsic::loongarch_lsx_vclz_d:
4188 case Intrinsic::loongarch_lasx_xvclz_b:
4189 case Intrinsic::loongarch_lasx_xvclz_h:
4190 case Intrinsic::loongarch_lasx_xvclz_w:
4191 case Intrinsic::loongarch_lasx_xvclz_d:
4192 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4193 case Intrinsic::loongarch_lsx_vpcnt_b:
4194 case Intrinsic::loongarch_lsx_vpcnt_h:
4195 case Intrinsic::loongarch_lsx_vpcnt_w:
4196 case Intrinsic::loongarch_lsx_vpcnt_d:
4197 case Intrinsic::loongarch_lasx_xvpcnt_b:
4198 case Intrinsic::loongarch_lasx_xvpcnt_h:
4199 case Intrinsic::loongarch_lasx_xvpcnt_w:
4200 case Intrinsic::loongarch_lasx_xvpcnt_d:
4201 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4202 case Intrinsic::loongarch_lsx_vbitclr_b:
4203 case Intrinsic::loongarch_lsx_vbitclr_h:
4204 case Intrinsic::loongarch_lsx_vbitclr_w:
4205 case Intrinsic::loongarch_lsx_vbitclr_d:
4206 case Intrinsic::loongarch_lasx_xvbitclr_b:
4207 case Intrinsic::loongarch_lasx_xvbitclr_h:
4208 case Intrinsic::loongarch_lasx_xvbitclr_w:
4209 case Intrinsic::loongarch_lasx_xvbitclr_d:
4210 return lowerVectorBitClear(N, DAG);
4211 case Intrinsic::loongarch_lsx_vbitclri_b:
4212 case Intrinsic::loongarch_lasx_xvbitclri_b:
4213 return lowerVectorBitClearImm<3>(N, DAG);
4214 case Intrinsic::loongarch_lsx_vbitclri_h:
4215 case Intrinsic::loongarch_lasx_xvbitclri_h:
4216 return lowerVectorBitClearImm<4>(N, DAG);
4217 case Intrinsic::loongarch_lsx_vbitclri_w:
4218 case Intrinsic::loongarch_lasx_xvbitclri_w:
4219 return lowerVectorBitClearImm<5>(N, DAG);
4220 case Intrinsic::loongarch_lsx_vbitclri_d:
4221 case Intrinsic::loongarch_lasx_xvbitclri_d:
4222 return lowerVectorBitClearImm<6>(N, DAG);
4223 case Intrinsic::loongarch_lsx_vbitset_b:
4224 case Intrinsic::loongarch_lsx_vbitset_h:
4225 case Intrinsic::loongarch_lsx_vbitset_w:
4226 case Intrinsic::loongarch_lsx_vbitset_d:
4227 case Intrinsic::loongarch_lasx_xvbitset_b:
4228 case Intrinsic::loongarch_lasx_xvbitset_h:
4229 case Intrinsic::loongarch_lasx_xvbitset_w:
4230 case Intrinsic::loongarch_lasx_xvbitset_d: {
4231 EVT VecTy = N->getValueType(0);
4232 SDValue One = DAG.getConstant(1, DL, VecTy);
4233 return DAG.getNode(
4234 ISD::OR, DL, VecTy, N->getOperand(1),
4235 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4236 }
4237 case Intrinsic::loongarch_lsx_vbitseti_b:
4238 case Intrinsic::loongarch_lasx_xvbitseti_b:
4239 return lowerVectorBitSetImm<3>(N, DAG);
4240 case Intrinsic::loongarch_lsx_vbitseti_h:
4241 case Intrinsic::loongarch_lasx_xvbitseti_h:
4242 return lowerVectorBitSetImm<4>(N, DAG);
4243 case Intrinsic::loongarch_lsx_vbitseti_w:
4244 case Intrinsic::loongarch_lasx_xvbitseti_w:
4245 return lowerVectorBitSetImm<5>(N, DAG);
4246 case Intrinsic::loongarch_lsx_vbitseti_d:
4247 case Intrinsic::loongarch_lasx_xvbitseti_d:
4248 return lowerVectorBitSetImm<6>(N, DAG);
4249 case Intrinsic::loongarch_lsx_vbitrev_b:
4250 case Intrinsic::loongarch_lsx_vbitrev_h:
4251 case Intrinsic::loongarch_lsx_vbitrev_w:
4252 case Intrinsic::loongarch_lsx_vbitrev_d:
4253 case Intrinsic::loongarch_lasx_xvbitrev_b:
4254 case Intrinsic::loongarch_lasx_xvbitrev_h:
4255 case Intrinsic::loongarch_lasx_xvbitrev_w:
4256 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4257 EVT VecTy = N->getValueType(0);
4258 SDValue One = DAG.getConstant(1, DL, VecTy);
4259 return DAG.getNode(
4260 ISD::XOR, DL, VecTy, N->getOperand(1),
4261 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4262 }
4263 case Intrinsic::loongarch_lsx_vbitrevi_b:
4264 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4265 return lowerVectorBitRevImm<3>(N, DAG);
4266 case Intrinsic::loongarch_lsx_vbitrevi_h:
4267 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4268 return lowerVectorBitRevImm<4>(N, DAG);
4269 case Intrinsic::loongarch_lsx_vbitrevi_w:
4270 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4271 return lowerVectorBitRevImm<5>(N, DAG);
4272 case Intrinsic::loongarch_lsx_vbitrevi_d:
4273 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4274 return lowerVectorBitRevImm<6>(N, DAG);
4275 case Intrinsic::loongarch_lsx_vfadd_s:
4276 case Intrinsic::loongarch_lsx_vfadd_d:
4277 case Intrinsic::loongarch_lasx_xvfadd_s:
4278 case Intrinsic::loongarch_lasx_xvfadd_d:
4279 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4280 N->getOperand(2));
4281 case Intrinsic::loongarch_lsx_vfsub_s:
4282 case Intrinsic::loongarch_lsx_vfsub_d:
4283 case Intrinsic::loongarch_lasx_xvfsub_s:
4284 case Intrinsic::loongarch_lasx_xvfsub_d:
4285 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4286 N->getOperand(2));
4287 case Intrinsic::loongarch_lsx_vfmul_s:
4288 case Intrinsic::loongarch_lsx_vfmul_d:
4289 case Intrinsic::loongarch_lasx_xvfmul_s:
4290 case Intrinsic::loongarch_lasx_xvfmul_d:
4291 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4292 N->getOperand(2));
4293 case Intrinsic::loongarch_lsx_vfdiv_s:
4294 case Intrinsic::loongarch_lsx_vfdiv_d:
4295 case Intrinsic::loongarch_lasx_xvfdiv_s:
4296 case Intrinsic::loongarch_lasx_xvfdiv_d:
4297 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4298 N->getOperand(2));
4299 case Intrinsic::loongarch_lsx_vfmadd_s:
4300 case Intrinsic::loongarch_lsx_vfmadd_d:
4301 case Intrinsic::loongarch_lasx_xvfmadd_s:
4302 case Intrinsic::loongarch_lasx_xvfmadd_d:
4303 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4304 N->getOperand(2), N->getOperand(3));
4305 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4306 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4307 N->getOperand(1), N->getOperand(2),
4308 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4309 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4310 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4311 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4312 N->getOperand(1), N->getOperand(2),
4313 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4314 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4315 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4316 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4317 N->getOperand(1), N->getOperand(2),
4318 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4319 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4320 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4321 N->getOperand(1), N->getOperand(2),
4322 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4323 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4324 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4325 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4326 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4327 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4328 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4329 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4330 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4331 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4332 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4333 N->getOperand(1)));
4334 case Intrinsic::loongarch_lsx_vreplve_b:
4335 case Intrinsic::loongarch_lsx_vreplve_h:
4336 case Intrinsic::loongarch_lsx_vreplve_w:
4337 case Intrinsic::loongarch_lsx_vreplve_d:
4338 case Intrinsic::loongarch_lasx_xvreplve_b:
4339 case Intrinsic::loongarch_lasx_xvreplve_h:
4340 case Intrinsic::loongarch_lasx_xvreplve_w:
4341 case Intrinsic::loongarch_lasx_xvreplve_d:
4342 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4343 N->getOperand(1),
4344 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4345 N->getOperand(2)));
4346 }
4347 return SDValue();
4348}
4349
4350 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4351 DAGCombinerInfo &DCI) const {
4352 SelectionDAG &DAG = DCI.DAG;
4353 switch (N->getOpcode()) {
4354 default:
4355 break;
4356 case ISD::AND:
4357 return performANDCombine(N, DAG, DCI, Subtarget);
4358 case ISD::OR:
4359 return performORCombine(N, DAG, DCI, Subtarget);
4360 case ISD::SETCC:
4361 return performSETCCCombine(N, DAG, DCI, Subtarget);
4362 case ISD::SRL:
4363 return performSRLCombine(N, DAG, DCI, Subtarget);
4364 case LoongArchISD::BITREV_W:
4365 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4366 case ISD::INTRINSIC_WO_CHAIN:
4367 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4368 }
4369 return SDValue();
4370}
4371
4372 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4373 MachineBasicBlock *MBB) {
4374 if (!ZeroDivCheck)
4375 return MBB;
4376
4377 // Build instructions:
4378 // MBB:
4379 // div(or mod) $dst, $dividend, $divisor
4380 // bnez $divisor, SinkMBB
4381 // BreakMBB:
4382 // break 7 // BRK_DIVZERO
4383 // SinkMBB:
4384 // fallthrough
4385 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4386 MachineFunction::iterator It = ++MBB->getIterator();
4387 MachineFunction *MF = MBB->getParent();
4388 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4389 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4390 MF->insert(It, BreakMBB);
4391 MF->insert(It, SinkMBB);
4392
4393 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4394 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4395 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4396
4397 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4398 DebugLoc DL = MI.getDebugLoc();
4399 MachineOperand &Divisor = MI.getOperand(2);
4400 Register DivisorReg = Divisor.getReg();
4401
4402 // MBB:
4403 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4404 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4405 .addMBB(SinkMBB);
4406 MBB->addSuccessor(BreakMBB);
4407 MBB->addSuccessor(SinkMBB);
4408
4409 // BreakMBB:
4410 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4411 // definition of BRK_DIVZERO.
4412 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4413 BreakMBB->addSuccessor(SinkMBB);
4414
4415 // Clear Divisor's kill flag.
4416 Divisor.setIsKill(false);
4417
4418 return SinkMBB;
4419}
4420
4421 static MachineBasicBlock *
4422 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4423 const LoongArchSubtarget &Subtarget) {
4424 unsigned CondOpc;
4425 switch (MI.getOpcode()) {
4426 default:
4427 llvm_unreachable("Unexpected opcode");
4428 case LoongArch::PseudoVBZ:
4429 CondOpc = LoongArch::VSETEQZ_V;
4430 break;
4431 case LoongArch::PseudoVBZ_B:
4432 CondOpc = LoongArch::VSETANYEQZ_B;
4433 break;
4434 case LoongArch::PseudoVBZ_H:
4435 CondOpc = LoongArch::VSETANYEQZ_H;
4436 break;
4437 case LoongArch::PseudoVBZ_W:
4438 CondOpc = LoongArch::VSETANYEQZ_W;
4439 break;
4440 case LoongArch::PseudoVBZ_D:
4441 CondOpc = LoongArch::VSETANYEQZ_D;
4442 break;
4443 case LoongArch::PseudoVBNZ:
4444 CondOpc = LoongArch::VSETNEZ_V;
4445 break;
4446 case LoongArch::PseudoVBNZ_B:
4447 CondOpc = LoongArch::VSETALLNEZ_B;
4448 break;
4449 case LoongArch::PseudoVBNZ_H:
4450 CondOpc = LoongArch::VSETALLNEZ_H;
4451 break;
4452 case LoongArch::PseudoVBNZ_W:
4453 CondOpc = LoongArch::VSETALLNEZ_W;
4454 break;
4455 case LoongArch::PseudoVBNZ_D:
4456 CondOpc = LoongArch::VSETALLNEZ_D;
4457 break;
4458 case LoongArch::PseudoXVBZ:
4459 CondOpc = LoongArch::XVSETEQZ_V;
4460 break;
4461 case LoongArch::PseudoXVBZ_B:
4462 CondOpc = LoongArch::XVSETANYEQZ_B;
4463 break;
4464 case LoongArch::PseudoXVBZ_H:
4465 CondOpc = LoongArch::XVSETANYEQZ_H;
4466 break;
4467 case LoongArch::PseudoXVBZ_W:
4468 CondOpc = LoongArch::XVSETANYEQZ_W;
4469 break;
4470 case LoongArch::PseudoXVBZ_D:
4471 CondOpc = LoongArch::XVSETANYEQZ_D;
4472 break;
4473 case LoongArch::PseudoXVBNZ:
4474 CondOpc = LoongArch::XVSETNEZ_V;
4475 break;
4476 case LoongArch::PseudoXVBNZ_B:
4477 CondOpc = LoongArch::XVSETALLNEZ_B;
4478 break;
4479 case LoongArch::PseudoXVBNZ_H:
4480 CondOpc = LoongArch::XVSETALLNEZ_H;
4481 break;
4482 case LoongArch::PseudoXVBNZ_W:
4483 CondOpc = LoongArch::XVSETALLNEZ_W;
4484 break;
4485 case LoongArch::PseudoXVBNZ_D:
4486 CondOpc = LoongArch::XVSETALLNEZ_D;
4487 break;
4488 }
4489
4490 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4491 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4492 DebugLoc DL = MI.getDebugLoc();
4493 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4494 MachineFunction::iterator It = ++BB->getIterator();
4495
4496 MachineFunction *F = BB->getParent();
4497 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4498 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4499 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4500
4501 F->insert(It, FalseBB);
4502 F->insert(It, TrueBB);
4503 F->insert(It, SinkBB);
4504
4505 // Transfer the remainder of MBB and its successor edges to Sink.
4506 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4507 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4508
4509 // Insert the real instruction to BB.
4510 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4511 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4512
4513 // Insert branch.
4514 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4515 BB->addSuccessor(FalseBB);
4516 BB->addSuccessor(TrueBB);
4517
4518 // FalseBB.
4519 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4520 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4521 .addReg(LoongArch::R0)
4522 .addImm(0);
4523 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4524 FalseBB->addSuccessor(SinkBB);
4525
4526 // TrueBB.
4527 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4528 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4529 .addReg(LoongArch::R0)
4530 .addImm(1);
4531 TrueBB->addSuccessor(SinkBB);
4532
4533 // SinkBB: merge the results.
4534 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4535 MI.getOperand(0).getReg())
4536 .addReg(RD1)
4537 .addMBB(FalseBB)
4538 .addReg(RD2)
4539 .addMBB(TrueBB);
4540
4541 // The pseudo instruction is gone now.
4542 MI.eraseFromParent();
4543 return SinkBB;
4544}
4545
4546 static MachineBasicBlock *
4547 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4548 const LoongArchSubtarget &Subtarget) {
4549 unsigned InsOp;
4550 unsigned HalfSize;
4551 switch (MI.getOpcode()) {
4552 default:
4553 llvm_unreachable("Unexpected opcode");
4554 case LoongArch::PseudoXVINSGR2VR_B:
4555 HalfSize = 16;
4556 InsOp = LoongArch::VINSGR2VR_B;
4557 break;
4558 case LoongArch::PseudoXVINSGR2VR_H:
4559 HalfSize = 8;
4560 InsOp = LoongArch::VINSGR2VR_H;
4561 break;
4562 }
4563 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4564 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4565 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4566 DebugLoc DL = MI.getDebugLoc();
4567 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4568 // XDst = vector_insert XSrc, Elt, Idx
4569 Register XDst = MI.getOperand(0).getReg();
4570 Register XSrc = MI.getOperand(1).getReg();
4571 Register Elt = MI.getOperand(2).getReg();
4572 unsigned Idx = MI.getOperand(3).getImm();
4573
4574 Register ScratchReg1 = XSrc;
4575 if (Idx >= HalfSize) {
4576 ScratchReg1 = MRI.createVirtualRegister(RC);
4577 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4578 .addReg(XSrc)
4579 .addReg(XSrc)
4580 .addImm(1);
4581 }
4582
4583 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4584 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4585 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4586 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4587 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4588 .addReg(ScratchSubReg1)
4589 .addReg(Elt)
4590 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4591
4592 Register ScratchReg2 = XDst;
4593 if (Idx >= HalfSize)
4594 ScratchReg2 = MRI.createVirtualRegister(RC);
4595
4596 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4597 .addImm(0)
4598 .addReg(ScratchSubReg2)
4599 .addImm(LoongArch::sub_128);
4600
4601 if (Idx >= HalfSize)
4602 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4603 .addReg(XSrc)
4604 .addReg(ScratchReg2)
4605 .addImm(2);
4606
4607 MI.eraseFromParent();
4608 return BB;
4609}
4610
4611 static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4612 MachineBasicBlock *BB,
4613 const LoongArchSubtarget &Subtarget) {
4614 assert(Subtarget.hasExtLSX());
4615 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4616 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4617 DebugLoc DL = MI.getDebugLoc();
4618 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4619 Register Dst = MI.getOperand(0).getReg();
4620 Register Src = MI.getOperand(1).getReg();
4621 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4622 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4623 Register ScratchReg3 = MRI.createVirtualRegister(RC);
4624
4625 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4626 BuildMI(*BB, MI, DL,
4627 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4628 : LoongArch::VINSGR2VR_W),
4629 ScratchReg2)
4630 .addReg(ScratchReg1)
4631 .addReg(Src)
4632 .addImm(0);
4633 BuildMI(
4634 *BB, MI, DL,
4635 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4636 ScratchReg3)
4637 .addReg(ScratchReg2);
4638 BuildMI(*BB, MI, DL,
4639 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4640 : LoongArch::VPICKVE2GR_W),
4641 Dst)
4642 .addReg(ScratchReg3)
4643 .addImm(0);
4644
4645 MI.eraseFromParent();
4646 return BB;
4647}
4648
4649MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4650 MachineInstr &MI, MachineBasicBlock *BB) const {
4651 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4652 DebugLoc DL = MI.getDebugLoc();
4653
4654 switch (MI.getOpcode()) {
4655 default:
4656 llvm_unreachable("Unexpected instr type to insert");
4657 case LoongArch::DIV_W:
4658 case LoongArch::DIV_WU:
4659 case LoongArch::MOD_W:
4660 case LoongArch::MOD_WU:
4661 case LoongArch::DIV_D:
4662 case LoongArch::DIV_DU:
4663 case LoongArch::MOD_D:
4664 case LoongArch::MOD_DU:
4665 return insertDivByZeroTrap(MI, BB);
4666 break;
4667 case LoongArch::WRFCSR: {
4668 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4669 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4670 .addReg(MI.getOperand(1).getReg());
4671 MI.eraseFromParent();
4672 return BB;
4673 }
4674 case LoongArch::RDFCSR: {
4675 MachineInstr *ReadFCSR =
4676 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4677 MI.getOperand(0).getReg())
4678 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4679 ReadFCSR->getOperand(1).setIsUndef();
4680 MI.eraseFromParent();
4681 return BB;
4682 }
4683 case LoongArch::PseudoVBZ:
4684 case LoongArch::PseudoVBZ_B:
4685 case LoongArch::PseudoVBZ_H:
4686 case LoongArch::PseudoVBZ_W:
4687 case LoongArch::PseudoVBZ_D:
4688 case LoongArch::PseudoVBNZ:
4689 case LoongArch::PseudoVBNZ_B:
4690 case LoongArch::PseudoVBNZ_H:
4691 case LoongArch::PseudoVBNZ_W:
4692 case LoongArch::PseudoVBNZ_D:
4693 case LoongArch::PseudoXVBZ:
4694 case LoongArch::PseudoXVBZ_B:
4695 case LoongArch::PseudoXVBZ_H:
4696 case LoongArch::PseudoXVBZ_W:
4697 case LoongArch::PseudoXVBZ_D:
4698 case LoongArch::PseudoXVBNZ:
4699 case LoongArch::PseudoXVBNZ_B:
4700 case LoongArch::PseudoXVBNZ_H:
4701 case LoongArch::PseudoXVBNZ_W:
4702 case LoongArch::PseudoXVBNZ_D:
4703 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4704 case LoongArch::PseudoXVINSGR2VR_B:
4705 case LoongArch::PseudoXVINSGR2VR_H:
4706 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4707 case LoongArch::PseudoCTPOP:
4708 return emitPseudoCTPOP(MI, BB, Subtarget);
4709 case TargetOpcode::STATEPOINT:
4710 // STATEPOINT is a pseudo instruction which has no implicit defs/uses,
4711 // while the bl call instruction (to which the statepoint is lowered in
4712 // the end) has an implicit def. This def is early-clobber as it is set at
4713 // the moment of the call, earlier than any use is read.
4714 // Add this implicit dead def here as a workaround.
4715 MI.addOperand(*MI.getMF(),
4716 MachineOperand::CreateReg(
4717 LoongArch::R1, /*isDef*/ true,
4718 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4719 /*isUndef*/ false, /*isEarlyClobber*/ true));
4720 if (!Subtarget.is64Bit())
4721 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4722 return emitPatchPoint(MI, BB);
4723 }
4724}
4725
4726 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4727 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4728 unsigned *Fast) const {
4729 if (!Subtarget.hasUAL())
4730 return false;
4731
4732 // TODO: set reasonable speed number.
4733 if (Fast)
4734 *Fast = 1;
4735 return true;
4736}
4737
4738const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4739 switch ((LoongArchISD::NodeType)Opcode) {
4740 case LoongArchISD::FIRST_NUMBER:
4741 break;
4742
4743#define NODE_NAME_CASE(node) \
4744 case LoongArchISD::node: \
4745 return "LoongArchISD::" #node;
4746
4747 // TODO: Add more target-dependent nodes later.
4748 NODE_NAME_CASE(CALL)
4749 NODE_NAME_CASE(CALL_MEDIUM)
4750 NODE_NAME_CASE(CALL_LARGE)
4751 NODE_NAME_CASE(RET)
4752 NODE_NAME_CASE(TAIL)
4753 NODE_NAME_CASE(TAIL_MEDIUM)
4754 NODE_NAME_CASE(TAIL_LARGE)
4755 NODE_NAME_CASE(SLL_W)
4756 NODE_NAME_CASE(SRA_W)
4757 NODE_NAME_CASE(SRL_W)
4758 NODE_NAME_CASE(BSTRINS)
4759 NODE_NAME_CASE(BSTRPICK)
4760 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4761 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4762 NODE_NAME_CASE(FTINT)
4763 NODE_NAME_CASE(REVB_2H)
4764 NODE_NAME_CASE(REVB_2W)
4765 NODE_NAME_CASE(BITREV_4B)
4766 NODE_NAME_CASE(BITREV_8B)
4767 NODE_NAME_CASE(BITREV_W)
4768 NODE_NAME_CASE(ROTR_W)
4769 NODE_NAME_CASE(ROTL_W)
4770 NODE_NAME_CASE(DIV_W)
4771 NODE_NAME_CASE(DIV_WU)
4772 NODE_NAME_CASE(MOD_W)
4773 NODE_NAME_CASE(MOD_WU)
4774 NODE_NAME_CASE(CLZ_W)
4775 NODE_NAME_CASE(CTZ_W)
4776 NODE_NAME_CASE(DBAR)
4777 NODE_NAME_CASE(IBAR)
4778 NODE_NAME_CASE(BREAK)
4779 NODE_NAME_CASE(SYSCALL)
4780 NODE_NAME_CASE(CRC_W_B_W)
4781 NODE_NAME_CASE(CRC_W_H_W)
4782 NODE_NAME_CASE(CRC_W_W_W)
4783 NODE_NAME_CASE(CRC_W_D_W)
4784 NODE_NAME_CASE(CRCC_W_B_W)
4785 NODE_NAME_CASE(CRCC_W_H_W)
4786 NODE_NAME_CASE(CRCC_W_W_W)
4787 NODE_NAME_CASE(CRCC_W_D_W)
4788 NODE_NAME_CASE(CSRRD)
4789 NODE_NAME_CASE(CSRWR)
4790 NODE_NAME_CASE(CSRXCHG)
4791 NODE_NAME_CASE(IOCSRRD_B)
4792 NODE_NAME_CASE(IOCSRRD_H)
4793 NODE_NAME_CASE(IOCSRRD_W)
4794 NODE_NAME_CASE(IOCSRRD_D)
4795 NODE_NAME_CASE(IOCSRWR_B)
4796 NODE_NAME_CASE(IOCSRWR_H)
4797 NODE_NAME_CASE(IOCSRWR_W)
4798 NODE_NAME_CASE(IOCSRWR_D)
4799 NODE_NAME_CASE(CPUCFG)
4800 NODE_NAME_CASE(MOVGR2FCSR)
4801 NODE_NAME_CASE(MOVFCSR2GR)
4802 NODE_NAME_CASE(CACOP_D)
4803 NODE_NAME_CASE(CACOP_W)
4804 NODE_NAME_CASE(VSHUF)
4805 NODE_NAME_CASE(VPICKEV)
4806 NODE_NAME_CASE(VPICKOD)
4807 NODE_NAME_CASE(VPACKEV)
4808 NODE_NAME_CASE(VPACKOD)
4809 NODE_NAME_CASE(VILVL)
4810 NODE_NAME_CASE(VILVH)
4811 NODE_NAME_CASE(VSHUF4I)
4812 NODE_NAME_CASE(VREPLVEI)
4813 NODE_NAME_CASE(VREPLGR2VR)
4814 NODE_NAME_CASE(XVPERMI)
4815 NODE_NAME_CASE(VPICK_SEXT_ELT)
4816 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4817 NODE_NAME_CASE(VREPLVE)
4818 NODE_NAME_CASE(VALL_ZERO)
4819 NODE_NAME_CASE(VANY_ZERO)
4820 NODE_NAME_CASE(VALL_NONZERO)
4821 NODE_NAME_CASE(VANY_NONZERO)
4822 NODE_NAME_CASE(FRECIPE)
4823 NODE_NAME_CASE(FRSQRTE)
4824 }
4825#undef NODE_NAME_CASE
4826 return nullptr;
4827}
4828
4829//===----------------------------------------------------------------------===//
4830// Calling Convention Implementation
4831//===----------------------------------------------------------------------===//
4832
4833 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
4834// with a0-a1 reused to return values. Generally, the GPRs are used to pass
4835// fixed-point arguments, and floating-point arguments when no FPR is available
4836// or with soft float ABI.
4837const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4838 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4839 LoongArch::R10, LoongArch::R11};
4840 // Eight floating-point registers fa0-fa7 are used for passing floating-point
4841// arguments, and fa0-fa1 are also used to return values.
4842const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4843 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4844 LoongArch::F6, LoongArch::F7};
4845// FPR32 and FPR64 alias each other.
4846 const MCPhysReg ArgFPR64s[] = {
4847 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4848 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4849
4850const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4851 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4852 LoongArch::VR6, LoongArch::VR7};
4853
4854const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4855 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4856 LoongArch::XR6, LoongArch::XR7};
4857
4858// Pass a 2*GRLen argument that has been split into two GRLen values through
4859// registers or the stack as necessary.
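// For example, on LA64 an i128 argument is split into two i64 halves: if a
// GPR is still free, the first half is passed in it and the second half in
// the next GPR or a stack slot; otherwise both halves go on the stack, the
// first aligned to the larger of GRLen/8 and the argument's original
// alignment.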
4860static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4861 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4862 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4863 ISD::ArgFlagsTy ArgFlags2) {
4864 unsigned GRLenInBytes = GRLen / 8;
4865 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4866 // At least one half can be passed via register.
4867 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4868 VA1.getLocVT(), CCValAssign::Full));
4869 } else {
4870 // Both halves must be passed on the stack, with proper alignment.
4871 Align StackAlign =
4872 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4873 State.addLoc(
4874 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4875 State.AllocateStack(GRLenInBytes, StackAlign),
4876 VA1.getLocVT(), CCValAssign::Full));
4877 State.addLoc(CCValAssign::getMem(
4878 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4879 LocVT2, CCValAssign::Full));
4880 return false;
4881 }
4882 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4883 // The second half can also be passed via register.
4884 State.addLoc(
4885 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4886 } else {
4887 // The second half is passed via the stack, without additional alignment.
4888 State.addLoc(CCValAssign::getMem(
4889 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4890 LocVT2, CCValAssign::Full));
4891 }
4892 return false;
4893}
4894
4895// Implements the LoongArch calling convention. Returns true upon failure.
4896static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4897 unsigned ValNo, MVT ValVT,
4898 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4899 CCState &State, bool IsFixed, bool IsRet,
4900 Type *OrigTy) {
4901 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4902 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4903 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4904 MVT LocVT = ValVT;
4905
4906 // Any return value split into more than two values can't be returned
4907 // directly.
4908 if (IsRet && ValNo > 1)
4909 return true;
4910
4911 // Use GPRs for floating point when passing a variadic argument or when no
4912 // FPR is available.
4912 bool UseGPRForFloat = true;
4913
4914 switch (ABI) {
4915 default:
4916 llvm_unreachable("Unexpected ABI");
4917 break;
4918 case LoongArchABI::ABI_ILP32F:
4919 case LoongArchABI::ABI_LP64F:
4920 case LoongArchABI::ABI_ILP32D:
4921 case LoongArchABI::ABI_LP64D:
4922 UseGPRForFloat = !IsFixed;
4923 break;
4924 case LoongArchABI::ABI_ILP32S:
4925 case LoongArchABI::ABI_LP64S:
4926 break;
4927 }
4928
4929 // FPR32 and FPR64 alias each other.
4930 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4931 UseGPRForFloat = true;
4932
4933 if (UseGPRForFloat && ValVT == MVT::f32) {
4934 LocVT = GRLenVT;
4935 LocInfo = CCValAssign::BCvt;
4936 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4937 LocVT = MVT::i64;
4938 LocInfo = CCValAssign::BCvt;
4939 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4940 // TODO: Handle passing f64 on LA32 with D feature.
4941 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4942 }
4943
4944 // If this is a variadic argument, the LoongArch calling convention requires
4945 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4946 // byte alignment. An aligned register should be used regardless of whether
4947 // the original argument was split during legalisation or not. The argument
4948 // will not be passed by registers if the original type is larger than
4949 // 2*GRLen, so the register alignment rule does not apply.
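 // For instance (illustrative), a variadic __int128 on LA64 is 2*GRLen in
 // size and 2*GRLen aligned, so it must start in an even-numbered argument
 // register; the code below burns one GPR when the next free register is odd.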
4950 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4951 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4952 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4953 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4954 // Skip 'odd' register if necessary.
4955 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4956 State.AllocateReg(ArgGPRs);
4957 }
4958
4959 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4960 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4961 State.getPendingArgFlags();
4962
4963 assert(PendingLocs.size() == PendingArgFlags.size() &&
4964 "PendingLocs and PendingArgFlags out of sync");
4965
4966 // Split arguments might be passed indirectly, so keep track of the pending
4967 // values.
4968 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4969 LocVT = GRLenVT;
4970 LocInfo = CCValAssign::Indirect;
4971 PendingLocs.push_back(
4972 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4973 PendingArgFlags.push_back(ArgFlags);
4974 if (!ArgFlags.isSplitEnd()) {
4975 return false;
4976 }
4977 }
4978
4979 // If the split argument only had two elements, it should be passed directly
4980 // in registers or on the stack.
4981 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4982 PendingLocs.size() <= 2) {
4983 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4984 // Apply the normal calling convention rules to the first half of the
4985 // split argument.
4986 CCValAssign VA = PendingLocs[0];
4987 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4988 PendingLocs.clear();
4989 PendingArgFlags.clear();
4990 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4991 ArgFlags);
4992 }
4993
4994 // Allocate to a register if possible, or else a stack slot.
4995 Register Reg;
4996 unsigned StoreSizeBytes = GRLen / 8;
4997 Align StackAlign = Align(GRLen / 8);
4998
4999 if (ValVT == MVT::f32 && !UseGPRForFloat)
5000 Reg = State.AllocateReg(ArgFPR32s);
5001 else if (ValVT == MVT::f64 && !UseGPRForFloat)
5002 Reg = State.AllocateReg(ArgFPR64s);
5003 else if (ValVT.is128BitVector())
5004 Reg = State.AllocateReg(ArgVRs);
5005 else if (ValVT.is256BitVector())
5006 Reg = State.AllocateReg(ArgXRs);
5007 else
5008 Reg = State.AllocateReg(ArgGPRs);
5009
5010 unsigned StackOffset =
5011 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
5012
5013 // If we reach this point and PendingLocs is non-empty, we must be at the
5014 // end of a split argument that must be passed indirectly.
5015 if (!PendingLocs.empty()) {
5016 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
5017 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
5018 for (auto &It : PendingLocs) {
5019 if (Reg)
5020 It.convertToReg(Reg);
5021 else
5022 It.convertToMem(StackOffset);
5023 State.addLoc(It);
5024 }
5025 PendingLocs.clear();
5026 PendingArgFlags.clear();
5027 return false;
5028 }
5029 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5030 "Expected an GRLenVT at this stage");
5031
5032 if (Reg) {
5033 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5034 return false;
5035 }
5036
5037 // When a floating-point value is passed on the stack, no bit-cast is needed.
5038 if (ValVT.isFloatingPoint()) {
5039 LocVT = ValVT;
5040 LocInfo = CCValAssign::Full;
5041 }
5042
5043 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5044 return false;
5045}
5046
5047void LoongArchTargetLowering::analyzeInputArgs(
5048 MachineFunction &MF, CCState &CCInfo,
5049 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5050 LoongArchCCAssignFn Fn) const {
5051 FunctionType *FType = MF.getFunction().getFunctionType();
5052 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5053 MVT ArgVT = Ins[i].VT;
5054 Type *ArgTy = nullptr;
5055 if (IsRet)
5056 ArgTy = FType->getReturnType();
5057 else if (Ins[i].isOrigArg())
5058 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5059 LoongArchABI::ABI ABI =
5060 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5061 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5062 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5063 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5064 << '\n');
5065 llvm_unreachable("");
5066 }
5067 }
5068}
5069
5070void LoongArchTargetLowering::analyzeOutputArgs(
5071 MachineFunction &MF, CCState &CCInfo,
5072 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5073 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5074 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5075 MVT ArgVT = Outs[i].VT;
5076 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5077 LoongArchABI::ABI ABI =
5078 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5079 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5080 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5081 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5082 << "\n");
5083 llvm_unreachable("");
5084 }
5085 }
5086}
5087
5088// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5089// values.
5090static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5091 const CCValAssign &VA, const SDLoc &DL) {
5092 switch (VA.getLocInfo()) {
5093 default:
5094 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5095 case CCValAssign::Full:
5097 break;
5098 case CCValAssign::BCvt:
5099 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5100 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5101 else
5102 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5103 break;
5104 }
5105 return Val;
5106}
5107
5108static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5109 const CCValAssign &VA, const SDLoc &DL,
5110 const ISD::InputArg &In,
5111 const LoongArchTargetLowering &TLI) {
5112 MachineFunction &MF = DAG.getMachineFunction();
5113 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5114 EVT LocVT = VA.getLocVT();
5115 SDValue Val;
5116 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5117 Register VReg = RegInfo.createVirtualRegister(RC);
5118 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5119 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5120
5121 // If input is sign extended from 32 bits, note it for the OptW pass.
5122 if (In.isOrigArg()) {
5123 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5124 if (OrigArg->getType()->isIntegerTy()) {
5125 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5126 // An input zero extended from i31 can also be considered sign extended.
5127 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5128 (BitWidth < 32 && In.Flags.isZExt())) {
5129 LoongArchMachineFunctionInfo *LAFI =
5130 MF.getInfo<LoongArchMachineFunctionInfo>();
5131 LAFI->addSExt32Register(VReg);
5132 }
5133 }
5134 }
5135
5136 return convertLocVTToValVT(DAG, Val, VA, DL);
5137}
5138
5139// The caller is responsible for loading the full value if the argument is
5140// passed with CCValAssign::Indirect.
5141static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5142 const CCValAssign &VA, const SDLoc &DL) {
5143 MachineFunction &MF = DAG.getMachineFunction();
5144 MachineFrameInfo &MFI = MF.getFrameInfo();
5145 EVT ValVT = VA.getValVT();
5146 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5147 /*IsImmutable=*/true);
5148 SDValue FIN = DAG.getFrameIndex(
5150
5151 ISD::LoadExtType ExtType;
5152 switch (VA.getLocInfo()) {
5153 default:
5154 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5155 case CCValAssign::Full:
5157 case CCValAssign::BCvt:
5158 ExtType = ISD::NON_EXTLOAD;
5159 break;
5160 }
5161 return DAG.getExtLoad(
5162 ExtType, DL, VA.getLocVT(), Chain, FIN,
5164}
5165
5166static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5167 const CCValAssign &VA, const SDLoc &DL) {
5168 EVT LocVT = VA.getLocVT();
5169
5170 switch (VA.getLocInfo()) {
5171 default:
5172 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5173 case CCValAssign::Full:
5174 break;
5175 case CCValAssign::BCvt:
5176 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5177 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5178 else
5179 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5180 break;
5181 }
5182 return Val;
5183}
5184
5185static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5186 CCValAssign::LocInfo LocInfo,
5187 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5188 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5189 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5190 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5191 static const MCPhysReg GPRList[] = {
5192 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5193 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5194 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5195 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5196 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5197 return false;
5198 }
5199 }
5200
5201 if (LocVT == MVT::f32) {
5202 // Pass in STG registers: F1, F2, F3, F4
5203 // fs0,fs1,fs2,fs3
5204 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5205 LoongArch::F26, LoongArch::F27};
5206 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5207 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5208 return false;
5209 }
5210 }
5211
5212 if (LocVT == MVT::f64) {
5213 // Pass in STG registers: D1, D2, D3, D4
5214 // fs4,fs5,fs6,fs7
5215 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5216 LoongArch::F30_64, LoongArch::F31_64};
5217 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5218 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5219 return false;
5220 }
5221 }
5222
5223 report_fatal_error("No registers left in GHC calling convention");
5224 return true;
5225}
5226
5227// Transform physical registers into virtual registers.
5229 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5230 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5231 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5232
5233 MachineFunction &MF = DAG.getMachineFunction();
5234
5235 switch (CallConv) {
5236 default:
5237 llvm_unreachable("Unsupported calling convention");
5238 case CallingConv::C:
5239 case CallingConv::Fast:
5240 break;
5241 case CallingConv::GHC:
5242 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5243 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5245 "GHC calling convention requires the F and D extensions");
5246 }
5247
5248 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5249 MVT GRLenVT = Subtarget.getGRLenVT();
5250 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5251 // Used with varargs to accumulate store chains.
5252 std::vector<SDValue> OutChains;
5253
5254 // Assign locations to all of the incoming arguments.
5255 SmallVector<CCValAssign> ArgLocs;
5256 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5257
5258 if (CallConv == CallingConv::GHC)
5260 else
5261 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5262
5263 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5264 CCValAssign &VA = ArgLocs[i];
5265 SDValue ArgValue;
5266 if (VA.isRegLoc())
5267 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5268 else
5269 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5270 if (VA.getLocInfo() == CCValAssign::Indirect) {
5271 // If the original argument was split and passed by reference, we need to
5272 // load all parts of it here (using the same address).
5273 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5275 unsigned ArgIndex = Ins[i].OrigArgIndex;
5276 unsigned ArgPartOffset = Ins[i].PartOffset;
5277 assert(ArgPartOffset == 0);
5278 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5279 CCValAssign &PartVA = ArgLocs[i + 1];
5280 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5281 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5282 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5283 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5285 ++i;
5286 }
5287 continue;
5288 }
5289 InVals.push_back(ArgValue);
5290 }
5291
5292 if (IsVarArg) {
5294 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5295 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5296 MachineFrameInfo &MFI = MF.getFrameInfo();
5297 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5298 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5299
5300 // Offset of the first variable argument from stack pointer, and size of
5301 // the vararg save area. For now, the varargs save area is either zero or
5302 // large enough to hold a0-a7.
5303 int VaArgOffset, VarArgsSaveSize;
5304
5305 // If all registers are allocated, then all varargs must be passed on the
5306 // stack and we don't need to save any argregs.
5307 if (ArgRegs.size() == Idx) {
5308 VaArgOffset = CCInfo.getStackSize();
5309 VarArgsSaveSize = 0;
5310 } else {
5311 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5312 VaArgOffset = -VarArgsSaveSize;
5313 }
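 // E.g. if the fixed arguments consumed a0-a3 (Idx == 4), only a4-a7 need
 // saving below, so VarArgsSaveSize is 4 * GRLenInBytes and VaArgOffset is
 // -VarArgsSaveSize.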
5314
5315 // Record the frame index of the first variable argument
5316 // which is a value necessary to VASTART.
5317 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5318 LoongArchFI->setVarArgsFrameIndex(FI);
5319
5320 // If saving an odd number of registers then create an extra stack slot to
5321 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5322 // offsets to even-numbered registers remain 2*GRLen-aligned.
5323 if (Idx % 2) {
5324 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5325 true);
5326 VarArgsSaveSize += GRLenInBytes;
5327 }
5328
5329 // Copy the integer registers that may have been used for passing varargs
5330 // to the vararg save area.
5331 for (unsigned I = Idx; I < ArgRegs.size();
5332 ++I, VaArgOffset += GRLenInBytes) {
5333 const Register Reg = RegInfo.createVirtualRegister(RC);
5334 RegInfo.addLiveIn(ArgRegs[I], Reg);
5335 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5336 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5337 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5338 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5340 cast<StoreSDNode>(Store.getNode())
5341 ->getMemOperand()
5342 ->setValue((Value *)nullptr);
5343 OutChains.push_back(Store);
5344 }
5345 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5346 }
5347
5348 // All stores are grouped in one node to allow the matching between
5349 // the size of Ins and InVals. This only happens for vararg functions.
5350 if (!OutChains.empty()) {
5351 OutChains.push_back(Chain);
5352 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5353 }
5354
5355 return Chain;
5356}
5357
5359 return CI->isTailCall();
5360}
5361
5362// Check if the return value is used as only a return value, as otherwise
5363// we can't perform a tail-call.
5365 SDValue &Chain) const {
5366 if (N->getNumValues() != 1)
5367 return false;
5368 if (!N->hasNUsesOfValue(1, 0))
5369 return false;
5370
5371 SDNode *Copy = *N->user_begin();
5372 if (Copy->getOpcode() != ISD::CopyToReg)
5373 return false;
5374
5375 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5376 // isn't safe to perform a tail call.
5377 if (Copy->getGluedNode())
5378 return false;
5379
5380 // The copy must be used by a LoongArchISD::RET, and nothing else.
5381 bool HasRet = false;
5382 for (SDNode *Node : Copy->users()) {
5383 if (Node->getOpcode() != LoongArchISD::RET)
5384 return false;
5385 HasRet = true;
5386 }
5387
5388 if (!HasRet)
5389 return false;
5390
5391 Chain = Copy->getOperand(0);
5392 return true;
5393}
5394
5395// Check whether the call is eligible for tail call optimization.
5396bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5397 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5398 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5399
5400 auto CalleeCC = CLI.CallConv;
5401 auto &Outs = CLI.Outs;
5402 auto &Caller = MF.getFunction();
5403 auto CallerCC = Caller.getCallingConv();
5404
5405 // Do not tail call opt if the stack is used to pass parameters.
5406 if (CCInfo.getStackSize() != 0)
5407 return false;
5408
5409 // Do not tail call opt if any parameters need to be passed indirectly.
5410 for (auto &VA : ArgLocs)
5411 if (VA.getLocInfo() == CCValAssign::Indirect)
5412 return false;
5413
5414 // Do not tail call opt if either caller or callee uses struct return
5415 // semantics.
5416 auto IsCallerStructRet = Caller.hasStructRetAttr();
5417 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5418 if (IsCallerStructRet || IsCalleeStructRet)
5419 return false;
5420
5421 // Do not tail call opt if either the callee or caller has a byval argument.
5422 for (auto &Arg : Outs)
5423 if (Arg.Flags.isByVal())
5424 return false;
5425
5426 // The callee has to preserve all registers the caller needs to preserve.
5427 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5428 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5429 if (CalleeCC != CallerCC) {
5430 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5431 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5432 return false;
5433 }
5434 return true;
5435}
5436
5438 return DAG.getDataLayout().getPrefTypeAlign(
5439 VT.getTypeForEVT(*DAG.getContext()));
5440}
5441
5442// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5443// and output parameter nodes.
5444SDValue
5446 SmallVectorImpl<SDValue> &InVals) const {
5447 SelectionDAG &DAG = CLI.DAG;
5448 SDLoc &DL = CLI.DL;
5449 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5450 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5451 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5452 SDValue Chain = CLI.Chain;
5453 SDValue Callee = CLI.Callee;
5454 CallingConv::ID CallConv = CLI.CallConv;
5455 bool IsVarArg = CLI.IsVarArg;
5456 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5457 MVT GRLenVT = Subtarget.getGRLenVT();
5458 bool &IsTailCall = CLI.IsTailCall;
5459
5461
5462 // Analyze the operands of the call, assigning locations to each operand.
5464 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5465
5466 if (CallConv == CallingConv::GHC)
5467 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5468 else
5469 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5470
5471 // Check if it's really possible to do a tail call.
5472 if (IsTailCall)
5473 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5474
5475 if (IsTailCall)
5476 ++NumTailCalls;
5477 else if (CLI.CB && CLI.CB->isMustTailCall())
5478 report_fatal_error("failed to perform tail call elimination on a call "
5479 "site marked musttail");
5480
5481 // Get a count of how many bytes are to be pushed on the stack.
5482 unsigned NumBytes = ArgCCInfo.getStackSize();
5483
5484 // Create local copies for byval args.
5485 SmallVector<SDValue> ByValArgs;
5486 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5487 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5488 if (!Flags.isByVal())
5489 continue;
5490
5491 SDValue Arg = OutVals[i];
5492 unsigned Size = Flags.getByValSize();
5493 Align Alignment = Flags.getNonZeroByValAlign();
5494
5495 int FI =
5496 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5497 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5498 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5499
5500 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5501 /*IsVolatile=*/false,
5502 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5504 ByValArgs.push_back(FIPtr);
5505 }
5506
5507 if (!IsTailCall)
5508 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5509
5510 // Copy argument values to their designated locations.
5512 SmallVector<SDValue> MemOpChains;
5513 SDValue StackPtr;
5514 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5515 CCValAssign &VA = ArgLocs[i];
5516 SDValue ArgValue = OutVals[i];
5517 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5518
5519 // Promote the value if needed.
5520 // For now, only handle fully promoted and indirect arguments.
5521 if (VA.getLocInfo() == CCValAssign::Indirect) {
5522 // Store the argument in a stack slot and pass its address.
5523 Align StackAlign =
5524 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5525 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5526 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5527 // If the original argument was split and passed by reference, we need to
5528 // store the required parts of it here (and pass just one address).
5529 unsigned ArgIndex = Outs[i].OrigArgIndex;
5530 unsigned ArgPartOffset = Outs[i].PartOffset;
5531 assert(ArgPartOffset == 0);
5532 // Calculate the total size to store. We don't have access to what we're
5533 // actually storing other than performing the loop and collecting the
5534 // info.
5536 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5537 SDValue PartValue = OutVals[i + 1];
5538 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5539 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5540 EVT PartVT = PartValue.getValueType();
5541
5542 StoredSize += PartVT.getStoreSize();
5543 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5544 Parts.push_back(std::make_pair(PartValue, Offset));
5545 ++i;
5546 }
5547 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5548 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5549 MemOpChains.push_back(
5550 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5552 for (const auto &Part : Parts) {
5553 SDValue PartValue = Part.first;
5554 SDValue PartOffset = Part.second;
5556 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5557 MemOpChains.push_back(
5558 DAG.getStore(Chain, DL, PartValue, Address,
5560 }
5561 ArgValue = SpillSlot;
5562 } else {
5563 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5564 }
5565
5566 // Use local copy if it is a byval arg.
5567 if (Flags.isByVal())
5568 ArgValue = ByValArgs[j++];
5569
5570 if (VA.isRegLoc()) {
5571 // Queue up the argument copies and emit them at the end.
5572 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5573 } else {
5574 assert(VA.isMemLoc() && "Argument not register or memory");
5575 assert(!IsTailCall && "Tail call not allowed if stack is used "
5576 "for passing parameters");
5577
5578 // Work out the address of the stack slot.
5579 if (!StackPtr.getNode())
5580 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5582 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5584
5585 // Emit the store.
5586 MemOpChains.push_back(
5587 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5588 }
5589 }
5590
5591 // Join the stores, which are independent of one another.
5592 if (!MemOpChains.empty())
5593 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5594
5595 SDValue Glue;
5596
5597 // Build a sequence of copy-to-reg nodes, chained and glued together.
5598 for (auto &Reg : RegsToPass) {
5599 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5600 Glue = Chain.getValue(1);
5601 }
5602
5603 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5604 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5605 // split it and then direct call can be matched by PseudoCALL.
5606 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5607 const GlobalValue *GV = S->getGlobal();
5608 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5611 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5612 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5613 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5616 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5617 }
5618
5619 // The first call operand is the chain and the second is the target address.
5621 Ops.push_back(Chain);
5622 Ops.push_back(Callee);
5623
5624 // Add argument registers to the end of the list so that they are
5625 // known live into the call.
5626 for (auto &Reg : RegsToPass)
5627 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5628
5629 if (!IsTailCall) {
5630 // Add a register mask operand representing the call-preserved registers.
5631 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5632 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5633 assert(Mask && "Missing call preserved mask for calling convention");
5634 Ops.push_back(DAG.getRegisterMask(Mask));
5635 }
5636
5637 // Glue the call to the argument copies, if any.
5638 if (Glue.getNode())
5639 Ops.push_back(Glue);
5640
5641 // Emit the call.
5642 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5643 unsigned Op;
5644 switch (DAG.getTarget().getCodeModel()) {
5645 default:
5646 report_fatal_error("Unsupported code model");
5647 case CodeModel::Small:
5648 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5649 break;
5650 case CodeModel::Medium:
5651 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5653 break;
5654 case CodeModel::Large:
5655 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5657 break;
5658 }
5659
5660 if (IsTailCall) {
5662 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5663 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5664 return Ret;
5665 }
5666
5667 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5668 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5669 Glue = Chain.getValue(1);
5670
5671 // Mark the end of the call, which is glued to the call itself.
5672 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5673 Glue = Chain.getValue(1);
5674
5675 // Assign locations to each value returned by this call.
5677 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5678 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5679
5680 // Copy all of the result registers out of their specified physreg.
5681 for (auto &VA : RVLocs) {
5682 // Copy the value out.
5683 SDValue RetValue =
5684 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5685 // Glue the RetValue to the end of the call sequence.
5686 Chain = RetValue.getValue(1);
5687 Glue = RetValue.getValue(2);
5688
5689 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5690
5691 InVals.push_back(RetValue);
5692 }
5693
5694 return Chain;
5695}
5696
5698 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5699 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
5700 const Type *RetTy) const {
5702 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5703
5704 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5705 LoongArchABI::ABI ABI =
5706 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5707 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5708 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5709 nullptr))
5710 return false;
5711 }
5712 return true;
5713}
5714
5716 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5718 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5719 SelectionDAG &DAG) const {
5720 // Stores the assignment of the return value to a location.
5722
5723 // Info about the registers and stack slot.
5724 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5725 *DAG.getContext());
5726
5727 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5728 nullptr, CC_LoongArch);
5729 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5730 report_fatal_error("GHC functions return void only");
5731 SDValue Glue;
5732 SmallVector<SDValue, 4> RetOps(1, Chain);
5733
5734 // Copy the result values into the output registers.
5735 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5736 CCValAssign &VA = RVLocs[i];
5737 assert(VA.isRegLoc() && "Can only return in registers!");
5738
5739 // Handle a 'normal' return.
5740 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5741 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5742
5743 // Guarantee that all emitted copies are stuck together.
5744 Glue = Chain.getValue(1);
5745 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5746 }
5747
5748 RetOps[0] = Chain; // Update chain.
5749
5750 // Add the glue node if we have it.
5751 if (Glue.getNode())
5752 RetOps.push_back(Glue);
5753
5754 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5755}
5756
5758 EVT VT) const {
5759 if (!Subtarget.hasExtLSX())
5760 return false;
5761
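 // Illustrative check of the masks below: 0.5f (0x3f000000) and 2.0f
 // (0x40000000) pass the f32 test, since ANDing with 0x7e07ffff yields
 // 0x3e000000 and 0x40000000 respectively.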
5762 if (VT == MVT::f32) {
5763 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5764 return (masked == 0x3e000000 || masked == 0x40000000);
5765 }
5766
5767 if (VT == MVT::f64) {
5768 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5769 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5770 }
5771
5772 return false;
5773}
5774
5775bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5776 bool ForCodeSize) const {
5777 // TODO: Maybe need more checks here after vector extension is supported.
5778 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5779 return false;
5780 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5781 return false;
5782 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5783}
5784
5786 return true;
5787}
5788
5790 return true;
5791}
5792
5793bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5794 const Instruction *I) const {
5795 if (!Subtarget.is64Bit())
5796 return isa<LoadInst>(I) || isa<StoreInst>(I);
5797
5798 if (isa<LoadInst>(I))
5799 return true;
5800
5801 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5802 // require fences because we can use amswap_db.[w/d].
5803 Type *Ty = I->getOperand(0)->getType();
5804 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5805 unsigned Size = Ty->getIntegerBitWidth();
5806 return (Size == 8 || Size == 16);
5807 }
5808
5809 return false;
5810}
5811
5813 LLVMContext &Context,
5814 EVT VT) const {
5815 if (!VT.isVector())
5816 return getPointerTy(DL);
5817 return VT.changeVectorElementTypeToInteger();
5818}
5819
5821 // TODO: Support vectors.
5822 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5823}
5824
5826 const CallInst &I,
5827 MachineFunction &MF,
5828 unsigned Intrinsic) const {
5829 switch (Intrinsic) {
5830 default:
5831 return false;
5832 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5833 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5834 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5835 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5837 Info.memVT = MVT::i32;
5838 Info.ptrVal = I.getArgOperand(0);
5839 Info.offset = 0;
5840 Info.align = Align(4);
5843 return true;
5844 // TODO: Add more Intrinsics later.
5845 }
5846}
5847
5848// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
5849// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
5850// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
5851// regression, we need to implement it manually.
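// Rough sketch of the expansion below: an i8/i16 atomicrmw and/or/xor is
// widened to a 32-bit atomic on the containing aligned word, with the operand
// shifted into place; for 'and', the bits outside the operand are set to 1 so
// the neighbouring bytes are preserved.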
5854
5856 Op == AtomicRMWInst::And) &&
5857 "Unable to expand");
5858 unsigned MinWordSize = 4;
5859
5860 IRBuilder<> Builder(AI);
5861 LLVMContext &Ctx = Builder.getContext();
5862 const DataLayout &DL = AI->getDataLayout();
5863 Type *ValueType = AI->getType();
5864 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5865
5866 Value *Addr = AI->getPointerOperand();
5867 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5868 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5869
5870 Value *AlignedAddr = Builder.CreateIntrinsic(
5871 Intrinsic::ptrmask, {PtrTy, IntTy},
5872 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5873 "AlignedAddr");
5874
5875 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5876 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5877 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5878 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5879 Value *Mask = Builder.CreateShl(
5880 ConstantInt::get(WordType,
5881 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5882 ShiftAmt, "Mask");
5883 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5884 Value *ValOperand_Shifted =
5885 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5886 ShiftAmt, "ValOperand_Shifted");
5887 Value *NewOperand;
5888 if (Op == AtomicRMWInst::And)
5889 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5890 else
5891 NewOperand = ValOperand_Shifted;
5892
5893 AtomicRMWInst *NewAI =
5894 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5895 AI->getOrdering(), AI->getSyncScopeID());
5896
5897 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5898 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5899 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5900 AI->replaceAllUsesWith(FinalOldResult);
5901 AI->eraseFromParent();
5902}
5903
5906 // TODO: Add more AtomicRMWInst that needs to be extended.
5907
5908 // Since floating-point operation requires a non-trivial set of data
5909 // operations, use CmpXChg to expand.
5910 if (AI->isFloatingPointOperation() ||
5916
5917 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5918 (AI->getOperation() == AtomicRMWInst::Xchg ||
5919 AI->getOperation() == AtomicRMWInst::Add ||
5920 AI->getOperation() == AtomicRMWInst::Sub)) {
5921 return AtomicExpansionKind::None;
5922 }
5923
5924 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5925 if (Subtarget.hasLAMCAS()) {
5926 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5927 AI->getOperation() == AtomicRMWInst::Or ||
5928 AI->getOperation() == AtomicRMWInst::Xor))
5929 return AtomicExpansionKind::Expand;
5930 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5931 return AtomicExpansionKind::CmpXChg;
5932 }
5933
5934 if (Size == 8 || Size == 16)
5935 return AtomicExpansionKind::MaskedIntrinsic;
5936 return AtomicExpansionKind::None;
5937}
5938
5939static Intrinsic::ID
5941 AtomicRMWInst::BinOp BinOp) {
5942 if (GRLen == 64) {
5943 switch (BinOp) {
5944 default:
5945 llvm_unreachable("Unexpected AtomicRMW BinOp");
5947 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5948 case AtomicRMWInst::Add:
5949 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5950 case AtomicRMWInst::Sub:
5951 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5953 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5955 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5957 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5958 case AtomicRMWInst::Max:
5959 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5960 case AtomicRMWInst::Min:
5961 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5962 // TODO: support other AtomicRMWInst.
5963 }
5964 }
5965
5966 if (GRLen == 32) {
5967 switch (BinOp) {
5968 default:
5969 llvm_unreachable("Unexpected AtomicRMW BinOp");
5971 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5972 case AtomicRMWInst::Add:
5973 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5974 case AtomicRMWInst::Sub:
5975 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5977 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5978 // TODO: support other AtomicRMWInst.
5979 }
5980 }
5981
5982 llvm_unreachable("Unexpected GRLen\n");
5983}
5984
5987 AtomicCmpXchgInst *CI) const {
5988
5989 if (Subtarget.hasLAMCAS())
5990 return AtomicExpansionKind::None;
5991
5992 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5993 if (Size == 8 || Size == 16)
5994 return AtomicExpansionKind::MaskedIntrinsic;
5995 return AtomicExpansionKind::None;
5996}
5997
5999 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
6000 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
6001 AtomicOrdering FailOrd = CI->getFailureOrdering();
6002 Value *FailureOrdering =
6003 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
6004
6005 // TODO: Support cmpxchg on LA32.
6006 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
6007 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
6008 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
6009 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6010 Type *Tys[] = {AlignedAddr->getType()};
6011 Value *Result = Builder.CreateIntrinsic(
6012 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
6013 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6014 return Result;
6015}
6016
6018 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
6019 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
6020 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
6021 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
6022 // mask, as this produces better code than the LL/SC loop emitted by
6023 // int_loongarch_masked_atomicrmw_xchg.
6024 if (AI->getOperation() == AtomicRMWInst::Xchg &&
6025 isa<ConstantInt>(AI->getValOperand())) {
6026 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
6027 if (CVal->isZero())
6028 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6029 Builder.CreateNot(Mask, "Inv_Mask"),
6030 AI->getAlign(), Ord);
6031 if (CVal->isMinusOne())
6032 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6033 AI->getAlign(), Ord);
6034 }
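 // E.g. an atomicrmw xchg with constant 0 becomes a 32-bit AND with the
 // inverted field mask (clearing just that field), and xchg with -1 becomes an
 // OR with the mask (setting it).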
6035
6036 unsigned GRLen = Subtarget.getGRLen();
6037 Value *Ordering =
6038 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6039 Type *Tys[] = {AlignedAddr->getType()};
6041 AI->getModule(),
6043
6044 if (GRLen == 64) {
6045 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6046 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6047 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6048 }
6049
6050 Value *Result;
6051
6052 // Must pass the shift amount needed to sign extend the loaded value prior
6053 // to performing a signed comparison for min/max. ShiftAmt is the number of
6054 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6055 // is the number of bits to left+right shift the value in order to
6056 // sign-extend.
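 // E.g. for an i8 value at byte offset 1 on LA64, ShiftAmt is 8 and ValWidth
 // is 8, so the extra shift amount passed below is 64 - 8 - 8 = 48.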
6057 if (AI->getOperation() == AtomicRMWInst::Min ||
6059 const DataLayout &DL = AI->getDataLayout();
6060 unsigned ValWidth =
6061 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6062 Value *SextShamt =
6063 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6064 Result = Builder.CreateCall(LlwOpScwLoop,
6065 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6066 } else {
6067 Result =
6068 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6069 }
6070
6071 if (GRLen == 64)
6072 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6073 return Result;
6074}
6075
6077 const MachineFunction &MF, EVT VT) const {
6078 VT = VT.getScalarType();
6079
6080 if (!VT.isSimple())
6081 return false;
6082
6083 switch (VT.getSimpleVT().SimpleTy) {
6084 case MVT::f32:
6085 case MVT::f64:
6086 return true;
6087 default:
6088 break;
6089 }
6090
6091 return false;
6092}
6093
6095 const Constant *PersonalityFn) const {
6096 return LoongArch::R4;
6097}
6098
6100 const Constant *PersonalityFn) const {
6101 return LoongArch::R5;
6102}
6103
6104//===----------------------------------------------------------------------===//
6105// Target Optimization Hooks
6106//===----------------------------------------------------------------------===//
6107
6109 const LoongArchSubtarget &Subtarget) {
6110 // The FRECIPE feature's estimate instructions have a relative accuracy of 2^-14.
6111 // IEEE single precision has 23 mantissa bits and double precision has 52.
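 // Each Newton-Raphson refinement step roughly doubles the number of accurate
 // bits, so one step (about 28 bits) suffices for f32 and two steps (about 56
 // bits) for f64.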
6112 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6113 return RefinementSteps;
6114}
6115
6117 SelectionDAG &DAG, int Enabled,
6118 int &RefinementSteps,
6119 bool &UseOneConstNR,
6120 bool Reciprocal) const {
6121 if (Subtarget.hasFrecipe()) {
6122 SDLoc DL(Operand);
6123 EVT VT = Operand.getValueType();
6124
6125 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6126 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6127 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6128 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6129 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6130
6131 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6132 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6133
6134 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6135 if (Reciprocal)
6136 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6137
6138 return Estimate;
6139 }
6140 }
6141
6142 return SDValue();
6143}
6144
6146 SelectionDAG &DAG,
6147 int Enabled,
6148 int &RefinementSteps) const {
6149 if (Subtarget.hasFrecipe()) {
6150 SDLoc DL(Operand);
6151 EVT VT = Operand.getValueType();
6152
6153 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6154 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6155 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6156 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6157 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6158
6159 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6160 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6161
6162 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6163 }
6164 }
6165
6166 return SDValue();
6167}
6168
6169//===----------------------------------------------------------------------===//
6170// LoongArch Inline Assembly Support
6171//===----------------------------------------------------------------------===//
6172
6174LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6175 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6176 //
6177 // 'f': A floating-point register (if available).
6178 // 'k': A memory operand whose address is formed by a base register and
6179 // (optionally scaled) index register.
6180 // 'l': A signed 16-bit constant.
6181 // 'm': A memory operand whose address is formed by a base register and
6182 // offset that is suitable for use in instructions with the same
6183 // addressing mode as st.w and ld.w.
6184 // 'I': A signed 12-bit constant (for arithmetic instructions).
6185 // 'J': Integer zero.
6186 // 'K': An unsigned 12-bit constant (for logic instructions).
6187 // "ZB": An address that is held in a general-purpose register. The offset is
6188 // zero.
6189 // "ZC": A memory operand whose address is formed by a base register and
6190 // offset that is suitable for use in instructions with the same
6191 // addressing mode as ll.w and sc.w.
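 // For example (illustrative), `asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(100))`
 // uses 'I' for a signed 12-bit immediate, while an "m"(*p) operand is
 // constrained to the st.w/ld.w addressing mode described above.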
6192 if (Constraint.size() == 1) {
6193 switch (Constraint[0]) {
6194 default:
6195 break;
6196 case 'f':
6197 return C_RegisterClass;
6198 case 'l':
6199 case 'I':
6200 case 'J':
6201 case 'K':
6202 return C_Immediate;
6203 case 'k':
6204 return C_Memory;
6205 }
6206 }
6207
6208 if (Constraint == "ZC" || Constraint == "ZB")
6209 return C_Memory;
6210
6211 // 'm' is handled here.
6212 return TargetLowering::getConstraintType(Constraint);
6213}
6214
6215InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6216 StringRef ConstraintCode) const {
6217 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6218 .Case("k", InlineAsm::ConstraintCode::k)
6219 .Case("ZB", InlineAsm::ConstraintCode::ZB)
6220 .Case("ZC", InlineAsm::ConstraintCode::ZC)
6221 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6222}
6223
6224std::pair<unsigned, const TargetRegisterClass *>
6225LoongArchTargetLowering::getRegForInlineAsmConstraint(
6226 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6227 // First, see if this is a constraint that directly corresponds to a LoongArch
6228 // register class.
6229 if (Constraint.size() == 1) {
6230 switch (Constraint[0]) {
6231 case 'r':
6232 // TODO: Support fixed vectors up to GRLen?
6233 if (VT.isVector())
6234 break;
6235 return std::make_pair(0U, &LoongArch::GPRRegClass);
6236 case 'f':
6237 if (Subtarget.hasBasicF() && VT == MVT::f32)
6238 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6239 if (Subtarget.hasBasicD() && VT == MVT::f64)
6240 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6241 if (Subtarget.hasExtLSX() &&
6242 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6243 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6244 if (Subtarget.hasExtLASX() &&
6245 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6246 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6247 break;
6248 default:
6249 break;
6250 }
6251 }
6252
6253 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6254 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6255 // constraints while the official register name is prefixed with a '$'. So we
6256 // clip the '$' from the original constraint string (e.g. {$r0} to {r0})
6257 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
6258 // case insensitive, so no need to convert the constraint to upper case here.
6259 //
6260 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6261 // decode the usage of register name aliases into their official names. And
6262 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6263 // official register names.
6264 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6265 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6266 bool IsFP = Constraint[2] == 'f';
6267 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6268 std::pair<unsigned, const TargetRegisterClass *> R;
6270 TRI, join_items("", Temp.first, Temp.second), VT);
6271 // Match those names to the widest floating point register type available.
6272 if (IsFP) {
6273 unsigned RegNo = R.first;
6274 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6275 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6276 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6277 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6278 }
6279 }
6280 }
6281 return R;
6282 }
6283
6284 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6285}
6286
6287void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6288 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6289 SelectionDAG &DAG) const {
6290 // Currently only support length 1 constraints.
6291 if (Constraint.size() == 1) {
6292 switch (Constraint[0]) {
6293 case 'l':
6294 // Validate & create a 16-bit signed immediate operand.
6295 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6296 uint64_t CVal = C->getSExtValue();
6297 if (isInt<16>(CVal))
6298 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6299 Subtarget.getGRLenVT()));
6300 }
6301 return;
6302 case 'I':
6303 // Validate & create a 12-bit signed immediate operand.
6304 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6305 uint64_t CVal = C->getSExtValue();
6306 if (isInt<12>(CVal))
6307 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6308 Subtarget.getGRLenVT()));
6309 }
6310 return;
6311 case 'J':
6312 // Validate & create an integer zero operand.
6313 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6314 if (C->getZExtValue() == 0)
6315 Ops.push_back(
6316 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6317 return;
6318 case 'K':
6319 // Validate & create a 12-bit unsigned immediate operand.
6320 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6321 uint64_t CVal = C->getZExtValue();
6322 if (isUInt<12>(CVal))
6323 Ops.push_back(
6324 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6325 }
6326 return;
6327 default:
6328 break;
6329 }
6330 }
6331 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6332}
6333
6334#define GET_REGISTER_MATCHER
6335#include "LoongArchGenAsmMatcher.inc"
6336
6339 const MachineFunction &MF) const {
6340 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6341 std::string NewRegName = Name.second.str();
6342 Register Reg = MatchRegisterAltName(NewRegName);
6343 if (Reg == LoongArch::NoRegister)
6344 Reg = MatchRegisterName(NewRegName);
6345 if (Reg == LoongArch::NoRegister)
6347 Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6348 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6349 if (!ReservedRegs.test(Reg))
6350 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6351 StringRef(RegName) + "\"."));
6352 return Reg;
6353}
6354
6356 EVT VT, SDValue C) const {
6357 // TODO: Support vectors.
6358 if (!VT.isScalarInteger())
6359 return false;
6360
6361 // Omit the optimization if the data size exceeds GRLen.
6362 if (VT.getSizeInBits() > Subtarget.getGRLen())
6363 return false;
6364
6365 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6366 const APInt &Imm = ConstNode->getAPIntValue();
6367 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6368 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6369 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6370 return true;
6371 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6372 if (ConstNode->hasOneUse() &&
6373 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6374 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6375 return true;
6376 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6377 // in which the immediate has two set bits. Or break (MUL x, imm)
6378 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6379 // equals (1 << s0) - (1 << s1).
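 // Worked example (illustrative): Imm = 8320 = (1 << 13) + (1 << 7); Shifts
 // is 7, ImmPop is 65, and (Imm - ImmSmall) = 8192 is a power of two, so the
 // multiply can become (ADD (SLLI x, 13), (SLLI x, 7)).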
6380 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6381 unsigned Shifts = Imm.countr_zero();
6382 // Reject immediates which can be composed via a single LUI.
6383 if (Shifts >= 12)
6384 return false;
6385 // Reject multiplications can be optimized to
6386 // (SLLI (ALSL x, x, 1/2/3/4), s).
6387 APInt ImmPop = Imm.ashr(Shifts);
6388 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6389 return false;
6390 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6391 // since it needs one more instruction than other 3 cases.
6392 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6393 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6394 (ImmSmall - Imm).isPowerOf2())
6395 return true;
6396 }
6397 }
6398
6399 return false;
6400}
6401
6403 const AddrMode &AM,
6404 Type *Ty, unsigned AS,
6405 Instruction *I) const {
6406 // LoongArch has four basic addressing modes:
6407 // 1. reg
6408 // 2. reg + 12-bit signed offset
6409 // 3. reg + 14-bit signed offset left-shifted by 2
6410 // 4. reg1 + reg2
6411 // TODO: Add more checks once vector extensions are supported.
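 // For example (illustrative), `ld.w $a0, $a1, 2044` fits the 12-bit offset
 // form (mode 2), while `ldx.w $a0, $a1, $a2` is the reg+reg form (mode 4).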
6412
6413 // No global is ever allowed as a base.
6414 if (AM.BaseGV)
6415 return false;
6416
6417 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6418 // with `UAL` feature.
6419 if (!isInt<12>(AM.BaseOffs) &&
6420 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6421 return false;
6422
6423 switch (AM.Scale) {
6424 case 0:
6425 // "r+i" or just "i", depending on HasBaseReg.
6426 break;
6427 case 1:
6428 // "r+r+i" is not allowed.
6429 if (AM.HasBaseReg && AM.BaseOffs)
6430 return false;
6431 // Otherwise we have "r+r" or "r+i".
6432 break;
6433 case 2:
6434 // "2*r+r" or "2*r+i" is not allowed.
6435 if (AM.HasBaseReg || AM.BaseOffs)
6436 return false;
6437 // Allow "2*r" as "r+r".
6438 break;
6439 default:
6440 return false;
6441 }
6442
6443 return true;
6444}
6445
6447 return isInt<12>(Imm);
6448}
6449
6451 return isInt<12>(Imm);
6452}
6453
6455 // Zexts are free if they can be combined with a load.
6456 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6457 // poorly with type legalization of compares preferring sext.
6458 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6459 EVT MemVT = LD->getMemoryVT();
6460 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6461 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6462 LD->getExtensionType() == ISD::ZEXTLOAD))
6463 return true;
6464 }
6465
6466 return TargetLowering::isZExtFree(Val, VT2);
6467}
6468
6470 EVT DstVT) const {
6471 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6472}
6473
6475 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6476}
6477
6479 // TODO: Support vectors.
6480 if (Y.getValueType().isVector())
6481 return false;
6482
6483 return !isa<ConstantSDNode>(Y);
6484}
6485
6487 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6488 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6489}
6490
6492 Type *Ty, bool IsSigned) const {
6493 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6494 return true;
6495
6496 return IsSigned;
6497}
6498
6500 // Return false to suppress the unnecessary extensions if the LibCall
6501 // arguments or return value is a float narrower than GRLen on a soft FP ABI.
6502 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6503 Type.getSizeInBits() < Subtarget.getGRLen()))
6504 return false;
6505 return true;
6506}
6507
6508// memcpy and other memory intrinsics typically try to use wider loads/stores
6509// if the source/destination is aligned and the copy size is large enough. We
6510// therefore want to align such objects passed to memory intrinsics.
6512 unsigned &MinSize,
6513 Align &PrefAlign) const {
6514 if (!isa<MemIntrinsic>(CI))
6515 return false;
6516
6517 if (Subtarget.is64Bit()) {
6518 MinSize = 8;
6519 PrefAlign = Align(8);
6520 } else {
6521 MinSize = 4;
6522 PrefAlign = Align(4);
6523 }
6524
6525 return true;
6526}
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:594
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
Value * getPointerOperand()
Definition: Instructions.h:870
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:861
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool isDSOLocal() const
Definition: GlobalValue.h:306
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:287
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:195
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
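Taken together, these EVT queries are the kind of checks a lowering hook performs before committing to a vector strategy; the sketch below is illustrative only and assumes VT is the EVT of the value being lowered:

  // Hypothetical sketch: does VT look like a 128-bit or 256-bit SIMD vector
  // with byte-or-wider elements?
  bool LooksLikeSIMDVector =
      VT.isVector() && VT.isSimple() &&
      (VT.is128BitVector() || VT.is256BitVector()) &&
      VT.getScalarSizeInBits() >= 8;
  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;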
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
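As a hedged sketch of how these records are consumed, code that reloads a value from a fixed stack object attaches the matching pointer info to the load; DAG, Chain, DL, VT, and the frame index FI are assumptions here:

  // Hypothetical sketch: load a VT value from fixed stack slot FI.
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
  SDValue Load = DAG.getLoad(VT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));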
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
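Tying this to the RTLIB helpers above, a soft-float style conversion can be routed to a runtime routine roughly as follows; DAG, DL, the source value Src, and the integer result type RetVT are assumptions in this sketch:

  // Hypothetical sketch: select the fptosi runtime routine for the operand
  // and result types, record the pre-softening types, and emit the libcall.
  EVT OpVT = Src.getValueType();
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(OpVT, RetVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported FP-to-int conversion");
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  std::pair<SDValue, SDValue> Result =
      makeLibCall(DAG, LC, RetVT, Src, CallOptions, DL);
  SDValue Value = Result.first;  // converted integer value
  SDValue Chain = Result.second; // output chain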