//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that LoongArch uses to lower LLVM code into
// a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "LoongArchISelLowering.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
#include "LoongArchSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "loongarch-isel-lowering"

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
                                  cl::desc("Trap on integer division by zero."),
                                  cl::init(false));

LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                                                 const LoongArchSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Set up the register classes.

  addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
  if (Subtarget.hasBasicF())
    addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
  if (Subtarget.hasBasicD())
    addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);

  static const MVT::SimpleValueType LSXVTs[] = {
      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
  static const MVT::SimpleValueType LASXVTs[] = {
      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};

  if (Subtarget.hasExtLSX())
    for (MVT VT : LSXVTs)
      addRegisterClass(VT, &LoongArch::LSX128RegClass);

  if (Subtarget.hasExtLASX())
    for (MVT VT : LASXVTs)
      addRegisterClass(VT, &LoongArch::LASX256RegClass);

  // Set operations for LA32 and LA64.

                     MVT::i1, Promote);

                     GRLenVT, Custom);

  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
  // we get to know which of sll and revb.2h is faster.

  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
  // and i32 could still be byte-swapped relatively cheaply.

  // Set operations for LA64 only.

  if (Subtarget.is64Bit()) {
                       Custom);
  }

  // Set operations for LA32 only.

  if (!Subtarget.is64Bit()) {
  }

  static const ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE};

  // Set operations for 'F' feature.

  if (Subtarget.hasBasicF()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f32, Expand);

    if (Subtarget.is64Bit())

    if (!Subtarget.hasBasicD()) {
      if (Subtarget.is64Bit()) {
      }
    }
  }

  // Set operations for 'D' feature.

  if (Subtarget.hasBasicD()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setCondCodeAction(FPCCToExpand, MVT::f64, Expand);

    if (Subtarget.is64Bit())
  }

  // Set operations for 'LSX' feature.

  if (Subtarget.hasExtLSX()) {
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // Expand all truncating stores and extending loads.
      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
      // By default everything must be expanded. Then we will selectively turn
      // on ones that can be effectively codegen'd.
      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
        setOperationAction(Op, VT, Expand);
    }

    for (MVT VT : LSXVTs) {
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
    }
    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
                       VT, Expand);
    }
    setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
    setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
  }

  // Set operations for 'LASX' feature.

  if (Subtarget.hasExtLASX()) {
    for (MVT VT : LASXVTs) {
    }
    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
                       Legal);
                       VT, Legal);
                       Expand);
    }
    for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
    }
    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
                       VT, Expand);
    }
  }

  // Set DAG combine for LA32 and LA64.

  // Set DAG combine for 'LSX' feature.

  if (Subtarget.hasExtLSX())

  // Compute derived properties from the register classes.

  // Function alignments.
  // Set preferred alignments.

  // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
  if (Subtarget.hasLAMCAS())
    setMinCmpXchgSizeInBits(8);

  if (Subtarget.hasSCQ()) {
  }
}

bool LoongArchTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // In order to maximise the opportunity for common subexpression elimination,
  // keep a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  return false;
}

SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
                                                SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::EH_DWARF_CFA:
    return lowerEH_DWARF_CFA(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
    return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::FP_TO_SINT:
    return lowerFP_TO_SINT(Op, DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::UINT_TO_FP:
    return lowerUINT_TO_FP(Op, DAG);
  case ISD::SINT_TO_FP:
    return lowerSINT_TO_FP(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::WRITE_REGISTER:
    return lowerWRITE_REGISTER(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::BITREVERSE:
    return lowerBITREVERSE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  }
  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT OpVT = Op.getSimpleValueType();

  SDValue Vector = DAG.getUNDEF(OpVT);
  SDValue Val = Op.getOperand(0);
  SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());

  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
}

SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  SmallVector<SDValue, 8> Ops;
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, MVT::i64));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    SmallVector<int, 32> Mask;
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}
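
// For example, bit-reversing v8i16 (OrigEltNum = 8, NewEltNum = 2) first
// bit-reverses each i64 lane, which also reverses the order of the four i16
// elements within that lane; the mask built above, <3, 2, 1, 0, 7, 6, 5, 4>,
// then shuffles every element back to its original position.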

/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
static bool
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
                   unsigned CheckStride,
                   typename SmallVectorImpl<ValType>::const_iterator End,
                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}
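
// For instance, the even-position check used by lowerVECTOR_SHUFFLE_VPACKEV
// below, fitsRegularPattern<int>(Begin, 2, End, 0, 2), inspects every second
// mask element and expects the sequence 0, 2, 4, ..., so masks such as
// <0, ?, 2, ?, 4, ?, ...> (? being any index, including -1/undef) are accepted.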

/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
///
/// VREPLVEI performs vector broadcast based on an element specified by an
/// integer immediate, with its mask being similar to:
///   <x, x, x, ...>
/// where x is any valid index.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}
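
// For example, the v4i32 mask <1, 1, 1, 1> (or <1, -1, 1, -1>) broadcasts
// element 1 of the first source and yields a VREPLVEI node with immediate 1.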

/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
///
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
///
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
/// When undef's appear they are treated as if they were whatever value is
/// necessary in order to fit the above forms.
///
/// For example:
///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
///                                 i32 7, i32 6, i32 5, i32 4>
/// is lowered to:
///   (VSHUF4I_H $v0, $v1, 27)
/// where the 27 comes from:
///   3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  // When the size is less than 4, lower cost instructions may be used.
  if (Mask.size() < 4)
    return SDValue();

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < 4; ++i) {
    for (unsigned j = i; j < Mask.size(); j += 4) {
      int Idx = Mask[j];

      // Convert from vector index to 4-element subvector index.
      // If an index refers to an element outside of the subvector then give up.
      if (Idx != -1) {
        Idx -= 4 * (j / 4);
        if (Idx < 0 || Idx >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef.
      if (SubMask[i] == -1)
        SubMask[i] = Idx;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up.
      else if (Idx != -1 && Idx != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero.
  APInt Imm(64, 0);
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i];

    if (Idx == -1)
      Idx = 0;

    Imm <<= 2;
    Imm |= Idx & 0x3;
  }

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, MVT::i64));
}

/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
///
/// VPACKEV interleaves the even elements from each vector.
///
/// It is possible to lower into VPACKEV when the mask consists of two of the
/// following forms interleaved:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 2, 2, 4, 4, ...>
///   <0, n, 2, n+2, 4, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}
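
// For example, the v4i32 mask <0, 4, 2, 6> takes the even elements of both
// sources: the even mask positions match <0, 2, ...> (V1) and the odd
// positions match <n, n+2, ...> (V2), producing a single VPACKEV node.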

/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
///
/// VPACKOD interleaves the odd elements from each vector.
///
/// It is possible to lower into VPACKOD when the mask consists of two of the
/// following forms interleaved:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 1, 3, 3, 5, 5, ...>
///   <1, n+1, 3, n+3, 5, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VILVH (if possible).
///
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVH when the mask consists of two of the
/// following forms interleaved:
///   <x, x+1, x+2, ...>
///   <n+x, n+x+1, n+x+2, ...>
/// where n is the number of elements in the vector and x is half n.
/// For example:
///   <x, x, x+1, x+1, x+2, x+2, ...>
///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}
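
// For example, for v4i32 (HalfSize = 2) the mask <2, 6, 3, 7> interleaves the
// high halves of both sources and produces a single VILVH node.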

/// Lower VECTOR_SHUFFLE into VILVL (if possible).
///
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
/// of each vector.
///
/// It is possible to lower into VILVL when the mask consists of two of the
/// following forms interleaved:
///   <0, 1, 2, ...>
///   <n, n+1, n+2, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 0, 1, 1, 2, 2, ...>
///   <0, n, 1, n+1, 2, n+2, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}
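
// For example, the v4i32 mask <0, 4, 1, 5> interleaves the low halves of both
// sources and produces a single VILVL node.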

/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
///
/// VPICKEV copies the even elements of each vector into the result vector.
///
/// It is possible to lower into VPICKEV when the mask consists of two of the
/// following forms concatenated:
///   <0, 2, 4, ...>
///   <n, n+2, n+4, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <0, 2, 4, ..., 0, 2, 4, ...>
///   <0, 2, 4, ..., n, n+2, n+4, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}
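
// For example, the v4i32 mask <0, 2, 4, 6> concatenates the even elements of
// both sources and produces a single VPICKEV node.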

/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
///
/// VPICKOD copies the odd elements of each vector into the result vector.
///
/// It is possible to lower into VPICKOD when the mask consists of two of the
/// following forms concatenated:
///   <1, 3, 5, ...>
///   <n+1, n+3, n+5, ...>
/// where n is the number of elements in the vector.
/// For example:
///   <1, 3, 5, ..., 1, 3, 5, ...>
///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above forms.
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into VSHUF.
///
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
/// adding it as an operand to the resulting VSHUF.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                         MVT VT, SDValue V1, SDValue V2,
                                         SelectionDAG &DAG) {

  SmallVector<SDValue, 16> Ops;
  for (auto M : Mask)
    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  //   <0b00, 0b01> + <0b10, 0b11> ->
  //   0b0100       + 0b1110       -> 0b01001110
  //                                  <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}

/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 128-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
         "Vector type is unsupported for lsx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
///
/// It is an XVREPLVEI when the mask is:
///   <x, x, x, ..., x+n, x+n, x+n, ...>
/// where the number of x is equal to n and n is half the length of vector.
///
/// When undef's appear in the mask they are treated as if they were whatever
/// value is necessary in order to fit the above form.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
                                             ArrayRef<int> Mask, MVT VT,
                                             SDValue V1, SDValue V2,
                                             SelectionDAG &DAG) {
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
                              0)) {
    APInt Imm(64, SplatIndex);
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(Imm, DL, MVT::i64));
  }

  return SDValue();
}
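
// For example, the v8i32 mask <2, 2, 2, 2, 6, 6, 6, 6> fits with
// SplatIndex == 2: each 128-bit half broadcasts the element at the same
// position, so a single VREPLVEI node with immediate 2 covers both halves.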

/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  // When the size is less than or equal to 4, lower cost instructions may be
  // used.
  if (Mask.size() <= 4)
    return SDValue();
  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {
  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
}

/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  unsigned LeftSize = HalfSize / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
                              1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
                              1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
                                   Mask.size() + HalfSize - LeftSize, 1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize + LeftSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}
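
// For example, for v8i32 (HalfSize = 4, LeftSize = 2) the mask
// <2, 10, 3, 11, 6, 14, 7, 15> interleaves the high half of each 128-bit lane
// of both sources and is matched here.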

/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
                                   1) &&
           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
                                   Mask.size() + HalfSize, 1))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
                                            MVT VT, SDValue V1, SDValue V2,
                                            SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &RightMid = Mask.end() - Mask.size() / 4;
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
                                   2))
    V1 = OriV2;
  else
    return SDValue();

  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
                                   2))
    V2 = OriV2;
  else
    return SDValue();

  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}

/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
                                          SelectionDAG &DAG) {

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;
  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + HalfSize;
  const auto &End = Mask.end();

  // VECTOR_SHUFFLE concatenates the vectors:
  //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
  //  shuffling ->
  //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
  //
  // XVSHUF concatenates the vectors:
  //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
  //  shuffling ->
  //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
  SmallVector<SDValue, 8> MaskAlloc;
  for (auto it = Begin; it < Mid; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= 0 && *it < HalfSize) ||
             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
      int M = *it < HalfSize ? *it : *it - HalfSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");

  for (auto it = Mid; it < End; it++) {
    if (*it < 0) // UNDEF
      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
    else if ((*it >= HalfSize && *it < MaskSize) ||
             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
    } else
      return SDValue();
  }
  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");

  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
}
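
// For example, the v8i32 mask <0, 8, 1, 9, 4, 12, 5, 13> is converted to the
// lane-local mask <0, 4, 1, 5, 0, 4, 1, 5> before the VSHUF node is built.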

/// Shuffle vectors by lane to generate more optimized instructions.
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
///
/// Therefore, except for the following four cases, other cases are regarded
/// as cross-lane shuffles, where optimization is relatively limited.
///
/// - Shuffle high, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
/// - Shuffle low, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
/// - Shuffle low, low lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
/// - Shuffle high, high lanes of two input vectors
///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
///
/// The first case is the closest to LoongArch instructions and the other
/// cases need to be converted to it for processing.
///
/// This function may modify V1, V2 and Mask.
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
                                            MutableArrayRef<int> Mask, MVT VT,
                                            SDValue &V1, SDValue &V2,
                                            SelectionDAG &DAG) {

  enum HalfMaskType { HighLaneTy, LowLaneTy, None };

  int MaskSize = Mask.size();
  int HalfSize = Mask.size() / 2;

  HalfMaskType preMask = None, postMask = None;

  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    preMask = HighLaneTy;
  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    preMask = LowLaneTy;

  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
        return M < 0 || (M >= 0 && M < HalfSize) ||
               (M >= MaskSize && M < MaskSize + HalfSize);
      }))
    postMask = HighLaneTy;
  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
             return M < 0 || (M >= HalfSize && M < MaskSize) ||
                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
           }))
    postMask = LowLaneTy;

  // The pre-half of mask is high lane type, and the post-half of mask
  // is low lane type, which is closest to the LoongArch instructions.
  //
  // Note: In the LoongArch architecture, the high lane of mask corresponds
  // to the lower 128-bit of vector register, and the low lane of mask
  // corresponds to the higher 128-bit of vector register.
  if (preMask == HighLaneTy && postMask == LowLaneTy) {
    return;
  }
  if (preMask == LowLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01001110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01001110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b11101110, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b11101110, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
      *it = *it < 0 ? *it : *it - HalfSize;
    }
  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
    V1 = DAG.getBitcast(MVT::v4i64, V1);
    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
                     DAG.getConstant(0b01000100, DL, MVT::i64));
    V1 = DAG.getBitcast(VT, V1);

    if (!V2.isUndef()) {
      V2 = DAG.getBitcast(MVT::v4i64, V2);
      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
                       DAG.getConstant(0b01000100, DL, MVT::i64));
      V2 = DAG.getBitcast(VT, V2);
    }

    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
      *it = *it < 0 ? *it : *it + HalfSize;
    }
  } else { // cross-lane
    return;
  }
}
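
// For example, with v8i32 (HalfSize = 4) the mask <4, 5, 6, 7, 0, 1, 2, 3> is
// low-lane then high-lane: XVPERMI with immediate 0b01001110 swaps the two
// 128-bit halves of each source, and the mask is rewritten to
// <0, 1, 2, 3, 4, 5, 6, 7>, which the per-lane lowerings can then handle.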

/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
///
/// This routine breaks down the specific type of 256-bit shuffle and
/// dispatches to the lowering routines accordingly.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
         "Vector type is unsupported for lasx!");
  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
         "Two operands have different types!");
  assert(VT.getVectorNumElements() == Mask.size() &&
         "Unexpected mask size for shuffle!");
  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
  assert(Mask.size() >= 4 && "Mask size is less than 4.");

  // Canonicalize non-cross-lane shuffle vectors.
  SmallVector<int> NewMask(Mask);
  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);

  SDValue Result;
  // TODO: Add more comparison patterns.
  if (V2.isUndef()) {
    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
      return Result;
    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
      return Result;

    // TODO: This comment may be enabled in the future to better match the
    // pattern for instruction selection.
    /* V2 = V1; */
  }

  // It is recommended not to change the pattern comparison order for better
  // performance.
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
    return Result;

  return SDValue();
}

SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                     SelectionDAG &DAG) const {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  ArrayRef<int> OrigMask = SVOp->getMask();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
  int NumElements = VT.getVectorNumElements();
  SDLoc DL(Op);

  bool V1IsUndef = V1.isUndef();
  bool V2IsUndef = V2.isUndef();
  if (V1IsUndef && V2IsUndef)
    return DAG.getUNDEF(VT);

  // When we create a shuffle node we put the UNDEF node to second operand,
  // but in some cases the first operand may be transformed to UNDEF.
  // In this case we should just commute the node.
  if (V1IsUndef)
    return DAG.getCommutedVectorShuffle(*SVOp);

  // Check for non-undef masks pointing at an undef vector and make the masks
  // undef as well. This makes it easier to match the shuffle based solely on
  // the mask.
  if (V2IsUndef &&
      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
    SmallVector<int, 8> NewMask(OrigMask);
    for (int &M : NewMask)
      if (M >= NumElements)
        M = -1;
    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
  }

  // Check for illegal shuffle mask element index values.
  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
  (void)MaskUpperLimit;
  assert(llvm::all_of(OrigMask,
                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
         "Out of bounds shuffle index");

  // For each vector width, delegate to a specialized lowering routine.
  if (VT.is128BitVector())
    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  if (VT.is256BitVector())
    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);

  return SDValue();
}

static bool isConstantOrUndef(const SDValue Op) {
  if (Op->isUndef())
    return true;
  if (isa<ConstantSDNode>(Op))
    return true;
  if (isa<ConstantFPSDNode>(Op))
    return true;
  return false;
}

static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
    if (isConstantOrUndef(Op->getOperand(i)))
      return true;
  return false;
}

SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
    // The resulting code is the same length as the expansion, but it doesn't
    // use memory operations.
    EVT ResTy = Node->getValueType(0);

    assert(ResTy.isVector());

    unsigned NumElts = ResTy.getVectorNumElements();
    SDValue Vector = DAG.getUNDEF(ResTy);
    for (unsigned i = 0; i < NumElts; ++i) {
      Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
                           Node->getOperand(i),
                           DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Vector;
  }

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT VecTy = Op->getOperand(0)->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  EVT EltTy = VecTy.getVectorElementType();
  unsigned NumElts = VecTy.getVectorNumElements();

  if (isa<ConstantSDNode>(Idx) &&
      (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
       EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
    return Op;

  return SDValue();
}

SDValue
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (isa<ConstantSDNode>(Op->getOperand(2)))
    return Op;
  return SDValue();
}

SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // singlethread fences only synchronize with signal handlers on the same
  // thread and thus only need to preserve instruction order, not actually
  // enforce memory ordering.
  if (FenceSSID == SyncScope::SingleThread)
    // MEMBARRIER is a compiler barrier; it codegens to a no-op.
    return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));

  return Op;
}

SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
                                                     SelectionDAG &DAG) const {

  if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
    DAG.getContext()->emitError(
        "On LA64, only 64-bit registers can be written.");
    return Op.getOperand(0);
  }

  if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
    DAG.getContext()->emitError(
        "On LA32, only 32-bit registers can be written.");
    return Op.getOperand(0);
  }

  return Op;
}

SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setFrameAddressIsTaken(true);
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
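
// For example, __builtin_frame_address(2) emits two chained loads, each
// reading the saved frame pointer at (FrameAddr - 2 * GRLenInBytes) of the
// frame below it.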

SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MF.getFrameInfo().setReturnAddressIsTaken(true);
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}

SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
                                                   SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto Size = Subtarget.getGRLen() / 8;
  auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
  return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
}

SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if ((Op0.getOpcode() == ISD::AssertSext ||
       Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
  SDValue Chain = SDValue();
  SDValue Result;
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}

SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
                                              SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
      Subtarget.is64Bit() && Subtarget.hasBasicF()) {
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
    return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
  }
  return Op;
}

SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
                                                 SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  if (Op0.getValueType() == MVT::f16)
    Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);

  if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
      !Subtarget.hasBasicD()) {
    SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
    return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
  }

  EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
  SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         CodeModel::Model M,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
      // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}

SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<BlockAddressSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
  return getAddr(cast<JumpTableSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
  return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
                 DAG.getTarget().getCodeModel());
}

SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");
  auto CM = DAG.getTarget().getCodeModel();
  const GlobalValue *GV = N->getGlobal();

  if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
    if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
      CM = *GCM;
  }

  return getAddr(N, DAG, CM, GV->isDSOLocal());
}

SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        MachinePointerInfo::getGOT(MF),
        MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
            MachineMemOperand::MOInvariant,
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
1920
1921SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1922 SelectionDAG &DAG,
1923 unsigned Opc,
1924 bool Large) const {
1925 SDLoc DL(N);
1926 EVT Ty = getPointerTy(DAG.getDataLayout());
1927 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1928
1929 // This is not actually used, but is necessary for successfully matching the
1930 // PseudoLA_*_LARGE nodes.
1931 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1932
1933 // Use a PC-relative addressing mode to access the dynamic GOT address.
1934 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1935 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1936 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1937
1938 // Prepare argument list to generate call.
1939 ArgListTy Args;
1940 ArgListEntry Entry;
1941 Entry.Node = Load;
1942 Entry.Ty = CallTy;
1943 Args.push_back(Entry);
1944
1945 // Setup call to __tls_get_addr.
1946 TargetLowering::CallLoweringInfo CLI(DAG);
1947 CLI.setDebugLoc(DL)
1948 .setChain(DAG.getEntryNode())
1949 .setLibCallee(CallingConv::C, CallTy,
1950 DAG.getExternalSymbol("__tls_get_addr", Ty),
1951 std::move(Args));
1952
1953 return LowerCallTo(CLI).first;
1954}
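// Illustrative sketch (not part of the source): for the GD model under the
// normal code model, an access to thread-local `sym` is expected to lower
// roughly to:
//   pcalau12i $a0, %gd_pc_hi20(sym)
//   addi.d    $a0, $a0, %got_pc_lo12(sym)
//   bl        %plt(__tls_get_addr)
// with $a0 holding the address of `sym` on return.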
1955
1956SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1957 SelectionDAG &DAG, unsigned Opc,
1958 bool Large) const {
1959 SDLoc DL(N);
1960 EVT Ty = getPointerTy(DAG.getDataLayout());
1961 const GlobalValue *GV = N->getGlobal();
1962
1963 // This is not actually used, but is necessary for successfully matching the
1964 // PseudoLA_*_LARGE nodes.
1965 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1966
1967 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1968 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1969 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1970 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1971 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1972}
1973
1974SDValue
1975LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1976 SelectionDAG &DAG) const {
1977 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1978 CallingConv::GHC)
1979 report_fatal_error("In GHC calling convention TLS is not supported");
1980
1981 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1982 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1983
1984 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1985 assert(N->getOffset() == 0 && "unexpected offset in global node");
1986
1987 if (DAG.getTarget().useEmulatedTLS())
1988 report_fatal_error("the emulated TLS is prohibited",
1989 /*GenCrashDiag=*/false);
1990
1991 bool IsDesc = DAG.getTarget().useTLSDESC();
1992
1993 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1994 case TLSModel::GeneralDynamic:
1995 // In this model, application code calls the dynamic linker function
1996 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1997 // runtime.
1998 if (!IsDesc)
1999 return getDynamicTLSAddr(N, DAG,
2000 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
2001 : LoongArch::PseudoLA_TLS_GD,
2002 Large);
2003 break;
2004 case TLSModel::LocalDynamic:
2005 // Same as GeneralDynamic, except for assembly modifiers and relocation
2006 // records.
2007 if (!IsDesc)
2008 return getDynamicTLSAddr(N, DAG,
2009 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
2010 : LoongArch::PseudoLA_TLS_LD,
2011 Large);
2012 break;
2013 case TLSModel::InitialExec:
2014 // This model uses the GOT to resolve TLS offsets.
2015 return getStaticTLSAddr(N, DAG,
2016 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
2017 : LoongArch::PseudoLA_TLS_IE,
2018 /*UseGOT=*/true, Large);
2019 case TLSModel::LocalExec:
2020 // This model is used when linking statically; the TLS offsets are resolved
2021 // during program linking.
2022 //
2023 // This node doesn't need an extra argument for the large code model.
2024 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2025 /*UseGOT=*/false, Large);
2026 }
2027
2028 return getTLSDescAddr(N, DAG,
2029 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2030 : LoongArch::PseudoLA_TLS_DESC,
2031 Large);
2032}
2033
2034template <unsigned N>
2035 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2036 SelectionDAG &DAG, bool IsSigned = false) {
2037 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2038 // Check the ImmArg.
2039 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2040 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2041 DAG.getContext()->emitError(Op->getOperationName(0) +
2042 ": argument out of range.");
2043 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2044 }
2045 return SDValue();
2046}
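// For example, checkIntrinsicImmArg<3>(Op, 2, DAG) accepts an immediate in
// [0, 7] at operand index 2 (or [-4, 3] with IsSigned); anything else is
// diagnosed with "argument out of range" and folded to UNDEF instead of
// reaching instruction selection.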
2047
2048SDValue
2049LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2050 SelectionDAG &DAG) const {
2051 SDLoc DL(Op);
2052 switch (Op.getConstantOperandVal(0)) {
2053 default:
2054 return SDValue(); // Don't custom lower most intrinsics.
2055 case Intrinsic::thread_pointer: {
2056 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2057 return DAG.getRegister(LoongArch::R2, PtrVT);
2058 }
2059 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2060 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2061 case Intrinsic::loongarch_lsx_vreplvei_d:
2062 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2063 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2064 case Intrinsic::loongarch_lsx_vreplvei_w:
2065 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2066 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2067 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2068 case Intrinsic::loongarch_lasx_xvpickve_d:
2069 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2070 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2071 case Intrinsic::loongarch_lasx_xvinsve0_d:
2072 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2073 case Intrinsic::loongarch_lsx_vsat_b:
2074 case Intrinsic::loongarch_lsx_vsat_bu:
2075 case Intrinsic::loongarch_lsx_vrotri_b:
2076 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2077 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2078 case Intrinsic::loongarch_lsx_vsrlri_b:
2079 case Intrinsic::loongarch_lsx_vsrari_b:
2080 case Intrinsic::loongarch_lsx_vreplvei_h:
2081 case Intrinsic::loongarch_lasx_xvsat_b:
2082 case Intrinsic::loongarch_lasx_xvsat_bu:
2083 case Intrinsic::loongarch_lasx_xvrotri_b:
2084 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2085 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2086 case Intrinsic::loongarch_lasx_xvsrlri_b:
2087 case Intrinsic::loongarch_lasx_xvsrari_b:
2088 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2089 case Intrinsic::loongarch_lasx_xvpickve_w:
2090 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2091 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2092 case Intrinsic::loongarch_lasx_xvinsve0_w:
2093 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2094 case Intrinsic::loongarch_lsx_vsat_h:
2095 case Intrinsic::loongarch_lsx_vsat_hu:
2096 case Intrinsic::loongarch_lsx_vrotri_h:
2097 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2098 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2099 case Intrinsic::loongarch_lsx_vsrlri_h:
2100 case Intrinsic::loongarch_lsx_vsrari_h:
2101 case Intrinsic::loongarch_lsx_vreplvei_b:
2102 case Intrinsic::loongarch_lasx_xvsat_h:
2103 case Intrinsic::loongarch_lasx_xvsat_hu:
2104 case Intrinsic::loongarch_lasx_xvrotri_h:
2105 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2106 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2107 case Intrinsic::loongarch_lasx_xvsrlri_h:
2108 case Intrinsic::loongarch_lasx_xvsrari_h:
2109 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2110 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2111 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2112 case Intrinsic::loongarch_lsx_vsrani_b_h:
2113 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2114 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2115 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2116 case Intrinsic::loongarch_lsx_vssrani_b_h:
2117 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2118 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2119 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2120 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2121 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2122 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2123 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2124 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2125 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2126 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2127 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2128 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2129 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2130 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2131 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2132 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2133 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2134 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2135 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2136 case Intrinsic::loongarch_lsx_vsat_w:
2137 case Intrinsic::loongarch_lsx_vsat_wu:
2138 case Intrinsic::loongarch_lsx_vrotri_w:
2139 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2140 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2141 case Intrinsic::loongarch_lsx_vsrlri_w:
2142 case Intrinsic::loongarch_lsx_vsrari_w:
2143 case Intrinsic::loongarch_lsx_vslei_bu:
2144 case Intrinsic::loongarch_lsx_vslei_hu:
2145 case Intrinsic::loongarch_lsx_vslei_wu:
2146 case Intrinsic::loongarch_lsx_vslei_du:
2147 case Intrinsic::loongarch_lsx_vslti_bu:
2148 case Intrinsic::loongarch_lsx_vslti_hu:
2149 case Intrinsic::loongarch_lsx_vslti_wu:
2150 case Intrinsic::loongarch_lsx_vslti_du:
2151 case Intrinsic::loongarch_lsx_vbsll_v:
2152 case Intrinsic::loongarch_lsx_vbsrl_v:
2153 case Intrinsic::loongarch_lasx_xvsat_w:
2154 case Intrinsic::loongarch_lasx_xvsat_wu:
2155 case Intrinsic::loongarch_lasx_xvrotri_w:
2156 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2157 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2158 case Intrinsic::loongarch_lasx_xvsrlri_w:
2159 case Intrinsic::loongarch_lasx_xvsrari_w:
2160 case Intrinsic::loongarch_lasx_xvslei_bu:
2161 case Intrinsic::loongarch_lasx_xvslei_hu:
2162 case Intrinsic::loongarch_lasx_xvslei_wu:
2163 case Intrinsic::loongarch_lasx_xvslei_du:
2164 case Intrinsic::loongarch_lasx_xvslti_bu:
2165 case Intrinsic::loongarch_lasx_xvslti_hu:
2166 case Intrinsic::loongarch_lasx_xvslti_wu:
2167 case Intrinsic::loongarch_lasx_xvslti_du:
2168 case Intrinsic::loongarch_lasx_xvbsll_v:
2169 case Intrinsic::loongarch_lasx_xvbsrl_v:
2170 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2171 case Intrinsic::loongarch_lsx_vseqi_b:
2172 case Intrinsic::loongarch_lsx_vseqi_h:
2173 case Intrinsic::loongarch_lsx_vseqi_w:
2174 case Intrinsic::loongarch_lsx_vseqi_d:
2175 case Intrinsic::loongarch_lsx_vslei_b:
2176 case Intrinsic::loongarch_lsx_vslei_h:
2177 case Intrinsic::loongarch_lsx_vslei_w:
2178 case Intrinsic::loongarch_lsx_vslei_d:
2179 case Intrinsic::loongarch_lsx_vslti_b:
2180 case Intrinsic::loongarch_lsx_vslti_h:
2181 case Intrinsic::loongarch_lsx_vslti_w:
2182 case Intrinsic::loongarch_lsx_vslti_d:
2183 case Intrinsic::loongarch_lasx_xvseqi_b:
2184 case Intrinsic::loongarch_lasx_xvseqi_h:
2185 case Intrinsic::loongarch_lasx_xvseqi_w:
2186 case Intrinsic::loongarch_lasx_xvseqi_d:
2187 case Intrinsic::loongarch_lasx_xvslei_b:
2188 case Intrinsic::loongarch_lasx_xvslei_h:
2189 case Intrinsic::loongarch_lasx_xvslei_w:
2190 case Intrinsic::loongarch_lasx_xvslei_d:
2191 case Intrinsic::loongarch_lasx_xvslti_b:
2192 case Intrinsic::loongarch_lasx_xvslti_h:
2193 case Intrinsic::loongarch_lasx_xvslti_w:
2194 case Intrinsic::loongarch_lasx_xvslti_d:
2195 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2196 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2197 case Intrinsic::loongarch_lsx_vsrani_h_w:
2198 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2199 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2200 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2201 case Intrinsic::loongarch_lsx_vssrani_h_w:
2202 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2203 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2204 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2205 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2206 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2207 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2208 case Intrinsic::loongarch_lsx_vfrstpi_b:
2209 case Intrinsic::loongarch_lsx_vfrstpi_h:
2210 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2211 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2212 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2213 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2214 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2215 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2216 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2217 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2218 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2219 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2220 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2221 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2222 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2223 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2224 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2225 case Intrinsic::loongarch_lsx_vsat_d:
2226 case Intrinsic::loongarch_lsx_vsat_du:
2227 case Intrinsic::loongarch_lsx_vrotri_d:
2228 case Intrinsic::loongarch_lsx_vsrlri_d:
2229 case Intrinsic::loongarch_lsx_vsrari_d:
2230 case Intrinsic::loongarch_lasx_xvsat_d:
2231 case Intrinsic::loongarch_lasx_xvsat_du:
2232 case Intrinsic::loongarch_lasx_xvrotri_d:
2233 case Intrinsic::loongarch_lasx_xvsrlri_d:
2234 case Intrinsic::loongarch_lasx_xvsrari_d:
2235 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2236 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2237 case Intrinsic::loongarch_lsx_vsrani_w_d:
2238 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2239 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2240 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2241 case Intrinsic::loongarch_lsx_vssrani_w_d:
2242 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2243 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2244 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2245 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2246 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2247 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2248 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2249 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2250 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2251 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2252 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2253 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2254 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2255 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2256 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2257 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2258 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2259 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2260 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2261 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2262 case Intrinsic::loongarch_lsx_vsrani_d_q:
2263 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2264 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2265 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2266 case Intrinsic::loongarch_lsx_vssrani_d_q:
2267 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2268 case Intrinsic::loongarch_lsx_vssrani_du_q:
2269 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2270 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2271 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2272 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2273 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2274 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2275 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2276 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2277 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2278 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2279 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2280 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2281 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2282 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2283 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2284 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2285 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2286 case Intrinsic::loongarch_lsx_vnori_b:
2287 case Intrinsic::loongarch_lsx_vshuf4i_b:
2288 case Intrinsic::loongarch_lsx_vshuf4i_h:
2289 case Intrinsic::loongarch_lsx_vshuf4i_w:
2290 case Intrinsic::loongarch_lasx_xvnori_b:
2291 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2292 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2293 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2294 case Intrinsic::loongarch_lasx_xvpermi_d:
2295 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2296 case Intrinsic::loongarch_lsx_vshuf4i_d:
2297 case Intrinsic::loongarch_lsx_vpermi_w:
2298 case Intrinsic::loongarch_lsx_vbitseli_b:
2299 case Intrinsic::loongarch_lsx_vextrins_b:
2300 case Intrinsic::loongarch_lsx_vextrins_h:
2301 case Intrinsic::loongarch_lsx_vextrins_w:
2302 case Intrinsic::loongarch_lsx_vextrins_d:
2303 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2304 case Intrinsic::loongarch_lasx_xvpermi_w:
2305 case Intrinsic::loongarch_lasx_xvpermi_q:
2306 case Intrinsic::loongarch_lasx_xvbitseli_b:
2307 case Intrinsic::loongarch_lasx_xvextrins_b:
2308 case Intrinsic::loongarch_lasx_xvextrins_h:
2309 case Intrinsic::loongarch_lasx_xvextrins_w:
2310 case Intrinsic::loongarch_lasx_xvextrins_d:
2311 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2312 case Intrinsic::loongarch_lsx_vrepli_b:
2313 case Intrinsic::loongarch_lsx_vrepli_h:
2314 case Intrinsic::loongarch_lsx_vrepli_w:
2315 case Intrinsic::loongarch_lsx_vrepli_d:
2316 case Intrinsic::loongarch_lasx_xvrepli_b:
2317 case Intrinsic::loongarch_lasx_xvrepli_h:
2318 case Intrinsic::loongarch_lasx_xvrepli_w:
2319 case Intrinsic::loongarch_lasx_xvrepli_d:
2320 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2321 case Intrinsic::loongarch_lsx_vldi:
2322 case Intrinsic::loongarch_lasx_xvldi:
2323 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2324 }
2325}
2326
2327 // Helper function that emits an error message for intrinsics with a chain and
2328 // returns the merged values of an UNDEF and the chain.
2329 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2330 StringRef ErrorMsg,
2331 SelectionDAG &DAG) {
2332 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2333 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2334 SDLoc(Op));
2335}
2336
2337SDValue
2338LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2339 SelectionDAG &DAG) const {
2340 SDLoc DL(Op);
2341 MVT GRLenVT = Subtarget.getGRLenVT();
2342 EVT VT = Op.getValueType();
2343 SDValue Chain = Op.getOperand(0);
2344 const StringRef ErrorMsgOOR = "argument out of range";
2345 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2346 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2347
2348 switch (Op.getConstantOperandVal(1)) {
2349 default:
2350 return Op;
2351 case Intrinsic::loongarch_crc_w_b_w:
2352 case Intrinsic::loongarch_crc_w_h_w:
2353 case Intrinsic::loongarch_crc_w_w_w:
2354 case Intrinsic::loongarch_crc_w_d_w:
2355 case Intrinsic::loongarch_crcc_w_b_w:
2356 case Intrinsic::loongarch_crcc_w_h_w:
2357 case Intrinsic::loongarch_crcc_w_w_w:
2358 case Intrinsic::loongarch_crcc_w_d_w:
2359 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2360 case Intrinsic::loongarch_csrrd_w:
2361 case Intrinsic::loongarch_csrrd_d: {
2362 unsigned Imm = Op.getConstantOperandVal(2);
2363 return !isUInt<14>(Imm)
2364 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2365 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2366 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2367 }
2368 case Intrinsic::loongarch_csrwr_w:
2369 case Intrinsic::loongarch_csrwr_d: {
2370 unsigned Imm = Op.getConstantOperandVal(3);
2371 return !isUInt<14>(Imm)
2372 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2373 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2374 {Chain, Op.getOperand(2),
2375 DAG.getConstant(Imm, DL, GRLenVT)});
2376 }
2377 case Intrinsic::loongarch_csrxchg_w:
2378 case Intrinsic::loongarch_csrxchg_d: {
2379 unsigned Imm = Op.getConstantOperandVal(4);
2380 return !isUInt<14>(Imm)
2381 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2382 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2383 {Chain, Op.getOperand(2), Op.getOperand(3),
2384 DAG.getConstant(Imm, DL, GRLenVT)});
2385 }
2386 case Intrinsic::loongarch_iocsrrd_d: {
2387 return DAG.getNode(
2388 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2389 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2390 }
2391#define IOCSRRD_CASE(NAME, NODE) \
2392 case Intrinsic::loongarch_##NAME: { \
2393 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2394 {Chain, Op.getOperand(2)}); \
2395 }
2396 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2397 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2398 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2399#undef IOCSRRD_CASE
2400 case Intrinsic::loongarch_cpucfg: {
2401 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2402 {Chain, Op.getOperand(2)});
2403 }
2404 case Intrinsic::loongarch_lddir_d: {
2405 unsigned Imm = Op.getConstantOperandVal(3);
2406 return !isUInt<8>(Imm)
2407 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2408 : Op;
2409 }
2410 case Intrinsic::loongarch_movfcsr2gr: {
2411 if (!Subtarget.hasBasicF())
2412 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2413 unsigned Imm = Op.getConstantOperandVal(2);
2414 return !isUInt<2>(Imm)
2415 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2416 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2417 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2418 }
2419 case Intrinsic::loongarch_lsx_vld:
2420 case Intrinsic::loongarch_lsx_vldrepl_b:
2421 case Intrinsic::loongarch_lasx_xvld:
2422 case Intrinsic::loongarch_lasx_xvldrepl_b:
2423 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2424 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2425 : SDValue();
2426 case Intrinsic::loongarch_lsx_vldrepl_h:
2427 case Intrinsic::loongarch_lasx_xvldrepl_h:
2428 return !isShiftedInt<11, 1>(
2429 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2430 ? emitIntrinsicWithChainErrorMessage(
2431 Op, "argument out of range or not a multiple of 2", DAG)
2432 : SDValue();
2433 case Intrinsic::loongarch_lsx_vldrepl_w:
2434 case Intrinsic::loongarch_lasx_xvldrepl_w:
2435 return !isShiftedInt<10, 2>(
2436 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2437 ? emitIntrinsicWithChainErrorMessage(
2438 Op, "argument out of range or not a multiple of 4", DAG)
2439 : SDValue();
2440 case Intrinsic::loongarch_lsx_vldrepl_d:
2441 case Intrinsic::loongarch_lasx_xvldrepl_d:
2442 return !isShiftedInt<9, 3>(
2443 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2444 ? emitIntrinsicWithChainErrorMessage(
2445 Op, "argument out of range or not a multiple of 8", DAG)
2446 : SDValue();
2447 }
2448}
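// For example, @llvm.loongarch.csrrd.w with a CSR number outside [0, 0x3fff]
// (isUInt<14>) is rejected with "argument out of range"; an in-range number
// is lowered to a CSRRD node carrying the chain alongside the GRLen result.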
2449
2450 // Helper function that emits an error message for intrinsics with a void
2451 // return value and returns the chain.
2452 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2453 SelectionDAG &DAG) {
2454
2455 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2456 return Op.getOperand(0);
2457}
2458
2459SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2460 SelectionDAG &DAG) const {
2461 SDLoc DL(Op);
2462 MVT GRLenVT = Subtarget.getGRLenVT();
2463 SDValue Chain = Op.getOperand(0);
2464 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2465 SDValue Op2 = Op.getOperand(2);
2466 const StringRef ErrorMsgOOR = "argument out of range";
2467 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2468 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2469 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2470
2471 switch (IntrinsicEnum) {
2472 default:
2473 // TODO: Add more Intrinsics.
2474 return SDValue();
2475 case Intrinsic::loongarch_cacop_d:
2476 case Intrinsic::loongarch_cacop_w: {
2477 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2478 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2479 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2480 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2481 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2482 unsigned Imm1 = Op2->getAsZExtVal();
2483 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2484 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2485 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2486 return Op;
2487 }
2488 case Intrinsic::loongarch_dbar: {
2489 unsigned Imm = Op2->getAsZExtVal();
2490 return !isUInt<15>(Imm)
2491 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2492 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2493 DAG.getConstant(Imm, DL, GRLenVT));
2494 }
2495 case Intrinsic::loongarch_ibar: {
2496 unsigned Imm = Op2->getAsZExtVal();
2497 return !isUInt<15>(Imm)
2498 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2499 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2500 DAG.getConstant(Imm, DL, GRLenVT));
2501 }
2502 case Intrinsic::loongarch_break: {
2503 unsigned Imm = Op2->getAsZExtVal();
2504 return !isUInt<15>(Imm)
2505 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2506 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2507 DAG.getConstant(Imm, DL, GRLenVT));
2508 }
2509 case Intrinsic::loongarch_movgr2fcsr: {
2510 if (!Subtarget.hasBasicF())
2511 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2512 unsigned Imm = Op2->getAsZExtVal();
2513 return !isUInt<2>(Imm)
2514 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2515 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2516 DAG.getConstant(Imm, DL, GRLenVT),
2517 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2518 Op.getOperand(3)));
2519 }
2520 case Intrinsic::loongarch_syscall: {
2521 unsigned Imm = Op2->getAsZExtVal();
2522 return !isUInt<15>(Imm)
2523 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2524 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2525 DAG.getConstant(Imm, DL, GRLenVT));
2526 }
2527#define IOCSRWR_CASE(NAME, NODE) \
2528 case Intrinsic::loongarch_##NAME: { \
2529 SDValue Op3 = Op.getOperand(3); \
2530 return Subtarget.is64Bit() \
2531 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2532 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2533 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2534 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2535 Op3); \
2536 }
2537 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2538 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2539 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2540#undef IOCSRWR_CASE
2541 case Intrinsic::loongarch_iocsrwr_d: {
2542 return !Subtarget.is64Bit()
2543 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2544 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2545 Op2,
2546 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2547 Op.getOperand(3)));
2548 }
2549#define ASRT_LE_GT_CASE(NAME) \
2550 case Intrinsic::loongarch_##NAME: { \
2551 return !Subtarget.is64Bit() \
2552 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2553 : Op; \
2554 }
2555 ASRT_LE_GT_CASE(asrtle_d)
2556 ASRT_LE_GT_CASE(asrtgt_d)
2557#undef ASRT_LE_GT_CASE
2558 case Intrinsic::loongarch_ldpte_d: {
2559 unsigned Imm = Op.getConstantOperandVal(3);
2560 return !Subtarget.is64Bit()
2561 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2562 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2563 : Op;
2564 }
2565 case Intrinsic::loongarch_lsx_vst:
2566 case Intrinsic::loongarch_lasx_xvst:
2567 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2568 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2569 : SDValue();
2570 case Intrinsic::loongarch_lasx_xvstelm_b:
2571 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2572 !isUInt<5>(Op.getConstantOperandVal(5)))
2573 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2574 : SDValue();
2575 case Intrinsic::loongarch_lsx_vstelm_b:
2576 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2577 !isUInt<4>(Op.getConstantOperandVal(5)))
2578 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2579 : SDValue();
2580 case Intrinsic::loongarch_lasx_xvstelm_h:
2581 return (!isShiftedInt<8, 1>(
2582 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2583 !isUInt<4>(Op.getConstantOperandVal(5)))
2584 ? emitIntrinsicErrorMessage(
2585 Op, "argument out of range or not a multiple of 2", DAG)
2586 : SDValue();
2587 case Intrinsic::loongarch_lsx_vstelm_h:
2588 return (!isShiftedInt<8, 1>(
2589 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2590 !isUInt<3>(Op.getConstantOperandVal(5)))
2591 ? emitIntrinsicErrorMessage(
2592 Op, "argument out of range or not a multiple of 2", DAG)
2593 : SDValue();
2594 case Intrinsic::loongarch_lasx_xvstelm_w:
2595 return (!isShiftedInt<8, 2>(
2596 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2597 !isUInt<3>(Op.getConstantOperandVal(5)))
2598 ? emitIntrinsicErrorMessage(
2599 Op, "argument out of range or not a multiple of 4", DAG)
2600 : SDValue();
2601 case Intrinsic::loongarch_lsx_vstelm_w:
2602 return (!isShiftedInt<8, 2>(
2603 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2604 !isUInt<2>(Op.getConstantOperandVal(5)))
2605 ? emitIntrinsicErrorMessage(
2606 Op, "argument out of range or not a multiple of 4", DAG)
2607 : SDValue();
2608 case Intrinsic::loongarch_lasx_xvstelm_d:
2609 return (!isShiftedInt<8, 3>(
2610 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2611 !isUInt<2>(Op.getConstantOperandVal(5)))
2612 ? emitIntrinsicErrorMessage(
2613 Op, "argument out of range or not a multiple of 8", DAG)
2614 : SDValue();
2615 case Intrinsic::loongarch_lsx_vstelm_d:
2616 return (!isShiftedInt<8, 3>(
2617 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2618 !isUInt<1>(Op.getConstantOperandVal(5)))
2619 ? emitIntrinsicErrorMessage(
2620 Op, "argument out of range or not a multiple of 8", DAG)
2621 : SDValue();
2622 }
2623}
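// For example, @llvm.loongarch.lsx.vstelm.w requires a byte offset that is a
// multiple of 4 in [-512, 508] (isShiftedInt<8, 2>) and a lane index in
// [0, 3] (isUInt<2>); violating either emits the corresponding diagnostic
// above.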
2624
2625SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2626 SelectionDAG &DAG) const {
2627 SDLoc DL(Op);
2628 SDValue Lo = Op.getOperand(0);
2629 SDValue Hi = Op.getOperand(1);
2630 SDValue Shamt = Op.getOperand(2);
2631 EVT VT = Lo.getValueType();
2632
2633 // if Shamt-GRLen < 0: // Shamt < GRLen
2634 // Lo = Lo << Shamt
2635 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2636 // else:
2637 // Lo = 0
2638 // Hi = Lo << (Shamt-GRLen)
2639
2640 SDValue Zero = DAG.getConstant(0, DL, VT);
2641 SDValue One = DAG.getConstant(1, DL, VT);
2642 SDValue MinusGRLen =
2643 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2644 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2645 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2646 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2647
2648 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2649 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2650 SDValue ShiftRightLo =
2651 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2652 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2653 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2654 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2655
2656 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2657
2658 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2659 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2660
2661 SDValue Parts[2] = {Lo, Hi};
2662 return DAG.getMergeValues(Parts, DL);
2663}
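// Worked example on LA32 (GRLen = 32): for {Lo, Hi} << 40, Shamt - GRLen = 8
// is non-negative, so the selects yield Lo = 0 and Hi = Lo << 8. For
// {Lo, Hi} << 12, Lo = Lo << 12 and Hi = (Hi << 12) | (Lo >>u 20); the
// (Lo >>u 1) >>u (GRLen-1 ^ Shamt) form computes Lo >>u (32 - Shamt) while
// staying well-defined when Shamt is 0.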
2664
2665SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2666 SelectionDAG &DAG,
2667 bool IsSRA) const {
2668 SDLoc DL(Op);
2669 SDValue Lo = Op.getOperand(0);
2670 SDValue Hi = Op.getOperand(1);
2671 SDValue Shamt = Op.getOperand(2);
2672 EVT VT = Lo.getValueType();
2673
2674 // SRA expansion:
2675 // if Shamt-GRLen < 0: // Shamt < GRLen
2676 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2677 // Hi = Hi >>s Shamt
2678 // else:
2679 // Lo = Hi >>s (Shamt-GRLen);
2680 // Hi = Hi >>s (GRLen-1)
2681 //
2682 // SRL expansion:
2683 // if Shamt-GRLen < 0: // Shamt < GRLen
2684 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2685 // Hi = Hi >>u Shamt
2686 // else:
2687 // Lo = Hi >>u (Shamt-GRLen);
2688 // Hi = 0;
2689
2690 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2691
2692 SDValue Zero = DAG.getConstant(0, DL, VT);
2693 SDValue One = DAG.getConstant(1, DL, VT);
2694 SDValue MinusGRLen =
2695 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2696 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2697 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2698 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2699
2700 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2701 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2702 SDValue ShiftLeftHi =
2703 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2704 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2705 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2706 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2707 SDValue HiFalse =
2708 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2709
2710 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2711
2712 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2713 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2714
2715 SDValue Parts[2] = {Lo, Hi};
2716 return DAG.getMergeValues(Parts, DL);
2717}
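// Worked example on LA32: for {Lo, Hi} >>s 40, Shamt - GRLen = 8 >= 0, so
// Lo = Hi >>s 8 and Hi = Hi >>s 31 (all sign bits). For {Lo, Hi} >>s 4,
// Lo = (Lo >>u 4) | (Hi << 28) and Hi = Hi >>s 4.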
2718
2719// Returns the opcode of the target-specific SDNode that implements the 32-bit
2720// form of the given Opcode.
2721 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2722 switch (Opcode) {
2723 default:
2724 llvm_unreachable("Unexpected opcode");
2725 case ISD::SDIV:
2726 return LoongArchISD::DIV_W;
2727 case ISD::UDIV:
2728 return LoongArchISD::DIV_WU;
2729 case ISD::SREM:
2730 return LoongArchISD::MOD_W;
2731 case ISD::UREM:
2732 return LoongArchISD::MOD_WU;
2733 case ISD::SHL:
2734 return LoongArchISD::SLL_W;
2735 case ISD::SRA:
2736 return LoongArchISD::SRA_W;
2737 case ISD::SRL:
2738 return LoongArchISD::SRL_W;
2739 case ISD::ROTL:
2740 case ISD::ROTR:
2741 return LoongArchISD::ROTR_W;
2742 case ISD::CTTZ:
2743 return LoongArchISD::CTZ_W;
2744 case ISD::CTLZ:
2745 return LoongArchISD::CLZ_W;
2746 }
2747}
2748
2749 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2750 // node. Because i8/i16/i32 aren't legal types for LA64, these operations would
2751 // otherwise be promoted to i64, making it difficult to select the
2752 // SLL_W/.../*W later on, because the fact that the operation was originally of
2753 // type i8/i16/i32 is lost.
2754 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2755 unsigned ExtOpc = ISD::ANY_EXTEND) {
2756 SDLoc DL(N);
2757 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2758 SDValue NewOp0, NewRes;
2759
2760 switch (NumOp) {
2761 default:
2762 llvm_unreachable("Unexpected NumOp");
2763 case 1: {
2764 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2765 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2766 break;
2767 }
2768 case 2: {
2769 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2770 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2771 if (N->getOpcode() == ISD::ROTL) {
2772 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2773 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2774 }
2775 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2776 break;
2777 }
2778 // TODO: Handle more NumOp values.
2779 }
2780
2781 // ReplaceNodeResults requires we maintain the same type for the return
2782 // value.
2783 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2784}
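// For example, an i32 `rotl %x, %n` on LA64 is rewritten here as
// (trunc (ROTR_W (any_ext %x), (32 - (any_ext %n)))), since LoongArch only
// provides a rotate-right word instruction.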
2785
2786 // Converts the given 32-bit operation to an i64 operation with sign-extension
2787 // semantics, reducing the number of sign-extension instructions needed.
2788 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2789 SDLoc DL(N);
2790 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2791 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2792 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2793 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2794 DAG.getValueType(MVT::i32));
2795 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2796}
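// For example, an i32 `add %a, %b` on LA64 becomes
// (trunc (sext_inreg (add (any_ext %a), (any_ext %b)), i32)), which can be
// selected to a single add.w whose result is already sign-extended to 64
// bits.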
2797
2798 // Helper function that emits an error message for intrinsics with or without
2799 // a chain, pushing an UNDEF and (when WithChain is set) the chain as results.
2800 static void emitErrorAndReplaceIntrinsicResults(
2801 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2802 StringRef ErrorMsg, bool WithChain = true) {
2803 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2804 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2805 if (!WithChain)
2806 return;
2807 Results.push_back(N->getOperand(0));
2808}
2809
2810template <unsigned N>
2811static void
2812 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2813 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2814 unsigned ResOp) {
2815 const StringRef ErrorMsgOOR = "argument out of range";
2816 unsigned Imm = Node->getConstantOperandVal(2);
2817 if (!isUInt<N>(Imm)) {
2818 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2819 /*WithChain=*/false);
2820 return;
2821 }
2822 SDLoc DL(Node);
2823 SDValue Vec = Node->getOperand(1);
2824
2825 SDValue PickElt =
2826 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2827 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2828 DAG.getValueType(Vec.getValueType().getVectorElementType()));
2829 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2830 PickElt.getValue(0)));
2831}
2832
2833 static void replaceVecCondBranchResults(SDNode *N,
2834 SmallVectorImpl<SDValue> &Results,
2835 SelectionDAG &DAG,
2836 const LoongArchSubtarget &Subtarget,
2837 unsigned ResOp) {
2838 SDLoc DL(N);
2839 SDValue Vec = N->getOperand(1);
2840
2841 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2842 Results.push_back(
2843 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2844}
2845
2846static void
2847 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2848 SelectionDAG &DAG,
2849 const LoongArchSubtarget &Subtarget) {
2850 switch (N->getConstantOperandVal(0)) {
2851 default:
2852 llvm_unreachable("Unexpected Intrinsic.");
2853 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2854 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2855 LoongArchISD::VPICK_SEXT_ELT);
2856 break;
2857 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2858 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2859 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2860 LoongArchISD::VPICK_SEXT_ELT);
2861 break;
2862 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2863 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2864 LoongArchISD::VPICK_SEXT_ELT);
2865 break;
2866 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2867 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2868 LoongArchISD::VPICK_ZEXT_ELT);
2869 break;
2870 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2871 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2872 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2873 LoongArchISD::VPICK_ZEXT_ELT);
2874 break;
2875 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2876 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2877 LoongArchISD::VPICK_ZEXT_ELT);
2878 break;
2879 case Intrinsic::loongarch_lsx_bz_b:
2880 case Intrinsic::loongarch_lsx_bz_h:
2881 case Intrinsic::loongarch_lsx_bz_w:
2882 case Intrinsic::loongarch_lsx_bz_d:
2883 case Intrinsic::loongarch_lasx_xbz_b:
2884 case Intrinsic::loongarch_lasx_xbz_h:
2885 case Intrinsic::loongarch_lasx_xbz_w:
2886 case Intrinsic::loongarch_lasx_xbz_d:
2887 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2888 LoongArchISD::VANY_ZERO);
2889 break;
2890 case Intrinsic::loongarch_lsx_bz_v:
2891 case Intrinsic::loongarch_lasx_xbz_v:
2892 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2893 LoongArchISD::VALL_ZERO);
2894 break;
2895 case Intrinsic::loongarch_lsx_bnz_b:
2896 case Intrinsic::loongarch_lsx_bnz_h:
2897 case Intrinsic::loongarch_lsx_bnz_w:
2898 case Intrinsic::loongarch_lsx_bnz_d:
2899 case Intrinsic::loongarch_lasx_xbnz_b:
2900 case Intrinsic::loongarch_lasx_xbnz_h:
2901 case Intrinsic::loongarch_lasx_xbnz_w:
2902 case Intrinsic::loongarch_lasx_xbnz_d:
2903 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2904 LoongArchISD::VALL_NONZERO);
2905 break;
2906 case Intrinsic::loongarch_lsx_bnz_v:
2907 case Intrinsic::loongarch_lasx_xbnz_v:
2908 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2909 LoongArchISD::VANY_NONZERO);
2910 break;
2911 }
2912}
2913
2914 static void replaceCMP_XCHG_128Results(SDNode *N,
2915 SmallVectorImpl<SDValue> &Results,
2916 SelectionDAG &DAG) {
2917 assert(N->getValueType(0) == MVT::i128 &&
2918 "AtomicCmpSwap on types less than 128 should be legal");
2919 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
2920
2921 unsigned Opcode;
2922 switch (MemOp->getMergedOrdering()) {
2923 case AtomicOrdering::Acquire:
2924 case AtomicOrdering::AcquireRelease:
2925 case AtomicOrdering::SequentiallyConsistent:
2926 Opcode = LoongArch::PseudoCmpXchg128Acquire;
2927 break;
2928 case AtomicOrdering::Monotonic:
2929 case AtomicOrdering::Release:
2930 Opcode = LoongArch::PseudoCmpXchg128;
2931 break;
2932 default:
2933 llvm_unreachable("Unexpected ordering!");
2934 }
2935
2936 SDLoc DL(N);
2937 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
2938 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
2939 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
2940 NewVal.first, NewVal.second, N->getOperand(0)};
2941
2942 SDNode *CmpSwap = DAG.getMachineNode(
2943 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
2944 Ops);
2945 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
2946 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
2947 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
2948 Results.push_back(SDValue(CmpSwap, 3));
2949}
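// In effect, an i128 cmpxchg is split into four i64 halves (expected and new
// values, low and high) feeding a single pseudo, and the two i64 result
// halves are recombined into the original i128 value with ISD::BUILD_PAIR,
// with the chain returned alongside.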
2950
2951 void LoongArchTargetLowering::ReplaceNodeResults(
2952 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2953 SDLoc DL(N);
2954 EVT VT = N->getValueType(0);
2955 switch (N->getOpcode()) {
2956 default:
2957 llvm_unreachable("Don't know how to legalize this operation");
2958 case ISD::ADD:
2959 case ISD::SUB:
2960 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2961 "Unexpected custom legalisation");
2962 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2963 break;
2964 case ISD::SDIV:
2965 case ISD::UDIV:
2966 case ISD::SREM:
2967 case ISD::UREM:
2968 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2969 "Unexpected custom legalisation");
2970 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2971 Subtarget.hasDiv32() && VT == MVT::i32
2972 ? ISD::ANY_EXTEND
2973 : ISD::SIGN_EXTEND));
2974 break;
2975 case ISD::SHL:
2976 case ISD::SRA:
2977 case ISD::SRL:
2978 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2979 "Unexpected custom legalisation");
2980 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2981 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2982 break;
2983 }
2984 break;
2985 case ISD::ROTL:
2986 case ISD::ROTR:
2987 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2988 "Unexpected custom legalisation");
2989 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2990 break;
2991 case ISD::FP_TO_SINT: {
2992 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2993 "Unexpected custom legalisation");
2994 SDValue Src = N->getOperand(0);
2995 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2996 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2997 TargetLowering::TypeSoftenFloat) {
2998 if (Src.getValueType() == MVT::f16)
2999 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
3000 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
3001 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
3002 return;
3003 }
3004 // If the FP type needs to be softened, emit a library call using the 'si'
3005 // version. If we left it to default legalization we'd end up with 'di'.
3006 RTLIB::Libcall LC;
3007 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
3008 MakeLibCallOptions CallOptions;
3009 EVT OpVT = Src.getValueType();
3010 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
3011 SDValue Chain = SDValue();
3012 SDValue Result;
3013 std::tie(Result, Chain) =
3014 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
3015 Results.push_back(Result);
3016 break;
3017 }
3018 case ISD::BITCAST: {
3019 SDValue Src = N->getOperand(0);
3020 EVT SrcVT = Src.getValueType();
3021 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
3022 Subtarget.hasBasicF()) {
3023 SDValue Dst =
3024 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
3025 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
3026 }
3027 break;
3028 }
3029 case ISD::FP_TO_UINT: {
3030 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3031 "Unexpected custom legalisation");
3032 auto &TLI = DAG.getTargetLoweringInfo();
3033 SDValue Tmp1, Tmp2;
3034 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
3035 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
3036 break;
3037 }
3038 case ISD::BSWAP: {
3039 SDValue Src = N->getOperand(0);
3040 assert((VT == MVT::i16 || VT == MVT::i32) &&
3041 "Unexpected custom legalization");
3042 MVT GRLenVT = Subtarget.getGRLenVT();
3043 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3044 SDValue Tmp;
3045 switch (VT.getSizeInBits()) {
3046 default:
3047 llvm_unreachable("Unexpected operand width");
3048 case 16:
3049 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
3050 break;
3051 case 32:
3052 // Only LA64 will get here due to the size mismatch between VT and GRLenVT;
3053 // LA32 lowering is defined directly in LoongArchInstrInfo.
3054 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
3055 break;
3056 }
3057 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3058 break;
3059 }
3060 case ISD::BITREVERSE: {
3061 SDValue Src = N->getOperand(0);
3062 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3063 "Unexpected custom legalization");
3064 MVT GRLenVT = Subtarget.getGRLenVT();
3065 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3066 SDValue Tmp;
3067 switch (VT.getSizeInBits()) {
3068 default:
3069 llvm_unreachable("Unexpected operand width");
3070 case 8:
3071 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3072 break;
3073 case 32:
3074 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3075 break;
3076 }
3077 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3078 break;
3079 }
3080 case ISD::CTLZ:
3081 case ISD::CTTZ: {
3082 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3083 "Unexpected custom legalisation");
3084 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3085 break;
3086 }
3087 case ISD::INTRINSIC_W_CHAIN: {
3088 SDValue Chain = N->getOperand(0);
3089 SDValue Op2 = N->getOperand(2);
3090 MVT GRLenVT = Subtarget.getGRLenVT();
3091 const StringRef ErrorMsgOOR = "argument out of range";
3092 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3093 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3094
3095 switch (N->getConstantOperandVal(1)) {
3096 default:
3097 llvm_unreachable("Unexpected Intrinsic.");
3098 case Intrinsic::loongarch_movfcsr2gr: {
3099 if (!Subtarget.hasBasicF()) {
3100 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3101 return;
3102 }
3103 unsigned Imm = Op2->getAsZExtVal();
3104 if (!isUInt<2>(Imm)) {
3105 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3106 return;
3107 }
3108 SDValue MOVFCSR2GRResults = DAG.getNode(
3109 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3110 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3111 Results.push_back(
3112 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3113 Results.push_back(MOVFCSR2GRResults.getValue(1));
3114 break;
3115 }
3116#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3117 case Intrinsic::loongarch_##NAME: { \
3118 SDValue NODE = DAG.getNode( \
3119 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3120 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3121 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3122 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3123 Results.push_back(NODE.getValue(1)); \
3124 break; \
3125 }
3126 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3127 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3128 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3129 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3130 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3131 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3132#undef CRC_CASE_EXT_BINARYOP
3133
3134#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3135 case Intrinsic::loongarch_##NAME: { \
3136 SDValue NODE = DAG.getNode( \
3137 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3138 {Chain, Op2, \
3139 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3140 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3141 Results.push_back(NODE.getValue(1)); \
3142 break; \
3143 }
3144 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3145 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3146#undef CRC_CASE_EXT_UNARYOP
3147#define CSR_CASE(ID) \
3148 case Intrinsic::loongarch_##ID: { \
3149 if (!Subtarget.is64Bit()) \
3150 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3151 break; \
3152 }
3153 CSR_CASE(csrrd_d);
3154 CSR_CASE(csrwr_d);
3155 CSR_CASE(csrxchg_d);
3156 CSR_CASE(iocsrrd_d);
3157#undef CSR_CASE
3158 case Intrinsic::loongarch_csrrd_w: {
3159 unsigned Imm = Op2->getAsZExtVal();
3160 if (!isUInt<14>(Imm)) {
3161 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3162 return;
3163 }
3164 SDValue CSRRDResults =
3165 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3166 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3167 Results.push_back(
3168 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3169 Results.push_back(CSRRDResults.getValue(1));
3170 break;
3171 }
3172 case Intrinsic::loongarch_csrwr_w: {
3173 unsigned Imm = N->getConstantOperandVal(3);
3174 if (!isUInt<14>(Imm)) {
3175 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3176 return;
3177 }
3178 SDValue CSRWRResults =
3179 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3180 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3181 DAG.getConstant(Imm, DL, GRLenVT)});
3182 Results.push_back(
3183 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3184 Results.push_back(CSRWRResults.getValue(1));
3185 break;
3186 }
3187 case Intrinsic::loongarch_csrxchg_w: {
3188 unsigned Imm = N->getConstantOperandVal(4);
3189 if (!isUInt<14>(Imm)) {
3190 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3191 return;
3192 }
3193 SDValue CSRXCHGResults = DAG.getNode(
3194 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3195 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3196 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3197 DAG.getConstant(Imm, DL, GRLenVT)});
3198 Results.push_back(
3199 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3200 Results.push_back(CSRXCHGResults.getValue(1));
3201 break;
3202 }
3203#define IOCSRRD_CASE(NAME, NODE) \
3204 case Intrinsic::loongarch_##NAME: { \
3205 SDValue IOCSRRDResults = \
3206 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3207 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3208 Results.push_back( \
3209 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3210 Results.push_back(IOCSRRDResults.getValue(1)); \
3211 break; \
3212 }
3213 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3214 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3215 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3216#undef IOCSRRD_CASE
3217 case Intrinsic::loongarch_cpucfg: {
3218 SDValue CPUCFGResults =
3219 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3220 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3221 Results.push_back(
3222 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3223 Results.push_back(CPUCFGResults.getValue(1));
3224 break;
3225 }
3226 case Intrinsic::loongarch_lddir_d: {
3227 if (!Subtarget.is64Bit()) {
3228 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3229 return;
3230 }
3231 break;
3232 }
3233 }
3234 break;
3235 }
3236 case ISD::READ_REGISTER: {
3237 if (Subtarget.is64Bit())
3238 DAG.getContext()->emitError(
3239 "On LA64, only 64-bit registers can be read.");
3240 else
3241 DAG.getContext()->emitError(
3242 "On LA32, only 32-bit registers can be read.");
3243 Results.push_back(DAG.getUNDEF(VT));
3244 Results.push_back(N->getOperand(0));
3245 break;
3246 }
3247 case ISD::INTRINSIC_WO_CHAIN: {
3248 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3249 break;
3250 }
3251 case ISD::LROUND: {
3252 SDValue Op0 = N->getOperand(0);
3253 EVT OpVT = Op0.getValueType();
3254 RTLIB::Libcall LC =
3255 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3256 MakeLibCallOptions CallOptions;
3257 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3258 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3259 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3260 Results.push_back(Result);
3261 break;
3262 }
3263 case ISD::ATOMIC_CMP_SWAP: {
3264 replaceCMP_XCHG_128Results(N, Results, DAG);
3265 break;
3266 }
3267 }
3268}
3269
3270 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3271 TargetLowering::DAGCombinerInfo &DCI,
3272 const LoongArchSubtarget &Subtarget) {
3273 if (DCI.isBeforeLegalizeOps())
3274 return SDValue();
3275
3276 SDValue FirstOperand = N->getOperand(0);
3277 SDValue SecondOperand = N->getOperand(1);
3278 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3279 EVT ValTy = N->getValueType(0);
3280 SDLoc DL(N);
3281 uint64_t lsb, msb;
3282 unsigned SMIdx, SMLen;
3283 ConstantSDNode *CN;
3284 SDValue NewOperand;
3285 MVT GRLenVT = Subtarget.getGRLenVT();
3286
3287 // Op's second operand must be a shifted mask.
3288 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3289 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3290 return SDValue();
3291
3292 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3293 // Pattern match BSTRPICK.
3294 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3295 // => BSTRPICK $dst, $src, msb, lsb
3296 // where msb = lsb + len - 1
3297
3298 // The second operand of the shift must be an immediate.
3299 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3300 return SDValue();
3301
3302 lsb = CN->getZExtValue();
3303
3304 // Return if the shifted mask does not start at bit 0 or the sum of its
3305 // length and lsb exceeds the word's size.
3306 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3307 return SDValue();
3308
3309 NewOperand = FirstOperand.getOperand(0);
3310 } else {
3311 // Pattern match BSTRPICK.
3312 // $dst = and $src, (2**len - 1), if len > 12
3313 // => BSTRPICK $dst, $src, msb, lsb
3314 // where lsb = 0 and msb = len - 1
3315
3316 // If the mask is <= 0xfff, andi can be used instead.
3317 if (CN->getZExtValue() <= 0xfff)
3318 return SDValue();
3319
3320 // Return if the MSB exceeds the word's size.
3321 if (SMIdx + SMLen > ValTy.getSizeInBits())
3322 return SDValue();
3323
3324 if (SMIdx > 0) {
3325 // Omit if the constant has more than 2 uses. This is a conservative
3326 // decision. Whether it is a win depends on the HW microarchitecture.
3327 // However, it should always be better for 1 and 2 uses.
3328 if (CN->use_size() > 2)
3329 return SDValue();
3330 // Return if the constant can be composed by a single LU12I.W.
3331 if ((CN->getZExtValue() & 0xfff) == 0)
3332 return SDValue();
3333 // Return if the constant can be composed by a single ADDI with
3334 // the zero register.
3335 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3336 return SDValue();
3337 }
3338
3339 lsb = SMIdx;
3340 NewOperand = FirstOperand;
3341 }
3342
3343 msb = lsb + SMLen - 1;
3344 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3345 DAG.getConstant(msb, DL, GRLenVT),
3346 DAG.getConstant(lsb, DL, GRLenVT));
3347 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3348 return NR0;
3349 // Try to optimize to
3350 // bstrpick $Rd, $Rs, msb, lsb
3351 // slli $Rd, $Rd, lsb
3352 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3353 DAG.getConstant(lsb, DL, GRLenVT));
3354}
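// Worked examples: `and (srl $src, 8), 0xff` folds to
// `bstrpick $dst, $src, 15, 8`; `and $src, 0x7f80` (a shifted mask with
// MaskIdx = 7, MaskLen = 8 that no single andi, lu12i.w, or addi can build)
// folds to `bstrpick $dst, $src, 14, 7` followed by `slli $dst, $dst, 7`.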
3355
3356 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3357 TargetLowering::DAGCombinerInfo &DCI,
3358 const LoongArchSubtarget &Subtarget) {
3359 if (DCI.isBeforeLegalizeOps())
3360 return SDValue();
3361
3362 // $dst = srl (and $src, Mask), Shamt
3363 // =>
3364 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3365 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3366 //
3367
3368 SDValue FirstOperand = N->getOperand(0);
3369 ConstantSDNode *CN;
3370 EVT ValTy = N->getValueType(0);
3371 SDLoc DL(N);
3372 MVT GRLenVT = Subtarget.getGRLenVT();
3373 unsigned MaskIdx, MaskLen;
3374 uint64_t Shamt;
3375
3376 // The first operand must be an AND and the second operand of the AND must be
3377 // a shifted mask.
3378 if (FirstOperand.getOpcode() != ISD::AND ||
3379 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3380 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3381 return SDValue();
3382
3383 // The second operand (shift amount) must be an immediate.
3384 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3385 return SDValue();
3386
3387 Shamt = CN->getZExtValue();
3388 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3389 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3390 FirstOperand->getOperand(0),
3391 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3392 DAG.getConstant(Shamt, DL, GRLenVT));
3393
3394 return SDValue();
3395}
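// Worked example: `srl (and $src, 0xff0), 4` has MaskIdx = 4, MaskLen = 8 and
// Shamt = 4, satisfying MaskIdx <= Shamt <= MaskIdx + MaskLen - 1, so the
// pair folds to `bstrpick $dst, $src, 11, 4`.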
3396
3397 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3398 TargetLowering::DAGCombinerInfo &DCI,
3399 const LoongArchSubtarget &Subtarget) {
3400 MVT GRLenVT = Subtarget.getGRLenVT();
3401 EVT ValTy = N->getValueType(0);
3402 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3403 ConstantSDNode *CN0, *CN1;
3404 SDLoc DL(N);
3405 unsigned ValBits = ValTy.getSizeInBits();
3406 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3407 unsigned Shamt;
3408 bool SwapAndRetried = false;
3409
3410 if (DCI.isBeforeLegalizeOps())
3411 return SDValue();
3412
3413 if (ValBits != 32 && ValBits != 64)
3414 return SDValue();
3415
3416Retry:
3417 // 1st pattern to match BSTRINS:
3418 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3419 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3420 // =>
3421 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3422 if (N0.getOpcode() == ISD::AND &&
3423 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3424 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3425 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3426 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3427 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3428 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3429 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3430 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3431 (MaskIdx0 + MaskLen0 <= ValBits)) {
3432 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3433 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3434 N1.getOperand(0).getOperand(0),
3435 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3436 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3437 }
3438
3439 // 2nd pattern to match BSTRINS:
3440 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3441 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3442 // =>
3443 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3444 if (N0.getOpcode() == ISD::AND &&
3445 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3446 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3447 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3448 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3449 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3450 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3451 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3452 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3453 (MaskIdx0 + MaskLen0 <= ValBits)) {
3454 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3455 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3456 N1.getOperand(0).getOperand(0),
3457 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3458 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3459 }
3460
3461 // 3rd pattern to match BSTRINS:
3462 // R = or (and X, mask0), (and Y, mask1)
3463 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3464 // =>
3465 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3466 // where msb = lsb + size - 1
3467 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3468 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3469 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3470 (MaskIdx0 + MaskLen0 <= 64) &&
3471 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3472 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3473 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3474 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3475 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3476 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3477 DAG.getConstant(ValBits == 32
3478 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3479 : (MaskIdx0 + MaskLen0 - 1),
3480 DL, GRLenVT),
3481 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3482 }
3483
3484 // 4th pattern to match BSTRINS:
3485 // R = or (and X, mask), (shl Y, shamt)
3486 // where mask = (2**shamt - 1)
3487 // =>
3488 // R = BSTRINS X, Y, ValBits - 1, shamt
3489 // where ValBits = 32 or 64
3490 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3491 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3492 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3493 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3494 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3495 (MaskIdx0 + MaskLen0 <= ValBits)) {
3496 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3497 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3498 N1.getOperand(0),
3499 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3500 DAG.getConstant(Shamt, DL, GRLenVT));
3501 }
3502
3503 // 5th pattern to match BSTRINS:
3504 // R = or (and X, mask), const
3505 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3506 // =>
3507 // R = BSTRINS X, (const >> lsb), msb, lsb
3508 // where msb = lsb + size - 1
3509 if (N0.getOpcode() == ISD::AND &&
3510 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3511 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3512 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3513 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3514 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3515 return DAG.getNode(
3516 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3517 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3518 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3519 : (MaskIdx0 + MaskLen0 - 1),
3520 DL, GRLenVT),
3521 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3522 }
3523
3524 // 6th pattern.
3525 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3526 // by the incoming bits are known to be zero.
3527 // =>
3528 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3529 //
3530 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3531 // pattern is more common than the 1st. So we put the 1st before the 6th in
3532 // order to match as many nodes as possible.
3533 ConstantSDNode *CNMask, *CNShamt;
3534 unsigned MaskIdx, MaskLen;
3535 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3536 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3537 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3538 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3539 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3540 Shamt = CNShamt->getZExtValue();
3541 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3542 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3543 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3544 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3545 N1.getOperand(0).getOperand(0),
3546 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3547 DAG.getConstant(Shamt, DL, GRLenVT));
3548 }
3549 }
3550
3551 // 7th pattern.
3552 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3553 // overwritten by the incoming bits are known to be zero.
3554 // =>
3555 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3556 //
3557 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3558 // before the 7th in order to match as many nodes as possible.
3559 if (N1.getOpcode() == ISD::AND &&
3560 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3561 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3562 N1.getOperand(0).getOpcode() == ISD::SHL &&
3563 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3564 CNShamt->getZExtValue() == MaskIdx) {
3565 APInt ShMask(ValBits, CNMask->getZExtValue());
3566 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3567 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3568 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3569 N1.getOperand(0).getOperand(0),
3570 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3571 DAG.getConstant(MaskIdx, DL, GRLenVT));
3572 }
3573 }
3574
3575 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3576 if (!SwapAndRetried) {
3577 std::swap(N0, N1);
3578 SwapAndRetried = true;
3579 goto Retry;
3580 }
3581
3582 SwapAndRetried = false;
3583Retry2:
3584 // 8th pattern.
3585 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3586 // the incoming bits are known to be zero.
3587 // =>
3588 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3589 //
3590 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3591 // we put it here in order to match as many nodes as possible or generate
3592 // fewer instructions.
3593 if (N1.getOpcode() == ISD::AND &&
3594 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3595 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3596 APInt ShMask(ValBits, CNMask->getZExtValue());
3597 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3598 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3599 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3600 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3601 N1->getOperand(0),
3602 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3603 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3604 DAG.getConstant(MaskIdx, DL, GRLenVT));
3605 }
3606 }
3607 // Swap N0/N1 and retry.
3608 if (!SwapAndRetried) {
3609 std::swap(N0, N1);
3610 SwapAndRetried = true;
3611 goto Retry2;
3612 }
3613
3614 return SDValue();
3615}
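// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: a scalar model of BSTRINS for reference, assuming the
// documented semantics (replace bits [msb:lsb] of rd with the low bits of
// rs). The helper name is made up for illustration.
#include <cassert>
#include <cstdint>

static uint64_t bstrins(uint64_t rd, uint64_t rs, unsigned msb, unsigned lsb) {
  unsigned Len = msb - lsb + 1;
  uint64_t Mask = ((Len == 64) ? ~0ULL : ((1ULL << Len) - 1)) << lsb;
  return (rd & ~Mask) | ((rs << lsb) & Mask);
}

int main() {
  // 1st pattern with size = 8, lsb = 8:
  // (x & ~(0xff << 8)) | ((y << 8) & (0xff << 8)) == bstrins(x, y, 15, 8).
  uint64_t X = 0xaaaaaaaa, Y = 0x5b;
  uint64_t Mask1 = 0xffULL << 8;
  assert(((X & ~Mask1) | ((Y << 8) & Mask1)) == bstrins(X, Y, 15, 8));
  return 0;
}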
3616
3617static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3618 ExtType = ISD::NON_EXTLOAD;
3619
3620 switch (V.getNode()->getOpcode()) {
3621 case ISD::LOAD: {
3622 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3623 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3624 (LoadNode->getMemoryVT() == MVT::i16)) {
3625 ExtType = LoadNode->getExtensionType();
3626 return true;
3627 }
3628 return false;
3629 }
3630 case ISD::AssertSext: {
3631 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3632 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3633 ExtType = ISD::SEXTLOAD;
3634 return true;
3635 }
3636 return false;
3637 }
3638 case ISD::AssertZext: {
3639 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3640 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3641 ExtType = ISD::ZEXTLOAD;
3642 return true;
3643 }
3644 return false;
3645 }
3646 default:
3647 return false;
3648 }
3649
3650 return false;
3651}
3652
3653// Eliminate redundant truncation and zero-extension nodes.
3654// * Case 1:
3655// +------------+ +------------+ +------------+
3656// | Input1 | | Input2 | | CC |
3657// +------------+ +------------+ +------------+
3658// | | |
3659// V V +----+
3660// +------------+ +------------+ |
3661// | TRUNCATE | | TRUNCATE | |
3662// +------------+ +------------+ |
3663// | | |
3664// V V |
3665// +------------+ +------------+ |
3666// | ZERO_EXT | | ZERO_EXT | |
3667// +------------+ +------------+ |
3668// | | |
3669// | +-------------+ |
3670// V V | |
3671// +----------------+ | |
3672// | AND | | |
3673// +----------------+ | |
3674// | | |
3675// +---------------+ | |
3676// | | |
3677// V V V
3678// +-------------+
3679// | CMP |
3680// +-------------+
3681// * Case 2:
3682// +------------+ +------------+ +-------------+ +------------+ +------------+
3683// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3684// +------------+ +------------+ +-------------+ +------------+ +------------+
3685// | | | | |
3686// V | | | |
3687// +------------+ | | | |
3688// | XOR |<---------------------+ | |
3689// +------------+ | | |
3690// | | | |
3691// V V +---------------+ |
3692// +------------+ +------------+ | |
3693// | TRUNCATE | | TRUNCATE | | +-------------------------+
3694// +------------+ +------------+ | |
3695// | | | |
3696// V V | |
3697// +------------+ +------------+ | |
3698// | ZERO_EXT | | ZERO_EXT | | |
3699// +------------+ +------------+ | |
3700// | | | |
3701// V V | |
3702// +----------------+ | |
3703// | AND | | |
3704// +----------------+ | |
3705// | | |
3706// +---------------+ | |
3707// | | |
3708// V V V
3709// +-------------+
3710// | CMP |
3711// +-------------+
3712static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3713                                   TargetLowering::DAGCombinerInfo &DCI,
3714                                   const LoongArchSubtarget &Subtarget) {
3715 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3716
3717 SDNode *AndNode = N->getOperand(0).getNode();
3718 if (AndNode->getOpcode() != ISD::AND)
3719 return SDValue();
3720
3721 SDValue AndInputValue2 = AndNode->getOperand(1);
3722 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3723 return SDValue();
3724
3725 SDValue CmpInputValue = N->getOperand(1);
3726 SDValue AndInputValue1 = AndNode->getOperand(0);
3727 if (AndInputValue1.getOpcode() == ISD::XOR) {
3728 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3729 return SDValue();
3730 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3731 if (!CN || CN->getSExtValue() != -1)
3732 return SDValue();
3733 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3734 if (!CN || CN->getSExtValue() != 0)
3735 return SDValue();
3736 AndInputValue1 = AndInputValue1.getOperand(0);
3737 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3738 return SDValue();
3739 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3740 if (AndInputValue2 != CmpInputValue)
3741 return SDValue();
3742 } else {
3743 return SDValue();
3744 }
3745
3746 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3747 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3748 return SDValue();
3749
3750 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3751 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3752 return SDValue();
3753
3754 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3755 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3756 ISD::LoadExtType ExtType1;
3757 ISD::LoadExtType ExtType2;
3758
3759 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3760 !checkValueWidth(TruncInputValue2, ExtType2))
3761 return SDValue();
3762
3763 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3764 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3765 return SDValue();
3766
3767 if ((ExtType2 != ISD::ZEXTLOAD) &&
3768 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3769 return SDValue();
3770
3771 // These truncation and zero-extension nodes are not necessary, remove them.
3772 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3773 TruncInputValue1, TruncInputValue2);
3774 SDValue NewSetCC =
3775 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3776 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3777 return SDValue(N, 0);
3778}
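// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: why the trunc/zext pair is redundant, in scalar terms.
// When a value is already known to fit in 8 or 16 bits (an extending load,
// or AssertSext/AssertZext), truncating it and zero-extending it back is
// the identity, so the AND and the compare can run at the original width.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t A = 0xf0; // e.g. a zero-extending i8 load
  uint64_t B = 0x1f;
  uint64_t Narrow = (uint64_t)(uint32_t)A & (uint64_t)(uint32_t)B;
  assert(Narrow == (A & B)); // the trunc + zext round-trip changed nothing
  return 0;
}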
3779
3780// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3781static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3782                                      TargetLowering::DAGCombinerInfo &DCI,
3783                                      const LoongArchSubtarget &Subtarget) {
3784 if (DCI.isBeforeLegalizeOps())
3785 return SDValue();
3786
3787 SDValue Src = N->getOperand(0);
3788 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3789 return SDValue();
3790
3791 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3792 Src.getOperand(0));
3793}
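// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: a self-contained check of the identity used above on
// plain uint32_t. Reversing all 32 bits of a byte-swapped word equals
// reversing the bits inside each byte while keeping byte order.
#include <cassert>
#include <cstdint>

static uint32_t bitrev32(uint32_t x) { // reverse all 32 bits
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

static uint32_t revb2w(uint32_t x) { // byte swap within the 32-bit word
  return (x << 24) | ((x & 0xff00u) << 8) | ((x >> 8) & 0xff00u) | (x >> 24);
}

static uint32_t bitrev4b(uint32_t x) { // bit-reverse each byte in place
  uint32_t r = 0;
  for (int b = 0; b < 4; ++b) {
    uint32_t Byte = (x >> (8 * b)) & 0xffu, Rev = 0;
    for (int i = 0; i < 8; ++i)
      Rev |= ((Byte >> i) & 1u) << (7 - i);
    r |= Rev << (8 * b);
  }
  return r;
}

int main() {
  assert(bitrev32(revb2w(0x12345678u)) == bitrev4b(0x12345678u));
  return 0;
}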
3794
3795template <unsigned N>
3796static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3797                                       SelectionDAG &DAG,
3798 const LoongArchSubtarget &Subtarget,
3799 bool IsSigned = false) {
3800 SDLoc DL(Node);
3801 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3802 // Check the ImmArg.
3803 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3804 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3805 DAG.getContext()->emitError(Node->getOperationName(0) +
3806 ": argument out of range.");
3807 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3808 }
3809 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3810}
3811
3812template <unsigned N>
3813static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3814 SelectionDAG &DAG, bool IsSigned = false) {
3815 SDLoc DL(Node);
3816 EVT ResTy = Node->getValueType(0);
3817 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3818
3819 // Check the ImmArg.
3820 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3821 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3822 DAG.getContext()->emitError(Node->getOperationName(0) +
3823 ": argument out of range.");
3824 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3825 }
3826 return DAG.getConstant(
3827 APInt(ResTy.getScalarType().getSizeInBits(),
3828 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3829 DL, ResTy);
3830}
3831
3832static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3833 SDLoc DL(Node);
3834 EVT ResTy = Node->getValueType(0);
3835 SDValue Vec = Node->getOperand(2);
3836 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3837 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3838}
3839
3840static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3841 SDLoc DL(Node);
3842 EVT ResTy = Node->getValueType(0);
3843 SDValue One = DAG.getConstant(1, DL, ResTy);
3844 SDValue Bit =
3845 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3846
3847 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3848 DAG.getNOT(DL, Bit, ResTy));
3849}
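// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: the scalar shape of the lowering above, per element.
// truncateVecElts reduces the bit index modulo the element width, and the
// AND-with-NOT clears that single bit.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xff, B = 35;        // one 32-bit "element"
  uint32_t N = B & 31;              // B % EltBits, as truncateVecElts does
  assert((A & ~(1u << N)) == 0xf7); // bit 3 cleared
  return 0;
}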
3850
3851template <unsigned N>
3852static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3853 SDLoc DL(Node);
3854 EVT ResTy = Node->getValueType(0);
3855 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3856 // Check the unsigned ImmArg.
3857 if (!isUInt<N>(CImm->getZExtValue())) {
3858 DAG.getContext()->emitError(Node->getOperationName(0) +
3859 ": argument out of range.");
3860 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3861 }
3862
3863 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3864 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3865
3866 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3867}
3868
3869template <unsigned N>
3870static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3871 SDLoc DL(Node);
3872 EVT ResTy = Node->getValueType(0);
3873 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3874 // Check the unsigned ImmArg.
3875 if (!isUInt<N>(CImm->getZExtValue())) {
3876 DAG.getContext()->emitError(Node->getOperationName(0) +
3877 ": argument out of range.");
3878 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3879 }
3880
3881 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3882 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3883 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3884}
3885
3886template <unsigned N>
3887static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3888 SDLoc DL(Node);
3889 EVT ResTy = Node->getValueType(0);
3890 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3891 // Check the unsigned ImmArg.
3892 if (!isUInt<N>(CImm->getZExtValue())) {
3893 DAG.getContext()->emitError(Node->getOperationName(0) +
3894 ": argument out of range.");
3895 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3896 }
3897
3898 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3899 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3900 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3901}
3902
3903static SDValue
3904performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3905                                 TargetLowering::DAGCombinerInfo &DCI,
3906                                 const LoongArchSubtarget &Subtarget) {
3907 SDLoc DL(N);
3908 switch (N->getConstantOperandVal(0)) {
3909 default:
3910 break;
3911 case Intrinsic::loongarch_lsx_vadd_b:
3912 case Intrinsic::loongarch_lsx_vadd_h:
3913 case Intrinsic::loongarch_lsx_vadd_w:
3914 case Intrinsic::loongarch_lsx_vadd_d:
3915 case Intrinsic::loongarch_lasx_xvadd_b:
3916 case Intrinsic::loongarch_lasx_xvadd_h:
3917 case Intrinsic::loongarch_lasx_xvadd_w:
3918 case Intrinsic::loongarch_lasx_xvadd_d:
3919 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3920 N->getOperand(2));
3921 case Intrinsic::loongarch_lsx_vaddi_bu:
3922 case Intrinsic::loongarch_lsx_vaddi_hu:
3923 case Intrinsic::loongarch_lsx_vaddi_wu:
3924 case Intrinsic::loongarch_lsx_vaddi_du:
3925 case Intrinsic::loongarch_lasx_xvaddi_bu:
3926 case Intrinsic::loongarch_lasx_xvaddi_hu:
3927 case Intrinsic::loongarch_lasx_xvaddi_wu:
3928 case Intrinsic::loongarch_lasx_xvaddi_du:
3929 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3930 lowerVectorSplatImm<5>(N, 2, DAG));
3931 case Intrinsic::loongarch_lsx_vsub_b:
3932 case Intrinsic::loongarch_lsx_vsub_h:
3933 case Intrinsic::loongarch_lsx_vsub_w:
3934 case Intrinsic::loongarch_lsx_vsub_d:
3935 case Intrinsic::loongarch_lasx_xvsub_b:
3936 case Intrinsic::loongarch_lasx_xvsub_h:
3937 case Intrinsic::loongarch_lasx_xvsub_w:
3938 case Intrinsic::loongarch_lasx_xvsub_d:
3939 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3940 N->getOperand(2));
3941 case Intrinsic::loongarch_lsx_vsubi_bu:
3942 case Intrinsic::loongarch_lsx_vsubi_hu:
3943 case Intrinsic::loongarch_lsx_vsubi_wu:
3944 case Intrinsic::loongarch_lsx_vsubi_du:
3945 case Intrinsic::loongarch_lasx_xvsubi_bu:
3946 case Intrinsic::loongarch_lasx_xvsubi_hu:
3947 case Intrinsic::loongarch_lasx_xvsubi_wu:
3948 case Intrinsic::loongarch_lasx_xvsubi_du:
3949 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3950 lowerVectorSplatImm<5>(N, 2, DAG));
3951 case Intrinsic::loongarch_lsx_vneg_b:
3952 case Intrinsic::loongarch_lsx_vneg_h:
3953 case Intrinsic::loongarch_lsx_vneg_w:
3954 case Intrinsic::loongarch_lsx_vneg_d:
3955 case Intrinsic::loongarch_lasx_xvneg_b:
3956 case Intrinsic::loongarch_lasx_xvneg_h:
3957 case Intrinsic::loongarch_lasx_xvneg_w:
3958 case Intrinsic::loongarch_lasx_xvneg_d:
3959 return DAG.getNode(
3960 ISD::SUB, DL, N->getValueType(0),
3961 DAG.getConstant(
3962 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3963 /*isSigned=*/true),
3964 SDLoc(N), N->getValueType(0)),
3965 N->getOperand(1));
3966 case Intrinsic::loongarch_lsx_vmax_b:
3967 case Intrinsic::loongarch_lsx_vmax_h:
3968 case Intrinsic::loongarch_lsx_vmax_w:
3969 case Intrinsic::loongarch_lsx_vmax_d:
3970 case Intrinsic::loongarch_lasx_xvmax_b:
3971 case Intrinsic::loongarch_lasx_xvmax_h:
3972 case Intrinsic::loongarch_lasx_xvmax_w:
3973 case Intrinsic::loongarch_lasx_xvmax_d:
3974 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3975 N->getOperand(2));
3976 case Intrinsic::loongarch_lsx_vmax_bu:
3977 case Intrinsic::loongarch_lsx_vmax_hu:
3978 case Intrinsic::loongarch_lsx_vmax_wu:
3979 case Intrinsic::loongarch_lsx_vmax_du:
3980 case Intrinsic::loongarch_lasx_xvmax_bu:
3981 case Intrinsic::loongarch_lasx_xvmax_hu:
3982 case Intrinsic::loongarch_lasx_xvmax_wu:
3983 case Intrinsic::loongarch_lasx_xvmax_du:
3984 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3985 N->getOperand(2));
3986 case Intrinsic::loongarch_lsx_vmaxi_b:
3987 case Intrinsic::loongarch_lsx_vmaxi_h:
3988 case Intrinsic::loongarch_lsx_vmaxi_w:
3989 case Intrinsic::loongarch_lsx_vmaxi_d:
3990 case Intrinsic::loongarch_lasx_xvmaxi_b:
3991 case Intrinsic::loongarch_lasx_xvmaxi_h:
3992 case Intrinsic::loongarch_lasx_xvmaxi_w:
3993 case Intrinsic::loongarch_lasx_xvmaxi_d:
3994 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3995 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3996 case Intrinsic::loongarch_lsx_vmaxi_bu:
3997 case Intrinsic::loongarch_lsx_vmaxi_hu:
3998 case Intrinsic::loongarch_lsx_vmaxi_wu:
3999 case Intrinsic::loongarch_lsx_vmaxi_du:
4000 case Intrinsic::loongarch_lasx_xvmaxi_bu:
4001 case Intrinsic::loongarch_lasx_xvmaxi_hu:
4002 case Intrinsic::loongarch_lasx_xvmaxi_wu:
4003 case Intrinsic::loongarch_lasx_xvmaxi_du:
4004 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
4005 lowerVectorSplatImm<5>(N, 2, DAG));
4006 case Intrinsic::loongarch_lsx_vmin_b:
4007 case Intrinsic::loongarch_lsx_vmin_h:
4008 case Intrinsic::loongarch_lsx_vmin_w:
4009 case Intrinsic::loongarch_lsx_vmin_d:
4010 case Intrinsic::loongarch_lasx_xvmin_b:
4011 case Intrinsic::loongarch_lasx_xvmin_h:
4012 case Intrinsic::loongarch_lasx_xvmin_w:
4013 case Intrinsic::loongarch_lasx_xvmin_d:
4014 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
4015 N->getOperand(2));
4016 case Intrinsic::loongarch_lsx_vmin_bu:
4017 case Intrinsic::loongarch_lsx_vmin_hu:
4018 case Intrinsic::loongarch_lsx_vmin_wu:
4019 case Intrinsic::loongarch_lsx_vmin_du:
4020 case Intrinsic::loongarch_lasx_xvmin_bu:
4021 case Intrinsic::loongarch_lasx_xvmin_hu:
4022 case Intrinsic::loongarch_lasx_xvmin_wu:
4023 case Intrinsic::loongarch_lasx_xvmin_du:
4024 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
4025 N->getOperand(2));
4026 case Intrinsic::loongarch_lsx_vmini_b:
4027 case Intrinsic::loongarch_lsx_vmini_h:
4028 case Intrinsic::loongarch_lsx_vmini_w:
4029 case Intrinsic::loongarch_lsx_vmini_d:
4030 case Intrinsic::loongarch_lasx_xvmini_b:
4031 case Intrinsic::loongarch_lasx_xvmini_h:
4032 case Intrinsic::loongarch_lasx_xvmini_w:
4033 case Intrinsic::loongarch_lasx_xvmini_d:
4034 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
4035 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
4036 case Intrinsic::loongarch_lsx_vmini_bu:
4037 case Intrinsic::loongarch_lsx_vmini_hu:
4038 case Intrinsic::loongarch_lsx_vmini_wu:
4039 case Intrinsic::loongarch_lsx_vmini_du:
4040 case Intrinsic::loongarch_lasx_xvmini_bu:
4041 case Intrinsic::loongarch_lasx_xvmini_hu:
4042 case Intrinsic::loongarch_lasx_xvmini_wu:
4043 case Intrinsic::loongarch_lasx_xvmini_du:
4044 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
4045 lowerVectorSplatImm<5>(N, 2, DAG));
4046 case Intrinsic::loongarch_lsx_vmul_b:
4047 case Intrinsic::loongarch_lsx_vmul_h:
4048 case Intrinsic::loongarch_lsx_vmul_w:
4049 case Intrinsic::loongarch_lsx_vmul_d:
4050 case Intrinsic::loongarch_lasx_xvmul_b:
4051 case Intrinsic::loongarch_lasx_xvmul_h:
4052 case Intrinsic::loongarch_lasx_xvmul_w:
4053 case Intrinsic::loongarch_lasx_xvmul_d:
4054 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vmadd_b:
4057 case Intrinsic::loongarch_lsx_vmadd_h:
4058 case Intrinsic::loongarch_lsx_vmadd_w:
4059 case Intrinsic::loongarch_lsx_vmadd_d:
4060 case Intrinsic::loongarch_lasx_xvmadd_b:
4061 case Intrinsic::loongarch_lasx_xvmadd_h:
4062 case Intrinsic::loongarch_lasx_xvmadd_w:
4063 case Intrinsic::loongarch_lasx_xvmadd_d: {
4064 EVT ResTy = N->getValueType(0);
4065 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4066 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4067 N->getOperand(3)));
4068 }
4069 case Intrinsic::loongarch_lsx_vmsub_b:
4070 case Intrinsic::loongarch_lsx_vmsub_h:
4071 case Intrinsic::loongarch_lsx_vmsub_w:
4072 case Intrinsic::loongarch_lsx_vmsub_d:
4073 case Intrinsic::loongarch_lasx_xvmsub_b:
4074 case Intrinsic::loongarch_lasx_xvmsub_h:
4075 case Intrinsic::loongarch_lasx_xvmsub_w:
4076 case Intrinsic::loongarch_lasx_xvmsub_d: {
4077 EVT ResTy = N->getValueType(0);
4078 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4079 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4080 N->getOperand(3)));
4081 }
4082 case Intrinsic::loongarch_lsx_vdiv_b:
4083 case Intrinsic::loongarch_lsx_vdiv_h:
4084 case Intrinsic::loongarch_lsx_vdiv_w:
4085 case Intrinsic::loongarch_lsx_vdiv_d:
4086 case Intrinsic::loongarch_lasx_xvdiv_b:
4087 case Intrinsic::loongarch_lasx_xvdiv_h:
4088 case Intrinsic::loongarch_lasx_xvdiv_w:
4089 case Intrinsic::loongarch_lasx_xvdiv_d:
4090 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4091 N->getOperand(2));
4092 case Intrinsic::loongarch_lsx_vdiv_bu:
4093 case Intrinsic::loongarch_lsx_vdiv_hu:
4094 case Intrinsic::loongarch_lsx_vdiv_wu:
4095 case Intrinsic::loongarch_lsx_vdiv_du:
4096 case Intrinsic::loongarch_lasx_xvdiv_bu:
4097 case Intrinsic::loongarch_lasx_xvdiv_hu:
4098 case Intrinsic::loongarch_lasx_xvdiv_wu:
4099 case Intrinsic::loongarch_lasx_xvdiv_du:
4100 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4101 N->getOperand(2));
4102 case Intrinsic::loongarch_lsx_vmod_b:
4103 case Intrinsic::loongarch_lsx_vmod_h:
4104 case Intrinsic::loongarch_lsx_vmod_w:
4105 case Intrinsic::loongarch_lsx_vmod_d:
4106 case Intrinsic::loongarch_lasx_xvmod_b:
4107 case Intrinsic::loongarch_lasx_xvmod_h:
4108 case Intrinsic::loongarch_lasx_xvmod_w:
4109 case Intrinsic::loongarch_lasx_xvmod_d:
4110 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4111 N->getOperand(2));
4112 case Intrinsic::loongarch_lsx_vmod_bu:
4113 case Intrinsic::loongarch_lsx_vmod_hu:
4114 case Intrinsic::loongarch_lsx_vmod_wu:
4115 case Intrinsic::loongarch_lsx_vmod_du:
4116 case Intrinsic::loongarch_lasx_xvmod_bu:
4117 case Intrinsic::loongarch_lasx_xvmod_hu:
4118 case Intrinsic::loongarch_lasx_xvmod_wu:
4119 case Intrinsic::loongarch_lasx_xvmod_du:
4120 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4121 N->getOperand(2));
4122 case Intrinsic::loongarch_lsx_vand_v:
4123 case Intrinsic::loongarch_lasx_xvand_v:
4124 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4125 N->getOperand(2));
4126 case Intrinsic::loongarch_lsx_vor_v:
4127 case Intrinsic::loongarch_lasx_xvor_v:
4128 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4129 N->getOperand(2));
4130 case Intrinsic::loongarch_lsx_vxor_v:
4131 case Intrinsic::loongarch_lasx_xvxor_v:
4132 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4133 N->getOperand(2));
4134 case Intrinsic::loongarch_lsx_vnor_v:
4135 case Intrinsic::loongarch_lasx_xvnor_v: {
4136 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4137 N->getOperand(2));
4138 return DAG.getNOT(DL, Res, Res->getValueType(0));
4139 }
4140 case Intrinsic::loongarch_lsx_vandi_b:
4141 case Intrinsic::loongarch_lasx_xvandi_b:
4142 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4143 lowerVectorSplatImm<8>(N, 2, DAG));
4144 case Intrinsic::loongarch_lsx_vori_b:
4145 case Intrinsic::loongarch_lasx_xvori_b:
4146 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4147 lowerVectorSplatImm<8>(N, 2, DAG));
4148 case Intrinsic::loongarch_lsx_vxori_b:
4149 case Intrinsic::loongarch_lasx_xvxori_b:
4150 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4151 lowerVectorSplatImm<8>(N, 2, DAG));
4152 case Intrinsic::loongarch_lsx_vsll_b:
4153 case Intrinsic::loongarch_lsx_vsll_h:
4154 case Intrinsic::loongarch_lsx_vsll_w:
4155 case Intrinsic::loongarch_lsx_vsll_d:
4156 case Intrinsic::loongarch_lasx_xvsll_b:
4157 case Intrinsic::loongarch_lasx_xvsll_h:
4158 case Intrinsic::loongarch_lasx_xvsll_w:
4159 case Intrinsic::loongarch_lasx_xvsll_d:
4160 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4161 truncateVecElts(N, DAG));
4162 case Intrinsic::loongarch_lsx_vslli_b:
4163 case Intrinsic::loongarch_lasx_xvslli_b:
4164 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4165 lowerVectorSplatImm<3>(N, 2, DAG));
4166 case Intrinsic::loongarch_lsx_vslli_h:
4167 case Intrinsic::loongarch_lasx_xvslli_h:
4168 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4169 lowerVectorSplatImm<4>(N, 2, DAG));
4170 case Intrinsic::loongarch_lsx_vslli_w:
4171 case Intrinsic::loongarch_lasx_xvslli_w:
4172 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4173 lowerVectorSplatImm<5>(N, 2, DAG));
4174 case Intrinsic::loongarch_lsx_vslli_d:
4175 case Intrinsic::loongarch_lasx_xvslli_d:
4176 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4177 lowerVectorSplatImm<6>(N, 2, DAG));
4178 case Intrinsic::loongarch_lsx_vsrl_b:
4179 case Intrinsic::loongarch_lsx_vsrl_h:
4180 case Intrinsic::loongarch_lsx_vsrl_w:
4181 case Intrinsic::loongarch_lsx_vsrl_d:
4182 case Intrinsic::loongarch_lasx_xvsrl_b:
4183 case Intrinsic::loongarch_lasx_xvsrl_h:
4184 case Intrinsic::loongarch_lasx_xvsrl_w:
4185 case Intrinsic::loongarch_lasx_xvsrl_d:
4186 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4187 truncateVecElts(N, DAG));
4188 case Intrinsic::loongarch_lsx_vsrli_b:
4189 case Intrinsic::loongarch_lasx_xvsrli_b:
4190 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4191 lowerVectorSplatImm<3>(N, 2, DAG));
4192 case Intrinsic::loongarch_lsx_vsrli_h:
4193 case Intrinsic::loongarch_lasx_xvsrli_h:
4194 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4195 lowerVectorSplatImm<4>(N, 2, DAG));
4196 case Intrinsic::loongarch_lsx_vsrli_w:
4197 case Intrinsic::loongarch_lasx_xvsrli_w:
4198 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4199 lowerVectorSplatImm<5>(N, 2, DAG));
4200 case Intrinsic::loongarch_lsx_vsrli_d:
4201 case Intrinsic::loongarch_lasx_xvsrli_d:
4202 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4203 lowerVectorSplatImm<6>(N, 2, DAG));
4204 case Intrinsic::loongarch_lsx_vsra_b:
4205 case Intrinsic::loongarch_lsx_vsra_h:
4206 case Intrinsic::loongarch_lsx_vsra_w:
4207 case Intrinsic::loongarch_lsx_vsra_d:
4208 case Intrinsic::loongarch_lasx_xvsra_b:
4209 case Intrinsic::loongarch_lasx_xvsra_h:
4210 case Intrinsic::loongarch_lasx_xvsra_w:
4211 case Intrinsic::loongarch_lasx_xvsra_d:
4212 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4213 truncateVecElts(N, DAG));
4214 case Intrinsic::loongarch_lsx_vsrai_b:
4215 case Intrinsic::loongarch_lasx_xvsrai_b:
4216 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4217 lowerVectorSplatImm<3>(N, 2, DAG));
4218 case Intrinsic::loongarch_lsx_vsrai_h:
4219 case Intrinsic::loongarch_lasx_xvsrai_h:
4220 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4221 lowerVectorSplatImm<4>(N, 2, DAG));
4222 case Intrinsic::loongarch_lsx_vsrai_w:
4223 case Intrinsic::loongarch_lasx_xvsrai_w:
4224 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4225 lowerVectorSplatImm<5>(N, 2, DAG));
4226 case Intrinsic::loongarch_lsx_vsrai_d:
4227 case Intrinsic::loongarch_lasx_xvsrai_d:
4228 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4229 lowerVectorSplatImm<6>(N, 2, DAG));
4230 case Intrinsic::loongarch_lsx_vclz_b:
4231 case Intrinsic::loongarch_lsx_vclz_h:
4232 case Intrinsic::loongarch_lsx_vclz_w:
4233 case Intrinsic::loongarch_lsx_vclz_d:
4234 case Intrinsic::loongarch_lasx_xvclz_b:
4235 case Intrinsic::loongarch_lasx_xvclz_h:
4236 case Intrinsic::loongarch_lasx_xvclz_w:
4237 case Intrinsic::loongarch_lasx_xvclz_d:
4238 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4239 case Intrinsic::loongarch_lsx_vpcnt_b:
4240 case Intrinsic::loongarch_lsx_vpcnt_h:
4241 case Intrinsic::loongarch_lsx_vpcnt_w:
4242 case Intrinsic::loongarch_lsx_vpcnt_d:
4243 case Intrinsic::loongarch_lasx_xvpcnt_b:
4244 case Intrinsic::loongarch_lasx_xvpcnt_h:
4245 case Intrinsic::loongarch_lasx_xvpcnt_w:
4246 case Intrinsic::loongarch_lasx_xvpcnt_d:
4247 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4248 case Intrinsic::loongarch_lsx_vbitclr_b:
4249 case Intrinsic::loongarch_lsx_vbitclr_h:
4250 case Intrinsic::loongarch_lsx_vbitclr_w:
4251 case Intrinsic::loongarch_lsx_vbitclr_d:
4252 case Intrinsic::loongarch_lasx_xvbitclr_b:
4253 case Intrinsic::loongarch_lasx_xvbitclr_h:
4254 case Intrinsic::loongarch_lasx_xvbitclr_w:
4255 case Intrinsic::loongarch_lasx_xvbitclr_d:
4256 return lowerVectorBitClear(N, DAG);
4257 case Intrinsic::loongarch_lsx_vbitclri_b:
4258 case Intrinsic::loongarch_lasx_xvbitclri_b:
4259 return lowerVectorBitClearImm<3>(N, DAG);
4260 case Intrinsic::loongarch_lsx_vbitclri_h:
4261 case Intrinsic::loongarch_lasx_xvbitclri_h:
4262 return lowerVectorBitClearImm<4>(N, DAG);
4263 case Intrinsic::loongarch_lsx_vbitclri_w:
4264 case Intrinsic::loongarch_lasx_xvbitclri_w:
4265 return lowerVectorBitClearImm<5>(N, DAG);
4266 case Intrinsic::loongarch_lsx_vbitclri_d:
4267 case Intrinsic::loongarch_lasx_xvbitclri_d:
4268 return lowerVectorBitClearImm<6>(N, DAG);
4269 case Intrinsic::loongarch_lsx_vbitset_b:
4270 case Intrinsic::loongarch_lsx_vbitset_h:
4271 case Intrinsic::loongarch_lsx_vbitset_w:
4272 case Intrinsic::loongarch_lsx_vbitset_d:
4273 case Intrinsic::loongarch_lasx_xvbitset_b:
4274 case Intrinsic::loongarch_lasx_xvbitset_h:
4275 case Intrinsic::loongarch_lasx_xvbitset_w:
4276 case Intrinsic::loongarch_lasx_xvbitset_d: {
4277 EVT VecTy = N->getValueType(0);
4278 SDValue One = DAG.getConstant(1, DL, VecTy);
4279 return DAG.getNode(
4280 ISD::OR, DL, VecTy, N->getOperand(1),
4281 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4282 }
4283 case Intrinsic::loongarch_lsx_vbitseti_b:
4284 case Intrinsic::loongarch_lasx_xvbitseti_b:
4285 return lowerVectorBitSetImm<3>(N, DAG);
4286 case Intrinsic::loongarch_lsx_vbitseti_h:
4287 case Intrinsic::loongarch_lasx_xvbitseti_h:
4288 return lowerVectorBitSetImm<4>(N, DAG);
4289 case Intrinsic::loongarch_lsx_vbitseti_w:
4290 case Intrinsic::loongarch_lasx_xvbitseti_w:
4291 return lowerVectorBitSetImm<5>(N, DAG);
4292 case Intrinsic::loongarch_lsx_vbitseti_d:
4293 case Intrinsic::loongarch_lasx_xvbitseti_d:
4294 return lowerVectorBitSetImm<6>(N, DAG);
4295 case Intrinsic::loongarch_lsx_vbitrev_b:
4296 case Intrinsic::loongarch_lsx_vbitrev_h:
4297 case Intrinsic::loongarch_lsx_vbitrev_w:
4298 case Intrinsic::loongarch_lsx_vbitrev_d:
4299 case Intrinsic::loongarch_lasx_xvbitrev_b:
4300 case Intrinsic::loongarch_lasx_xvbitrev_h:
4301 case Intrinsic::loongarch_lasx_xvbitrev_w:
4302 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4303 EVT VecTy = N->getValueType(0);
4304 SDValue One = DAG.getConstant(1, DL, VecTy);
4305 return DAG.getNode(
4306 ISD::XOR, DL, VecTy, N->getOperand(1),
4307 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4308 }
4309 case Intrinsic::loongarch_lsx_vbitrevi_b:
4310 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4311 return lowerVectorBitRevImm<3>(N, DAG);
4312 case Intrinsic::loongarch_lsx_vbitrevi_h:
4313 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4314 return lowerVectorBitRevImm<4>(N, DAG);
4315 case Intrinsic::loongarch_lsx_vbitrevi_w:
4316 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4317 return lowerVectorBitRevImm<5>(N, DAG);
4318 case Intrinsic::loongarch_lsx_vbitrevi_d:
4319 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4320 return lowerVectorBitRevImm<6>(N, DAG);
4321 case Intrinsic::loongarch_lsx_vfadd_s:
4322 case Intrinsic::loongarch_lsx_vfadd_d:
4323 case Intrinsic::loongarch_lasx_xvfadd_s:
4324 case Intrinsic::loongarch_lasx_xvfadd_d:
4325 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4326 N->getOperand(2));
4327 case Intrinsic::loongarch_lsx_vfsub_s:
4328 case Intrinsic::loongarch_lsx_vfsub_d:
4329 case Intrinsic::loongarch_lasx_xvfsub_s:
4330 case Intrinsic::loongarch_lasx_xvfsub_d:
4331 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4332 N->getOperand(2));
4333 case Intrinsic::loongarch_lsx_vfmul_s:
4334 case Intrinsic::loongarch_lsx_vfmul_d:
4335 case Intrinsic::loongarch_lasx_xvfmul_s:
4336 case Intrinsic::loongarch_lasx_xvfmul_d:
4337 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4338 N->getOperand(2));
4339 case Intrinsic::loongarch_lsx_vfdiv_s:
4340 case Intrinsic::loongarch_lsx_vfdiv_d:
4341 case Intrinsic::loongarch_lasx_xvfdiv_s:
4342 case Intrinsic::loongarch_lasx_xvfdiv_d:
4343 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4344 N->getOperand(2));
4345 case Intrinsic::loongarch_lsx_vfmadd_s:
4346 case Intrinsic::loongarch_lsx_vfmadd_d:
4347 case Intrinsic::loongarch_lasx_xvfmadd_s:
4348 case Intrinsic::loongarch_lasx_xvfmadd_d:
4349 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4350 N->getOperand(2), N->getOperand(3));
4351 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4352 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4353 N->getOperand(1), N->getOperand(2),
4354 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4355 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4356 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4357 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4358 N->getOperand(1), N->getOperand(2),
4359 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4360 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4361 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4362 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4363 N->getOperand(1), N->getOperand(2),
4364 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4365 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4366 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4367 N->getOperand(1), N->getOperand(2),
4368 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4369 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4370 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4371 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4372 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4373 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4374 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4375 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4376 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4377 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4378 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4379 N->getOperand(1)));
4380 case Intrinsic::loongarch_lsx_vreplve_b:
4381 case Intrinsic::loongarch_lsx_vreplve_h:
4382 case Intrinsic::loongarch_lsx_vreplve_w:
4383 case Intrinsic::loongarch_lsx_vreplve_d:
4384 case Intrinsic::loongarch_lasx_xvreplve_b:
4385 case Intrinsic::loongarch_lasx_xvreplve_h:
4386 case Intrinsic::loongarch_lasx_xvreplve_w:
4387 case Intrinsic::loongarch_lasx_xvreplve_d:
4388 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4389 N->getOperand(1),
4390 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4391 N->getOperand(2)));
4392 }
4393 return SDValue();
4394}
4395
4396SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4397                                                   DAGCombinerInfo &DCI) const {
4398 SelectionDAG &DAG = DCI.DAG;
4399 switch (N->getOpcode()) {
4400 default:
4401 break;
4402 case ISD::AND:
4403 return performANDCombine(N, DAG, DCI, Subtarget);
4404 case ISD::OR:
4405 return performORCombine(N, DAG, DCI, Subtarget);
4406 case ISD::SETCC:
4407 return performSETCCCombine(N, DAG, DCI, Subtarget);
4408 case ISD::SRL:
4409 return performSRLCombine(N, DAG, DCI, Subtarget);
4410 case LoongArchISD::BITREV_W:
4411 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4412 case ISD::INTRINSIC_WO_CHAIN:
4413 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4414 }
4415 return SDValue();
4416}
4417
4418static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4419                                              MachineBasicBlock *MBB) {
4420 if (!ZeroDivCheck)
4421 return MBB;
4422
4423 // Build instructions:
4424 // MBB:
4425 // div(or mod) $dst, $dividend, $divisor
4426 // bnez $divisor, SinkMBB
4427 // BreakMBB:
4428 // break 7 // BRK_DIVZERO
4429 // SinkMBB:
4430 // fallthrough
4431 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4432 MachineFunction::iterator It = ++MBB->getIterator();
4433 MachineFunction *MF = MBB->getParent();
4434 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4435 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4436 MF->insert(It, BreakMBB);
4437 MF->insert(It, SinkMBB);
4438
4439 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4440 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4441 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4442
4443 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4444 DebugLoc DL = MI.getDebugLoc();
4445 MachineOperand &Divisor = MI.getOperand(2);
4446 Register DivisorReg = Divisor.getReg();
4447
4448 // MBB:
4449 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4450 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4451 .addMBB(SinkMBB);
4452 MBB->addSuccessor(BreakMBB);
4453 MBB->addSuccessor(SinkMBB);
4454
4455 // BreakMBB:
4456 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4457 // definition of BRK_DIVZERO.
4458 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4459 BreakMBB->addSuccessor(SinkMBB);
4460
4461 // Clear Divisor's kill flag.
4462 Divisor.setIsKill(false);
4463
4464 return SinkMBB;
4465}
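// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: the behaviour the inserted CFG implements, as plain
// C++. Note the order differs: the hardware performs the division first
// and traps via `break 7` afterwards; C++ must check first to avoid UB.
#include <cstdint>
#include <cstdlib>

static int32_t checkedDivW(int32_t Dividend, int32_t Divisor) {
  if (Divisor == 0) // `bnez $divisor, SinkMBB` falls through to BreakMBB
    std::abort();   // stands in for `break 7` (BRK_DIVZERO)
  return Dividend / Divisor;
}

int main() { return checkedDivW(42, 7) == 6 ? 0 : 1; }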
4466
4467static MachineBasicBlock *
4468emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4469                        const LoongArchSubtarget &Subtarget) {
4470 unsigned CondOpc;
4471 switch (MI.getOpcode()) {
4472 default:
4473 llvm_unreachable("Unexpected opcode");
4474 case LoongArch::PseudoVBZ:
4475 CondOpc = LoongArch::VSETEQZ_V;
4476 break;
4477 case LoongArch::PseudoVBZ_B:
4478 CondOpc = LoongArch::VSETANYEQZ_B;
4479 break;
4480 case LoongArch::PseudoVBZ_H:
4481 CondOpc = LoongArch::VSETANYEQZ_H;
4482 break;
4483 case LoongArch::PseudoVBZ_W:
4484 CondOpc = LoongArch::VSETANYEQZ_W;
4485 break;
4486 case LoongArch::PseudoVBZ_D:
4487 CondOpc = LoongArch::VSETANYEQZ_D;
4488 break;
4489 case LoongArch::PseudoVBNZ:
4490 CondOpc = LoongArch::VSETNEZ_V;
4491 break;
4492 case LoongArch::PseudoVBNZ_B:
4493 CondOpc = LoongArch::VSETALLNEZ_B;
4494 break;
4495 case LoongArch::PseudoVBNZ_H:
4496 CondOpc = LoongArch::VSETALLNEZ_H;
4497 break;
4498 case LoongArch::PseudoVBNZ_W:
4499 CondOpc = LoongArch::VSETALLNEZ_W;
4500 break;
4501 case LoongArch::PseudoVBNZ_D:
4502 CondOpc = LoongArch::VSETALLNEZ_D;
4503 break;
4504 case LoongArch::PseudoXVBZ:
4505 CondOpc = LoongArch::XVSETEQZ_V;
4506 break;
4507 case LoongArch::PseudoXVBZ_B:
4508 CondOpc = LoongArch::XVSETANYEQZ_B;
4509 break;
4510 case LoongArch::PseudoXVBZ_H:
4511 CondOpc = LoongArch::XVSETANYEQZ_H;
4512 break;
4513 case LoongArch::PseudoXVBZ_W:
4514 CondOpc = LoongArch::XVSETANYEQZ_W;
4515 break;
4516 case LoongArch::PseudoXVBZ_D:
4517 CondOpc = LoongArch::XVSETANYEQZ_D;
4518 break;
4519 case LoongArch::PseudoXVBNZ:
4520 CondOpc = LoongArch::XVSETNEZ_V;
4521 break;
4522 case LoongArch::PseudoXVBNZ_B:
4523 CondOpc = LoongArch::XVSETALLNEZ_B;
4524 break;
4525 case LoongArch::PseudoXVBNZ_H:
4526 CondOpc = LoongArch::XVSETALLNEZ_H;
4527 break;
4528 case LoongArch::PseudoXVBNZ_W:
4529 CondOpc = LoongArch::XVSETALLNEZ_W;
4530 break;
4531 case LoongArch::PseudoXVBNZ_D:
4532 CondOpc = LoongArch::XVSETALLNEZ_D;
4533 break;
4534 }
4535
4536 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4537 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4538 DebugLoc DL = MI.getDebugLoc();
4539 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4540 MachineFunction::iterator It = ++BB->getIterator();
4541
4542 MachineFunction *F = BB->getParent();
4543 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4544 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4545 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4546
4547 F->insert(It, FalseBB);
4548 F->insert(It, TrueBB);
4549 F->insert(It, SinkBB);
4550
4551 // Transfer the remainder of MBB and its successor edges to Sink.
4552 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4553 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4554
4555 // Insert the real instruction to BB.
4556 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4557 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4558
4559 // Insert branch.
4560 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4561 BB->addSuccessor(FalseBB);
4562 BB->addSuccessor(TrueBB);
4563
4564 // FalseBB.
4565 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4566 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4567 .addReg(LoongArch::R0)
4568 .addImm(0);
4569 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4570 FalseBB->addSuccessor(SinkBB);
4571
4572 // TrueBB.
4573 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4574 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4575 .addReg(LoongArch::R0)
4576 .addImm(1);
4577 TrueBB->addSuccessor(SinkBB);
4578
4579 // SinkBB: merge the results.
4580 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4581 MI.getOperand(0).getReg())
4582 .addReg(RD1)
4583 .addMBB(FalseBB)
4584 .addReg(RD2)
4585 .addMBB(TrueBB);
4586
4587 // The pseudo instruction is gone now.
4588 MI.eraseFromParent();
4589 return SinkBB;
4590}
4591
4592static MachineBasicBlock *
4593emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4594                     const LoongArchSubtarget &Subtarget) {
4595 unsigned InsOp;
4596 unsigned HalfSize;
4597 switch (MI.getOpcode()) {
4598 default:
4599 llvm_unreachable("Unexpected opcode");
4600 case LoongArch::PseudoXVINSGR2VR_B:
4601 HalfSize = 16;
4602 InsOp = LoongArch::VINSGR2VR_B;
4603 break;
4604 case LoongArch::PseudoXVINSGR2VR_H:
4605 HalfSize = 8;
4606 InsOp = LoongArch::VINSGR2VR_H;
4607 break;
4608 }
4609 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4610 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4611 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4612 DebugLoc DL = MI.getDebugLoc();
4613 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4614 // XDst = vector_insert XSrc, Elt, Idx
4615 Register XDst = MI.getOperand(0).getReg();
4616 Register XSrc = MI.getOperand(1).getReg();
4617 Register Elt = MI.getOperand(2).getReg();
4618 unsigned Idx = MI.getOperand(3).getImm();
4619
4620 Register ScratchReg1 = XSrc;
4621 if (Idx >= HalfSize) {
4622 ScratchReg1 = MRI.createVirtualRegister(RC);
4623 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4624 .addReg(XSrc)
4625 .addReg(XSrc)
4626 .addImm(1);
4627 }
4628
4629 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4630 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4631 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4632 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4633 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4634 .addReg(ScratchSubReg1)
4635 .addReg(Elt)
4636 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4637
4638 Register ScratchReg2 = XDst;
4639 if (Idx >= HalfSize)
4640 ScratchReg2 = MRI.createVirtualRegister(RC);
4641
4642 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4643 .addImm(0)
4644 .addReg(ScratchSubReg2)
4645 .addImm(LoongArch::sub_128);
4646
4647 if (Idx >= HalfSize)
4648 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4649 .addReg(XSrc)
4650 .addReg(ScratchReg2)
4651 .addImm(2);
4652
4653 MI.eraseFromParent();
4654 return BB;
4655}
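// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: a container-level model of the high-half path above,
// treating the 256-bit register as 2 * HalfSize elements. The xvpermi.q
// lane selections are assumed from the intent of the expansion: imm 1
// brings XSrc's high half down, imm 2 puts the updated half back while
// keeping XSrc's low half.
#include <array>
#include <cassert>

int main() {
  constexpr unsigned HalfSize = 8; // PseudoXVINSGR2VR_H
  std::array<int, 2 * HalfSize> XSrc{};
  unsigned Idx = 11; // Idx >= HalfSize: element lives in the high half
  int Elt = 42;

  std::array<int, 2 * HalfSize> Scratch = XSrc;
  for (unsigned i = 0; i < HalfSize; ++i)
    Scratch[i] = XSrc[i + HalfSize]; // XVPERMI_Q ..., 1
  Scratch[Idx - HalfSize] = Elt;     // VINSGR2VR_H on the 128-bit subreg

  std::array<int, 2 * HalfSize> XDst = XSrc;
  for (unsigned i = 0; i < HalfSize; ++i)
    XDst[i + HalfSize] = Scratch[i]; // XVPERMI_Q ..., 2

  std::array<int, 2 * HalfSize> Ref = XSrc;
  Ref[Idx] = Elt; // reference semantics: plain vector_insert
  assert(XDst == Ref);
  return 0;
}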
4656
4657static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4658                                          MachineBasicBlock *BB,
4659                                          const LoongArchSubtarget &Subtarget) {
4660 assert(Subtarget.hasExtLSX());
4661 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4662 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4663 DebugLoc DL = MI.getDebugLoc();
4664 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4665 Register Dst = MI.getOperand(0).getReg();
4666 Register Src = MI.getOperand(1).getReg();
4667 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4668 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4669 Register ScratchReg3 = MRI.createVirtualRegister(RC);
4670
4671 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4672 BuildMI(*BB, MI, DL,
4673 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4674 : LoongArch::VINSGR2VR_W),
4675 ScratchReg2)
4676 .addReg(ScratchReg1)
4677 .addReg(Src)
4678 .addImm(0);
4679 BuildMI(
4680 *BB, MI, DL,
4681 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4682 ScratchReg3)
4683 .addReg(ScratchReg2);
4684 BuildMI(*BB, MI, DL,
4685 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4686 : LoongArch::VPICKVE2GR_W),
4687 Dst)
4688 .addReg(ScratchReg3)
4689 .addImm(0);
4690
4691 MI.eraseFromParent();
4692 return BB;
4693}
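// --- Worked example (editor's sketch, not part of the source) ---
// Illustrative only: the scalar contract of the expansion above, which
// moves the GPR into lane 0 of an LSX register, runs vpcnt, and reads
// lane 0 back. The loop below is just a reference popcount.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Src = 0xf0f00001ULL;
  unsigned Count = 0;
  for (uint64_t V = Src; V; V &= V - 1) // clear the lowest set bit
    ++Count;
  assert(Count == 9); // what VPCNT_D must leave in lane 0
  return 0;
}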
4694
4695MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4696 MachineInstr &MI, MachineBasicBlock *BB) const {
4697 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4698 DebugLoc DL = MI.getDebugLoc();
4699
4700 switch (MI.getOpcode()) {
4701 default:
4702 llvm_unreachable("Unexpected instr type to insert");
4703 case LoongArch::DIV_W:
4704 case LoongArch::DIV_WU:
4705 case LoongArch::MOD_W:
4706 case LoongArch::MOD_WU:
4707 case LoongArch::DIV_D:
4708 case LoongArch::DIV_DU:
4709 case LoongArch::MOD_D:
4710 case LoongArch::MOD_DU:
4711 return insertDivByZeroTrap(MI, BB);
4712 break;
4713 case LoongArch::WRFCSR: {
4714 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4715 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4716 .addReg(MI.getOperand(1).getReg());
4717 MI.eraseFromParent();
4718 return BB;
4719 }
4720 case LoongArch::RDFCSR: {
4721 MachineInstr *ReadFCSR =
4722 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4723 MI.getOperand(0).getReg())
4724 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4725 ReadFCSR->getOperand(1).setIsUndef();
4726 MI.eraseFromParent();
4727 return BB;
4728 }
4729 case LoongArch::PseudoVBZ:
4730 case LoongArch::PseudoVBZ_B:
4731 case LoongArch::PseudoVBZ_H:
4732 case LoongArch::PseudoVBZ_W:
4733 case LoongArch::PseudoVBZ_D:
4734 case LoongArch::PseudoVBNZ:
4735 case LoongArch::PseudoVBNZ_B:
4736 case LoongArch::PseudoVBNZ_H:
4737 case LoongArch::PseudoVBNZ_W:
4738 case LoongArch::PseudoVBNZ_D:
4739 case LoongArch::PseudoXVBZ:
4740 case LoongArch::PseudoXVBZ_B:
4741 case LoongArch::PseudoXVBZ_H:
4742 case LoongArch::PseudoXVBZ_W:
4743 case LoongArch::PseudoXVBZ_D:
4744 case LoongArch::PseudoXVBNZ:
4745 case LoongArch::PseudoXVBNZ_B:
4746 case LoongArch::PseudoXVBNZ_H:
4747 case LoongArch::PseudoXVBNZ_W:
4748 case LoongArch::PseudoXVBNZ_D:
4749 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4750 case LoongArch::PseudoXVINSGR2VR_B:
4751 case LoongArch::PseudoXVINSGR2VR_H:
4752 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4753 case LoongArch::PseudoCTPOP:
4754 return emitPseudoCTPOP(MI, BB, Subtarget);
4755 case TargetOpcode::STATEPOINT:
4756 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
4757 // while bl call instruction (where statepoint will be lowered at the
4758 // end) has implicit def. This def is early-clobber as it will be set at
4759 // the moment of the call and earlier than any use is read.
4760 // Add this implicit dead def here as a workaround.
4761 MI.addOperand(*MI.getMF(),
4762 MachineOperand::CreateReg(
4763 LoongArch::R1, /*isDef*/ true,
4764 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4765 /*isUndef*/ false, /*isEarlyClobber*/ true));
4766 if (!Subtarget.is64Bit())
4767 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4768 return emitPatchPoint(MI, BB);
4769 }
4770}
4771
4772bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4773    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4774 unsigned *Fast) const {
4775 if (!Subtarget.hasUAL())
4776 return false;
4777
4778 // TODO: set reasonable speed number.
4779 if (Fast)
4780 *Fast = 1;
4781 return true;
4782}
4783
4784const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4785 switch ((LoongArchISD::NodeType)Opcode) {
4786 case LoongArchISD::FIRST_NUMBER:
4787 break;
4788
4789#define NODE_NAME_CASE(node) \
4790 case LoongArchISD::node: \
4791 return "LoongArchISD::" #node;
4792
4793 // TODO: Add more target-dependent nodes later.
4794 NODE_NAME_CASE(CALL)
4795 NODE_NAME_CASE(CALL_MEDIUM)
4796 NODE_NAME_CASE(CALL_LARGE)
4797 NODE_NAME_CASE(RET)
4798 NODE_NAME_CASE(TAIL)
4799 NODE_NAME_CASE(TAIL_MEDIUM)
4800 NODE_NAME_CASE(TAIL_LARGE)
4801 NODE_NAME_CASE(SLL_W)
4802 NODE_NAME_CASE(SRA_W)
4803 NODE_NAME_CASE(SRL_W)
4804 NODE_NAME_CASE(BSTRINS)
4805 NODE_NAME_CASE(BSTRPICK)
4806 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4807 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4808 NODE_NAME_CASE(FTINT)
4809 NODE_NAME_CASE(REVB_2H)
4810 NODE_NAME_CASE(REVB_2W)
4811 NODE_NAME_CASE(BITREV_4B)
4812 NODE_NAME_CASE(BITREV_8B)
4813 NODE_NAME_CASE(BITREV_W)
4814 NODE_NAME_CASE(ROTR_W)
4815 NODE_NAME_CASE(ROTL_W)
4816 NODE_NAME_CASE(DIV_W)
4817 NODE_NAME_CASE(DIV_WU)
4818 NODE_NAME_CASE(MOD_W)
4819 NODE_NAME_CASE(MOD_WU)
4820 NODE_NAME_CASE(CLZ_W)
4821 NODE_NAME_CASE(CTZ_W)
4822 NODE_NAME_CASE(DBAR)
4823 NODE_NAME_CASE(IBAR)
4824 NODE_NAME_CASE(BREAK)
4825 NODE_NAME_CASE(SYSCALL)
4826 NODE_NAME_CASE(CRC_W_B_W)
4827 NODE_NAME_CASE(CRC_W_H_W)
4828 NODE_NAME_CASE(CRC_W_W_W)
4829 NODE_NAME_CASE(CRC_W_D_W)
4830 NODE_NAME_CASE(CRCC_W_B_W)
4831 NODE_NAME_CASE(CRCC_W_H_W)
4832 NODE_NAME_CASE(CRCC_W_W_W)
4833 NODE_NAME_CASE(CRCC_W_D_W)
4834 NODE_NAME_CASE(CSRRD)
4835 NODE_NAME_CASE(CSRWR)
4836 NODE_NAME_CASE(CSRXCHG)
4837 NODE_NAME_CASE(IOCSRRD_B)
4838 NODE_NAME_CASE(IOCSRRD_H)
4839 NODE_NAME_CASE(IOCSRRD_W)
4840 NODE_NAME_CASE(IOCSRRD_D)
4841 NODE_NAME_CASE(IOCSRWR_B)
4842 NODE_NAME_CASE(IOCSRWR_H)
4843 NODE_NAME_CASE(IOCSRWR_W)
4844 NODE_NAME_CASE(IOCSRWR_D)
4845 NODE_NAME_CASE(CPUCFG)
4846 NODE_NAME_CASE(MOVGR2FCSR)
4847 NODE_NAME_CASE(MOVFCSR2GR)
4848 NODE_NAME_CASE(CACOP_D)
4849 NODE_NAME_CASE(CACOP_W)
4850 NODE_NAME_CASE(VSHUF)
4851 NODE_NAME_CASE(VPICKEV)
4852 NODE_NAME_CASE(VPICKOD)
4853 NODE_NAME_CASE(VPACKEV)
4854 NODE_NAME_CASE(VPACKOD)
4855 NODE_NAME_CASE(VILVL)
4856 NODE_NAME_CASE(VILVH)
4857 NODE_NAME_CASE(VSHUF4I)
4858 NODE_NAME_CASE(VREPLVEI)
4859 NODE_NAME_CASE(VREPLGR2VR)
4860 NODE_NAME_CASE(XVPERMI)
4861 NODE_NAME_CASE(VPICK_SEXT_ELT)
4862 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4863 NODE_NAME_CASE(VREPLVE)
4864 NODE_NAME_CASE(VALL_ZERO)
4865 NODE_NAME_CASE(VANY_ZERO)
4866 NODE_NAME_CASE(VALL_NONZERO)
4867 NODE_NAME_CASE(VANY_NONZERO)
4868 NODE_NAME_CASE(FRECIPE)
4869 NODE_NAME_CASE(FRSQRTE)
4870 }
4871#undef NODE_NAME_CASE
4872 return nullptr;
4873}
4874
4875//===----------------------------------------------------------------------===//
4876// Calling Convention Implementation
4877//===----------------------------------------------------------------------===//
4878
4879// Eight general-purpose registers a0-a7 used for passing integer arguments,
4880// with a0-a1 reused to return values. Generally, the GPRs are used to pass
4881// fixed-point arguments, and floating-point arguments when no FPR is available
4882// or with soft float ABI.
4883const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4884 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4885 LoongArch::R10, LoongArch::R11};
4886// Eight floating-point registers fa0-fa7 used for passing floating-point
4887// arguments, and fa0-fa1 are also used to return values.
4888const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4889 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4890 LoongArch::F6, LoongArch::F7};
4891// FPR32 and FPR64 alias each other.
4892const MCPhysReg ArgFPR64s[] = {
4893 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4894 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4895
4896const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4897 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4898 LoongArch::VR6, LoongArch::VR7};
4899
4900const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4901 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4902 LoongArch::XR6, LoongArch::XR7};
4903
4904// Pass a 2*GRLen argument that has been split into two GRLen values through
4905// registers or the stack as necessary.
4906static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4907 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4908 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4909 ISD::ArgFlagsTy ArgFlags2) {
4910 unsigned GRLenInBytes = GRLen / 8;
4911 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4912 // At least one half can be passed via register.
4913 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4914 VA1.getLocVT(), CCValAssign::Full));
4915 } else {
4916 // Both halves must be passed on the stack, with proper alignment.
4917 Align StackAlign =
4918 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4919     State.addLoc(
4920         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4921                             State.AllocateStack(GRLenInBytes, StackAlign),
4922                             VA1.getLocVT(), CCValAssign::Full));
4923     State.addLoc(CCValAssign::getMem(
4924         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4925         LocVT2, CCValAssign::Full));
4926 return false;
4927 }
4928 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4929 // The second half can also be passed via register.
4930 State.addLoc(
4931 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4932 } else {
4933     // The second half is passed via the stack, without additional alignment.
4934     State.addLoc(CCValAssign::getMem(
4935         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4936         LocVT2, CCValAssign::Full));
4937 }
4938 return false;
4939}
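// Illustrative example (not part of the upstream source): on LA64 an i128
// argument is split into two i64 halves and handled by the routine above. If
// a4 is the next free GPR, the halves land in a4/a5; if only a7 is free, the
// first half goes in a7 and the second to a GRLen-sized stack slot; if no GPR
// is free, both halves go to properly aligned stack slots.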
4940
4941 // Implements the LoongArch calling convention. Returns true upon failure.
4942 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4943                          unsigned ValNo, MVT ValVT,
4944 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4945 CCState &State, bool IsFixed, bool IsRet,
4946 Type *OrigTy) {
4947 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4948   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4949 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4950 MVT LocVT = ValVT;
4951
4952 // Any return value split into more than two values can't be returned
4953 // directly.
4954 if (IsRet && ValNo > 1)
4955 return true;
4956
4957   // Pass floats via GPR if the argument is variadic or if no FPR is available.
4958 bool UseGPRForFloat = true;
4959
4960 switch (ABI) {
4961 default:
4962 llvm_unreachable("Unexpected ABI");
4963     break;
4964   case LoongArchABI::ABI_ILP32F:
4965   case LoongArchABI::ABI_ILP32D:
4966   case LoongArchABI::ABI_LP64F:
4967   case LoongArchABI::ABI_LP64D:
4968     UseGPRForFloat = !IsFixed;
4969     break;
4970   case LoongArchABI::ABI_ILP32S:
4971   case LoongArchABI::ABI_LP64S:
4972     break;
4973 }
4974
4975 // FPR32 and FPR64 alias each other.
4976 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4977 UseGPRForFloat = true;
4978
4979 if (UseGPRForFloat && ValVT == MVT::f32) {
4980 LocVT = GRLenVT;
4981 LocInfo = CCValAssign::BCvt;
4982 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4983 LocVT = MVT::i64;
4984 LocInfo = CCValAssign::BCvt;
4985 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4986 // TODO: Handle passing f64 on LA32 with D feature.
4987 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4988 }
4989
4990 // If this is a variadic argument, the LoongArch calling convention requires
4991 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4992 // byte alignment. An aligned register should be used regardless of whether
4993 // the original argument was split during legalisation or not. The argument
4994 // will not be passed by registers if the original type is larger than
4995 // 2*GRLen, so the register alignment rule does not apply.
4996 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4997 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4998 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4999 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
5000 // Skip 'odd' register if necessary.
5001 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
5002 State.AllocateReg(ArgGPRs);
5003 }
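// Worked example of the rule above (illustrative): on LA32 (GRLen=32) a
// variadic double has size and alignment 2*GRLen/8 = 8, so if a0-a2 are
// already taken, the odd register a3 is skipped and the double travels in the
// aligned pair a4/a5. On LA64 the same rule applies to 16-byte entities such
// as i128.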
5004
5005 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
5006 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
5007 State.getPendingArgFlags();
5008
5009 assert(PendingLocs.size() == PendingArgFlags.size() &&
5010 "PendingLocs and PendingArgFlags out of sync");
5011
5012 // Split arguments might be passed indirectly, so keep track of the pending
5013 // values.
5014 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
5015 LocVT = GRLenVT;
5016 LocInfo = CCValAssign::Indirect;
5017 PendingLocs.push_back(
5018 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
5019 PendingArgFlags.push_back(ArgFlags);
5020 if (!ArgFlags.isSplitEnd()) {
5021 return false;
5022 }
5023 }
5024
5025 // If the split argument only had two elements, it should be passed directly
5026 // in registers or on the stack.
5027 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
5028 PendingLocs.size() <= 2) {
5029 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
5030 // Apply the normal calling convention rules to the first half of the
5031 // split argument.
5032 CCValAssign VA = PendingLocs[0];
5033 ISD::ArgFlagsTy AF = PendingArgFlags[0];
5034 PendingLocs.clear();
5035 PendingArgFlags.clear();
5036 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
5037 ArgFlags);
5038 }
5039
5040 // Allocate to a register if possible, or else a stack slot.
5041 Register Reg;
5042 unsigned StoreSizeBytes = GRLen / 8;
5043 Align StackAlign = Align(GRLen / 8);
5044
5045 if (ValVT == MVT::f32 && !UseGPRForFloat)
5046 Reg = State.AllocateReg(ArgFPR32s);
5047 else if (ValVT == MVT::f64 && !UseGPRForFloat)
5048 Reg = State.AllocateReg(ArgFPR64s);
5049 else if (ValVT.is128BitVector())
5050 Reg = State.AllocateReg(ArgVRs);
5051 else if (ValVT.is256BitVector())
5052 Reg = State.AllocateReg(ArgXRs);
5053 else
5054 Reg = State.AllocateReg(ArgGPRs);
5055
5056 unsigned StackOffset =
5057 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
5058
5059 // If we reach this point and PendingLocs is non-empty, we must be at the
5060 // end of a split argument that must be passed indirectly.
5061 if (!PendingLocs.empty()) {
5062 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
5063 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
5064 for (auto &It : PendingLocs) {
5065 if (Reg)
5066 It.convertToReg(Reg);
5067 else
5068 It.convertToMem(StackOffset);
5069 State.addLoc(It);
5070 }
5071 PendingLocs.clear();
5072 PendingArgFlags.clear();
5073 return false;
5074 }
5075 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5076          "Expected a GRLenVT at this stage");
5077
5078 if (Reg) {
5079 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5080 return false;
5081 }
5082
5083 // When a floating-point value is passed on the stack, no bit-cast is needed.
5084 if (ValVT.isFloatingPoint()) {
5085 LocVT = ValVT;
5086 LocInfo = CCValAssign::Full;
5087 }
5088
5089 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5090 return false;
5091}
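// End-to-end sketch (illustrative): under LP64D, for `void f(float a,
// double b)` the fixed argument a is assigned fa0 with LocVT f32 and b is
// assigned fa1 with LocVT f64; once fa0-fa7 are exhausted, further floats are
// bit-cast (BCvt) into GPRs, and once those run out they spill to GRLen-sized
// stack slots.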
5092
5093void LoongArchTargetLowering::analyzeInputArgs(
5094 MachineFunction &MF, CCState &CCInfo,
5095 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5096     LoongArchCCAssignFn Fn) const {
5097   FunctionType *FType = MF.getFunction().getFunctionType();
5098   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5099 MVT ArgVT = Ins[i].VT;
5100 Type *ArgTy = nullptr;
5101 if (IsRet)
5102 ArgTy = FType->getReturnType();
5103 else if (Ins[i].isOrigArg())
5104       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5105     LoongArchABI::ABI ABI =
5106         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5107     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5108 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5109 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5110 << '\n');
5111 llvm_unreachable("");
5112 }
5113 }
5114}
5115
5116void LoongArchTargetLowering::analyzeOutputArgs(
5117 MachineFunction &MF, CCState &CCInfo,
5118 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5119 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5120 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5121 MVT ArgVT = Outs[i].VT;
5122     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5123     LoongArchABI::ABI ABI =
5124         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5125     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5126 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5127 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5128 << "\n");
5129 llvm_unreachable("");
5130 }
5131 }
5132}
5133
5134// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5135 // values.
5136 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5137                                    const CCValAssign &VA, const SDLoc &DL) {
5138 switch (VA.getLocInfo()) {
5139 default:
5140 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5141 case CCValAssign::Full:
5143 break;
5144 case CCValAssign::BCvt:
5145 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5146 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5147 else
5148 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5149 break;
5150 }
5151 return Val;
5152}
5153
5154 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5155                                 const CCValAssign &VA, const SDLoc &DL,
5156 const ISD::InputArg &In,
5157                                 const LoongArchTargetLowering &TLI) {
5158   MachineFunction &MF = DAG.getMachineFunction();
5159   MachineRegisterInfo &RegInfo = MF.getRegInfo();
5160   EVT LocVT = VA.getLocVT();
5161 SDValue Val;
5162 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5163 Register VReg = RegInfo.createVirtualRegister(RC);
5164 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5165 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5166
5167 // If input is sign extended from 32 bits, note it for the OptW pass.
5168 if (In.isOrigArg()) {
5169 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5170 if (OrigArg->getType()->isIntegerTy()) {
5171 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5172 // An input zero extended from i31 can also be considered sign extended.
5173 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5174         (BitWidth < 32 && In.Flags.isZExt())) {
5175       LoongArchMachineFunctionInfo *LAFI =
5176           MF.getInfo<LoongArchMachineFunctionInfo>();
5177       LAFI->addSExt32Register(VReg);
5178 }
5179 }
5180 }
5181
5182 return convertLocVTToValVT(DAG, Val, VA, DL);
5183}
5184
5185// The caller is responsible for loading the full value if the argument is
5186 // passed with CCValAssign::Indirect.
5187 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5188                                 const CCValAssign &VA, const SDLoc &DL) {
5189   MachineFunction &MF = DAG.getMachineFunction();
5190   MachineFrameInfo &MFI = MF.getFrameInfo();
5191 EVT ValVT = VA.getValVT();
5192 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5193 /*IsImmutable=*/true);
5194   SDValue FIN = DAG.getFrameIndex(
5195       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
5196 
5197 ISD::LoadExtType ExtType;
5198 switch (VA.getLocInfo()) {
5199 default:
5200 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5201   case CCValAssign::Full:
5202   case CCValAssign::Indirect:
5203   case CCValAssign::BCvt:
5204 ExtType = ISD::NON_EXTLOAD;
5205 break;
5206 }
5207   return DAG.getExtLoad(
5208       ExtType, DL, VA.getLocVT(), Chain, FIN,
5209       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5210 }
5211 
5212 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5213                                    const CCValAssign &VA, const SDLoc &DL) {
5214 EVT LocVT = VA.getLocVT();
5215
5216 switch (VA.getLocInfo()) {
5217 default:
5218 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5219 case CCValAssign::Full:
5220 break;
5221 case CCValAssign::BCvt:
5222 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5223 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5224 else
5225 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5226 break;
5227 }
5228 return Val;
5229}
5230
5231static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5232 CCValAssign::LocInfo LocInfo,
5233 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5234 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5235 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5236 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5237 static const MCPhysReg GPRList[] = {
5238 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5239 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5240 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5241 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5242 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5243 return false;
5244 }
5245 }
5246
5247 if (LocVT == MVT::f32) {
5248 // Pass in STG registers: F1, F2, F3, F4
5249 // fs0,fs1,fs2,fs3
5250 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5251 LoongArch::F26, LoongArch::F27};
5252 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5253 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5254 return false;
5255 }
5256 }
5257
5258 if (LocVT == MVT::f64) {
5259 // Pass in STG registers: D1, D2, D3, D4
5260 // fs4,fs5,fs6,fs7
5261 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5262 LoongArch::F30_64, LoongArch::F31_64};
5263 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5264 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5265 return false;
5266 }
5267 }
5268
5269 report_fatal_error("No registers left in GHC calling convention");
5270 return true;
5271}
5272
5273 // Transform physical registers into virtual registers.
5274 SDValue LoongArchTargetLowering::LowerFormalArguments(
5275     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5276     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5277     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5278 
5279   MachineFunction &MF = DAG.getMachineFunction();
5280 
5281 switch (CallConv) {
5282 default:
5283 llvm_unreachable("Unsupported calling convention");
5284 case CallingConv::C:
5285 case CallingConv::Fast:
5286 break;
5287 case CallingConv::GHC:
5288     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5289         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5290       report_fatal_error(
5291           "GHC calling convention requires the F and D extensions");
5292 }
5293
5294 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5295 MVT GRLenVT = Subtarget.getGRLenVT();
5296 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5297   // Used with varargs to accumulate store chains.
5298 std::vector<SDValue> OutChains;
5299
5300   // Assign locations to all of the incoming arguments.
5301   SmallVector<CCValAssign> ArgLocs;
5302   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5303
5304   if (CallConv == CallingConv::GHC)
5305     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5306   else
5307 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5308
5309 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5310 CCValAssign &VA = ArgLocs[i];
5311 SDValue ArgValue;
5312 if (VA.isRegLoc())
5313 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5314 else
5315 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5316 if (VA.getLocInfo() == CCValAssign::Indirect) {
5317 // If the original argument was split and passed by reference, we need to
5318 // load all parts of it here (using the same address).
5319       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5320                                    MachinePointerInfo()));
5321       unsigned ArgIndex = Ins[i].OrigArgIndex;
5322 unsigned ArgPartOffset = Ins[i].PartOffset;
5323 assert(ArgPartOffset == 0);
5324 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5325 CCValAssign &PartVA = ArgLocs[i + 1];
5326 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5327 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5328 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5329         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5330                                      MachinePointerInfo()));
5331         ++i;
5332 }
5333 continue;
5334 }
5335 InVals.push_back(ArgValue);
5336 }
5337
5338   if (IsVarArg) {
5339     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5340     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5341 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5342 MachineFrameInfo &MFI = MF.getFrameInfo();
5343 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5344 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5345
5346 // Offset of the first variable argument from stack pointer, and size of
5347 // the vararg save area. For now, the varargs save area is either zero or
5348 // large enough to hold a0-a7.
5349 int VaArgOffset, VarArgsSaveSize;
5350
5351 // If all registers are allocated, then all varargs must be passed on the
5352 // stack and we don't need to save any argregs.
5353 if (ArgRegs.size() == Idx) {
5354 VaArgOffset = CCInfo.getStackSize();
5355 VarArgsSaveSize = 0;
5356 } else {
5357 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5358 VaArgOffset = -VarArgsSaveSize;
5359 }
5360
5361     // Record the frame index of the first variable argument,
5362     // which is needed by VASTART.
5363 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5364 LoongArchFI->setVarArgsFrameIndex(FI);
5365
5366 // If saving an odd number of registers then create an extra stack slot to
5367 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5368     // offsets to even-numbered registers remain 2*GRLen-aligned.
5369 if (Idx % 2) {
5370 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5371 true);
5372 VarArgsSaveSize += GRLenInBytes;
5373 }
5374
5375 // Copy the integer registers that may have been used for passing varargs
5376 // to the vararg save area.
5377 for (unsigned I = Idx; I < ArgRegs.size();
5378 ++I, VaArgOffset += GRLenInBytes) {
5379 const Register Reg = RegInfo.createVirtualRegister(RC);
5380 RegInfo.addLiveIn(ArgRegs[I], Reg);
5381 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5382 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5383 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5384 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5386 cast<StoreSDNode>(Store.getNode())
5387 ->getMemOperand()
5388 ->setValue((Value *)nullptr);
5389 OutChains.push_back(Store);
5390 }
5391 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5392 }
5393
5394 // All stores are grouped in one node to allow the matching between
5395 // the size of Ins and InVals. This only happens for vararg functions.
5396 if (!OutChains.empty()) {
5397 OutChains.push_back(Chain);
5398 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5399 }
5400
5401 return Chain;
5402}
5403 
5404 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5405   return CI->isTailCall();
5406}
5407
5408 // Check that the return value is used only as a return value, as otherwise
5409 // we can't perform a tail-call.
5410 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5411                                                  SDValue &Chain) const {
5412 if (N->getNumValues() != 1)
5413 return false;
5414 if (!N->hasNUsesOfValue(1, 0))
5415 return false;
5416
5417 SDNode *Copy = *N->user_begin();
5418 if (Copy->getOpcode() != ISD::CopyToReg)
5419 return false;
5420
5421 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5422 // isn't safe to perform a tail call.
5423 if (Copy->getGluedNode())
5424 return false;
5425
5426 // The copy must be used by a LoongArchISD::RET, and nothing else.
5427 bool HasRet = false;
5428 for (SDNode *Node : Copy->users()) {
5429 if (Node->getOpcode() != LoongArchISD::RET)
5430 return false;
5431 HasRet = true;
5432 }
5433
5434 if (!HasRet)
5435 return false;
5436
5437 Chain = Copy->getOperand(0);
5438 return true;
5439}
5440
5441// Check whether the call is eligible for tail call optimization.
5442bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5443 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5444 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5445
5446 auto CalleeCC = CLI.CallConv;
5447 auto &Outs = CLI.Outs;
5448 auto &Caller = MF.getFunction();
5449 auto CallerCC = Caller.getCallingConv();
5450
5451 // Do not tail call opt if the stack is used to pass parameters.
5452 if (CCInfo.getStackSize() != 0)
5453 return false;
5454
5455 // Do not tail call opt if any parameters need to be passed indirectly.
5456 for (auto &VA : ArgLocs)
5457 if (VA.getLocInfo() == CCValAssign::Indirect)
5458 return false;
5459
5460 // Do not tail call opt if either caller or callee uses struct return
5461 // semantics.
5462 auto IsCallerStructRet = Caller.hasStructRetAttr();
5463 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5464 if (IsCallerStructRet || IsCalleeStructRet)
5465 return false;
5466
5467 // Do not tail call opt if either the callee or caller has a byval argument.
5468 for (auto &Arg : Outs)
5469 if (Arg.Flags.isByVal())
5470 return false;
5471
5472 // The callee has to preserve all registers the caller needs to preserve.
5473 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5474 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5475 if (CalleeCC != CallerCC) {
5476 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5477 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5478 return false;
5479 }
5480 return true;
5481}
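// For instance (illustrative): a call whose arguments overflow a0-a7 onto the
// stack, a call using sret struct-return semantics, or a call carrying a
// byval argument fails the checks above and is lowered as an ordinary call
// instead of a tail call.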
5482 
5483 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5484   return DAG.getDataLayout().getPrefTypeAlign(
5485       VT.getTypeForEVT(*DAG.getContext()));
5486}
5487
5488// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5489// and output parameter nodes.
5490 SDValue
5491 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5492                                    SmallVectorImpl<SDValue> &InVals) const {
5493 SelectionDAG &DAG = CLI.DAG;
5494   SDLoc &DL = CLI.DL;
5495   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5496   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5497   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5498   SDValue Chain = CLI.Chain;
5499 SDValue Callee = CLI.Callee;
5500 CallingConv::ID CallConv = CLI.CallConv;
5501 bool IsVarArg = CLI.IsVarArg;
5502 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5503 MVT GRLenVT = Subtarget.getGRLenVT();
5504 bool &IsTailCall = CLI.IsTailCall;
5505 
5506   MachineFunction &MF = DAG.getMachineFunction();
5507 
5508   // Analyze the operands of the call, assigning locations to each operand.
5509   SmallVector<CCValAssign> ArgLocs;
5510   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5511
5512 if (CallConv == CallingConv::GHC)
5513 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5514 else
5515 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5516
5517 // Check if it's really possible to do a tail call.
5518 if (IsTailCall)
5519 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5520
5521 if (IsTailCall)
5522 ++NumTailCalls;
5523 else if (CLI.CB && CLI.CB->isMustTailCall())
5524 report_fatal_error("failed to perform tail call elimination on a call "
5525 "site marked musttail");
5526
5527 // Get a count of how many bytes are to be pushed on the stack.
5528 unsigned NumBytes = ArgCCInfo.getStackSize();
5529
5530 // Create local copies for byval args.
5531 SmallVector<SDValue> ByValArgs;
5532 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5533 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5534 if (!Flags.isByVal())
5535 continue;
5536
5537 SDValue Arg = OutVals[i];
5538 unsigned Size = Flags.getByValSize();
5539 Align Alignment = Flags.getNonZeroByValAlign();
5540
5541 int FI =
5542 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5543 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5544 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5545
5546 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5547 /*IsVolatile=*/false,
5548                           /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5549                           MachinePointerInfo(), MachinePointerInfo());
5550     ByValArgs.push_back(FIPtr);
5551 }
5552
5553 if (!IsTailCall)
5554 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5555
5556   // Copy argument values to their designated locations.
5557   SmallVector<std::pair<Register, SDValue>> RegsToPass;
5558   SmallVector<SDValue> MemOpChains;
5559 SDValue StackPtr;
5560 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5561 CCValAssign &VA = ArgLocs[i];
5562 SDValue ArgValue = OutVals[i];
5563 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5564
5565 // Promote the value if needed.
5566 // For now, only handle fully promoted and indirect arguments.
5567 if (VA.getLocInfo() == CCValAssign::Indirect) {
5568 // Store the argument in a stack slot and pass its address.
5569 Align StackAlign =
5570 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5571 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5572 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5573 // If the original argument was split and passed by reference, we need to
5574 // store the required parts of it here (and pass just one address).
5575 unsigned ArgIndex = Outs[i].OrigArgIndex;
5576 unsigned ArgPartOffset = Outs[i].PartOffset;
5577 assert(ArgPartOffset == 0);
5578 // Calculate the total size to store. We don't have access to what we're
5579 // actually storing other than performing the loop and collecting the
5580       // info.
5581       SmallVector<std::pair<SDValue, SDValue>> Parts;
5582       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5583 SDValue PartValue = OutVals[i + 1];
5584 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5585 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5586 EVT PartVT = PartValue.getValueType();
5587
5588 StoredSize += PartVT.getStoreSize();
5589 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5590 Parts.push_back(std::make_pair(PartValue, Offset));
5591 ++i;
5592 }
5593 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5594 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5595 MemOpChains.push_back(
5596           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5597                        MachinePointerInfo::getFixedStack(MF, FI)));
5598       for (const auto &Part : Parts) {
5599         SDValue PartValue = Part.first;
5600         SDValue PartOffset = Part.second;
5601         SDValue Address =
5602             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5603         MemOpChains.push_back(
5604             DAG.getStore(Chain, DL, PartValue, Address,
5605                          MachinePointerInfo::getFixedStack(MF, FI)));
5606       }
5607 ArgValue = SpillSlot;
5608 } else {
5609 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5610 }
5611
5612 // Use local copy if it is a byval arg.
5613 if (Flags.isByVal())
5614 ArgValue = ByValArgs[j++];
5615
5616 if (VA.isRegLoc()) {
5617 // Queue up the argument copies and emit them at the end.
5618 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5619 } else {
5620 assert(VA.isMemLoc() && "Argument not register or memory");
5621 assert(!IsTailCall && "Tail call not allowed if stack is used "
5622 "for passing parameters");
5623
5624 // Work out the address of the stack slot.
5625 if (!StackPtr.getNode())
5626         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5627       SDValue Address =
5628           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5629                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5630 
5631 // Emit the store.
5632 MemOpChains.push_back(
5633 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5634 }
5635 }
5636
5637 // Join the stores, which are independent of one another.
5638 if (!MemOpChains.empty())
5639 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5640
5641 SDValue Glue;
5642
5643 // Build a sequence of copy-to-reg nodes, chained and glued together.
5644 for (auto &Reg : RegsToPass) {
5645 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5646 Glue = Chain.getValue(1);
5647 }
5648
5649 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5650 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5651 // split it and then direct call can be matched by PseudoCALL.
5652 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5653 const GlobalValue *GV = S->getGlobal();
5654     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5655                            ? LoongArchII::MO_CALL
5656                            : LoongArchII::MO_CALL_PLT;
5657     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5658   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5659     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5660                            ? LoongArchII::MO_CALL
5661                            : LoongArchII::MO_CALL_PLT;
5662     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5663 }
5664
5665   // The first call operand is the chain and the second is the target address.
5666   SmallVector<SDValue> Ops;
5667   Ops.push_back(Chain);
5668 Ops.push_back(Callee);
5669
5670 // Add argument registers to the end of the list so that they are
5671 // known live into the call.
5672 for (auto &Reg : RegsToPass)
5673 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5674
5675 if (!IsTailCall) {
5676 // Add a register mask operand representing the call-preserved registers.
5677 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5678 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5679 assert(Mask && "Missing call preserved mask for calling convention");
5680 Ops.push_back(DAG.getRegisterMask(Mask));
5681 }
5682
5683 // Glue the call to the argument copies, if any.
5684 if (Glue.getNode())
5685 Ops.push_back(Glue);
5686
5687 // Emit the call.
5688 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5689 unsigned Op;
5690 switch (DAG.getTarget().getCodeModel()) {
5691 default:
5692 report_fatal_error("Unsupported code model");
5693 case CodeModel::Small:
5694 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5695 break;
5696 case CodeModel::Medium:
5697     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5698     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5699     break;
5700 case CodeModel::Large:
5701     assert(Subtarget.is64Bit() && "Large code model requires LA64");
5702     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5703     break;
5704 }
5705
5706   if (IsTailCall) {
5707     MF.getFrameInfo().setHasTailCall();
5708     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5709 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5710 return Ret;
5711 }
5712
5713 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5714 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5715 Glue = Chain.getValue(1);
5716
5717 // Mark the end of the call, which is glued to the call itself.
5718 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5719 Glue = Chain.getValue(1);
5720
5721   // Assign locations to each value returned by this call.
5722   SmallVector<CCValAssign> RVLocs;
5723   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5724 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5725
5726 // Copy all of the result registers out of their specified physreg.
5727 for (auto &VA : RVLocs) {
5728 // Copy the value out.
5729 SDValue RetValue =
5730 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5731 // Glue the RetValue to the end of the call sequence.
5732 Chain = RetValue.getValue(1);
5733 Glue = RetValue.getValue(2);
5734
5735 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5736
5737 InVals.push_back(RetValue);
5738 }
5739
5740 return Chain;
5741}
5742 
5743 bool LoongArchTargetLowering::CanLowerReturn(
5744     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5745     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
5746     const Type *RetTy) const {
5747   SmallVector<CCValAssign> RVLocs;
5748   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5749
5750 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5751 LoongArchABI::ABI ABI =
5752 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5753 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5754 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5755 nullptr))
5756 return false;
5757 }
5758 return true;
5759}
5760 
5761 SDValue LoongArchTargetLowering::LowerReturn(
5762     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5763     const SmallVectorImpl<ISD::OutputArg> &Outs,
5764     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5765     SelectionDAG &DAG) const {
5766   // Stores the assignment of the return value to a location.
5767   SmallVector<CCValAssign> RVLocs;
5768 
5769 // Info about the registers and stack slot.
5770 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5771 *DAG.getContext());
5772
5773 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5774 nullptr, CC_LoongArch);
5775 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5776 report_fatal_error("GHC functions return void only");
5777 SDValue Glue;
5778 SmallVector<SDValue, 4> RetOps(1, Chain);
5779
5780 // Copy the result values into the output registers.
5781 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5782 CCValAssign &VA = RVLocs[i];
5783 assert(VA.isRegLoc() && "Can only return in registers!");
5784
5785 // Handle a 'normal' return.
5786 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5787 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5788
5789 // Guarantee that all emitted copies are stuck together.
5790 Glue = Chain.getValue(1);
5791 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5792 }
5793
5794 RetOps[0] = Chain; // Update chain.
5795
5796 // Add the glue node if we have it.
5797 if (Glue.getNode())
5798 RetOps.push_back(Glue);
5799
5800 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5801}
5802 
5803 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5804                                                EVT VT) const {
5805 if (!Subtarget.hasExtLSX())
5806 return false;
5807
5808 if (VT == MVT::f32) {
5809 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5810 return (masked == 0x3e000000 || masked == 0x40000000);
5811 }
5812
5813 if (VT == MVT::f64) {
5814 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5815 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5816 }
5817
5818 return false;
5819}
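// A hedged reading of the masked checks above: they accept vldi-encodable
// constants whose significand uses only a few leading bits. E.g. 2.0f
// (0x40000000) and 3.0f (0x40400000) both mask to 0x40000000 and are
// accepted, while 0.1f (0x3dcccccd) masks to 0x3c04cccd and is rejected.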
5820
5821bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5822 bool ForCodeSize) const {
5823 // TODO: Maybe need more checks here after vector extension is supported.
5824 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5825 return false;
5826 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5827 return false;
5828 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5829}
5830 
5831 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
5832   return true;
5833 }
5834 
5835 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
5836   return true;
5837 }
5838
5839bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5840 const Instruction *I) const {
5841 if (!Subtarget.is64Bit())
5842 return isa<LoadInst>(I) || isa<StoreInst>(I);
5843
5844 if (isa<LoadInst>(I))
5845 return true;
5846
5847 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5848   // require fences because we can use amswap_db.[w/d].
5849 Type *Ty = I->getOperand(0)->getType();
5850 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5851 unsigned Size = Ty->getIntegerBitWidth();
5852 return (Size == 8 || Size == 16);
5853 }
5854
5855 return false;
5856}
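// Illustrative consequence: on LA64 an atomic i32/i64 store lowers to
// amswap_db.[w/d] with no surrounding fences, an atomic i8/i16 store keeps
// its fences, and atomic loads are always fenced; on LA32 every atomic load
// and store is fenced.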
5857 
5858 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5859                                                 LLVMContext &Context,
5860                                                 EVT VT) const {
5861   if (!VT.isVector())
5862     return getPointerTy(DL);
5863   return VT.changeVectorElementTypeToInteger();
5864 }
5865 
5866 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5867   // TODO: Support vectors.
5868 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5869}
5870 
5871 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5872                                                  const CallInst &I,
5873 MachineFunction &MF,
5874 unsigned Intrinsic) const {
5875 switch (Intrinsic) {
5876 default:
5877 return false;
5878 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5879 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5880 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5881   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5882     Info.opc = ISD::INTRINSIC_W_CHAIN;
5883     Info.memVT = MVT::i32;
5884 Info.ptrVal = I.getArgOperand(0);
5885 Info.offset = 0;
5886     Info.align = Align(4);
5887     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5888                  MachineMemOperand::MOVolatile;
5889     return true;
5890 // TODO: Add more Intrinsics later.
5891 }
5892}
5893
5894// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
5895// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
5896// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
5897// regression, we need to implement it manually.
5898 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
5899   AtomicRMWInst::BinOp Op = AI->getOperation();
5900 
5901   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
5902           Op == AtomicRMWInst::And) &&
5903          "Unable to expand");
5904 unsigned MinWordSize = 4;
5905
5906 IRBuilder<> Builder(AI);
5907 LLVMContext &Ctx = Builder.getContext();
5908 const DataLayout &DL = AI->getDataLayout();
5909 Type *ValueType = AI->getType();
5910 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5911
5912 Value *Addr = AI->getPointerOperand();
5913 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5914 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5915
5916 Value *AlignedAddr = Builder.CreateIntrinsic(
5917 Intrinsic::ptrmask, {PtrTy, IntTy},
5918 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5919 "AlignedAddr");
5920
5921 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5922 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5923 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5924 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5925 Value *Mask = Builder.CreateShl(
5926 ConstantInt::get(WordType,
5927 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5928 ShiftAmt, "Mask");
5929 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5930 Value *ValOperand_Shifted =
5931 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5932 ShiftAmt, "ValOperand_Shifted");
5933 Value *NewOperand;
5934 if (Op == AtomicRMWInst::And)
5935 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5936 else
5937 NewOperand = ValOperand_Shifted;
5938
5939 AtomicRMWInst *NewAI =
5940 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5941 AI->getOrdering(), AI->getSyncScopeID());
5942
5943 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5944 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5945 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5946 AI->replaceAllUsesWith(FinalOldResult);
5947 AI->eraseFromParent();
5948}
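// Sketch of the rewrite performed above (hypothetical IR, not actual output):
//   %old = atomicrmw and ptr %p, i8 %v monotonic
// becomes an operation on the containing aligned 32-bit word:
//   %w    = ptrmask(%p, ~3)                    ; AlignedAddr
//   %sh   = (ptrtoint(%p) & 3) * 8             ; ShiftAmt
//   %wide = (zext %v << %sh) | ~(0xff << %sh)  ; ones outside the byte
//   %res  = atomicrmw and ptr %w, i32 %wide monotonic
//   %old  = trunc(%res >> %sh)
// For or/xor the bits outside the byte stay zero instead, making the wide
// operation a no-op on the neighbouring bytes.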
5949 
5950 TargetLowering::AtomicExpansionKind
5951 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5952   // TODO: Add more AtomicRMWInsts that need to be extended.
5953
5954 // Since floating-point operation requires a non-trivial set of data
5955 // operations, use CmpXChg to expand.
5956   if (AI->isFloatingPointOperation() ||
5957       AI->getOperation() == AtomicRMWInst::UIncWrap ||
5958       AI->getOperation() == AtomicRMWInst::UDecWrap ||
5959       AI->getOperation() == AtomicRMWInst::USubCond ||
5960       AI->getOperation() == AtomicRMWInst::USubSat)
5961     return AtomicExpansionKind::CmpXChg;
5962 
5963   if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5964       (AI->getOperation() == AtomicRMWInst::Xchg ||
5965        AI->getOperation() == AtomicRMWInst::Add ||
5966        AI->getOperation() == AtomicRMWInst::Sub)) {
5967     return AtomicExpansionKind::None;
5968   }
5969
5970 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5971   if (Subtarget.hasLAMCAS()) {
5972     if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5973                       AI->getOperation() == AtomicRMWInst::Or ||
5974                       AI->getOperation() == AtomicRMWInst::Xor))
5975       return AtomicExpansionKind::Expand;
5976     if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5977       return AtomicExpansionKind::CmpXChg;
5978   }
5979 
5980   if (Size == 8 || Size == 16)
5981     return AtomicExpansionKind::MaskedIntrinsic;
5982   return AtomicExpansionKind::None;
5983 }
5984
5985 static Intrinsic::ID
5986 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5987                                     AtomicRMWInst::BinOp BinOp) {
5988 if (GRLen == 64) {
5989 switch (BinOp) {
5990 default:
5991     llvm_unreachable("Unexpected AtomicRMW BinOp");
5992   case AtomicRMWInst::Xchg:
5993     return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5994 case AtomicRMWInst::Add:
5995 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5996 case AtomicRMWInst::Sub:
5997     return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5998   case AtomicRMWInst::Nand:
5999     return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
6000   case AtomicRMWInst::UMax:
6001     return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
6002   case AtomicRMWInst::UMin:
6003     return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
6004 case AtomicRMWInst::Max:
6005 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
6006 case AtomicRMWInst::Min:
6007 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
6008 // TODO: support other AtomicRMWInst.
6009 }
6010 }
6011
6012 if (GRLen == 32) {
6013 switch (BinOp) {
6014 default:
6015     llvm_unreachable("Unexpected AtomicRMW BinOp");
6016   case AtomicRMWInst::Xchg:
6017     return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
6018 case AtomicRMWInst::Add:
6019 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
6020 case AtomicRMWInst::Sub:
6021     return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
6022   case AtomicRMWInst::Nand:
6023     return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
6024 // TODO: support other AtomicRMWInst.
6025 }
6026 }
6027
6028 llvm_unreachable("Unexpected GRLen\n");
6029}
6030 
6031 TargetLowering::AtomicExpansionKind
6032 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
6033     AtomicCmpXchgInst *CI) const {
6034 
6035   if (Subtarget.hasLAMCAS())
6036     return AtomicExpansionKind::None;
6037 
6038   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
6039   if (Size == 8 || Size == 16)
6040     return AtomicExpansionKind::MaskedIntrinsic;
6041   return AtomicExpansionKind::None;
6042 }
6043 
6044 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
6045     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
6046 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
6047 AtomicOrdering FailOrd = CI->getFailureOrdering();
6048 Value *FailureOrdering =
6049 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
6050
6051 // TODO: Support cmpxchg on LA32.
6052 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
6053 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
6054 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
6055 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6056 Type *Tys[] = {AlignedAddr->getType()};
6057 Value *Result = Builder.CreateIntrinsic(
6058 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
6059 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6060 return Result;
6061}
6062 
6063 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
6064     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
6065 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
6066 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
6067 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
6068 // mask, as this produces better code than the LL/SC loop emitted by
6069 // int_loongarch_masked_atomicrmw_xchg.
6070 if (AI->getOperation() == AtomicRMWInst::Xchg &&
6071 isa<ConstantInt>(AI->getValOperand())) {
6072 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
6073 if (CVal->isZero())
6074 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6075 Builder.CreateNot(Mask, "Inv_Mask"),
6076 AI->getAlign(), Ord);
6077 if (CVal->isMinusOne())
6078 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6079 AI->getAlign(), Ord);
6080 }
6081
6082 unsigned GRLen = Subtarget.getGRLen();
6083 Value *Ordering =
6084 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6085   Type *Tys[] = {AlignedAddr->getType()};
6086   Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
6087       AI->getModule(),
6088       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
6089 
6090 if (GRLen == 64) {
6091 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6092 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6093 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6094 }
6095
6096 Value *Result;
6097
6098 // Must pass the shift amount needed to sign extend the loaded value prior
6099 // to performing a signed comparison for min/max. ShiftAmt is the number of
6100 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6101 // is the number of bits to left+right shift the value in order to
6102 // sign-extend.
6103 if (AI->getOperation() == AtomicRMWInst::Min ||
6105 const DataLayout &DL = AI->getDataLayout();
6106 unsigned ValWidth =
6107 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6108 Value *SextShamt =
6109 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6110 Result = Builder.CreateCall(LlwOpScwLoop,
6111 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6112 } else {
6113 Result =
6114 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6115 }
6116
6117 if (GRLen == 64)
6118 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6119 return Result;
6120}
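// Worked example of the shift math above (illustrative): for an i8 atomicrmw
// min at byte offset 1 on LA64, ShiftAmt is 8 and ValWidth is 8, so SextShamt
// = 64 - 8 - 8 = 48; the LL/SC loop shifts the loaded field left by 48 and
// arithmetic-shifts it back down to sign-extend it before the signed
// comparison.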
6121 
6122 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
6123     const MachineFunction &MF, EVT VT) const {
6124 VT = VT.getScalarType();
6125
6126 if (!VT.isSimple())
6127 return false;
6128
6129 switch (VT.getSimpleVT().SimpleTy) {
6130 case MVT::f32:
6131 case MVT::f64:
6132 return true;
6133 default:
6134 break;
6135 }
6136
6137 return false;
6138}
6139 
6140 Register LoongArchTargetLowering::getExceptionPointerRegister(
6141     const Constant *PersonalityFn) const {
6142 return LoongArch::R4;
6143}
6144 
6145 Register LoongArchTargetLowering::getExceptionSelectorRegister(
6146     const Constant *PersonalityFn) const {
6147 return LoongArch::R5;
6148}
6149
6150//===----------------------------------------------------------------------===//
6151// Target Optimization Hooks
6152//===----------------------------------------------------------------------===//
6153 
6154 static int getEstimateRefinementSteps(EVT VT,
6155                                       const LoongArchSubtarget &Subtarget) {
6156   // The FRECIPE instructions' relative accuracy is 2^-14.
6157   // An IEEE float has a 23-bit mantissa and a double has a 52-bit mantissa.
6158 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6159 return RefinementSteps;
6160}
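// A note on the step counts above: each Newton-Raphson refinement roughly
// doubles the number of accurate bits, so one step turns the 2^-14 estimate
// into ~28 good bits (enough for f32's 24-bit significand) and two steps
// reach ~56 bits (enough for f64's 53-bit significand).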
6161 
6162 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6163                                                  SelectionDAG &DAG, int Enabled,
6164 int &RefinementSteps,
6165 bool &UseOneConstNR,
6166 bool Reciprocal) const {
6167 if (Subtarget.hasFrecipe()) {
6168 SDLoc DL(Operand);
6169 EVT VT = Operand.getValueType();
6170
6171 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6172 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6173 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6174 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6175 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6176
6177 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6178 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6179
6180 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6181 if (Reciprocal)
6182 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6183
6184 return Estimate;
6185 }
6186 }
6187
6188 return SDValue();
6189}
6190 
6191 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6192                                                   SelectionDAG &DAG,
6193 int Enabled,
6194 int &RefinementSteps) const {
6195 if (Subtarget.hasFrecipe()) {
6196 SDLoc DL(Operand);
6197 EVT VT = Operand.getValueType();
6198
6199 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6200 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6201 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6202 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6203 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6204
6205 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6206 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6207
6208 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6209 }
6210 }
6211
6212 return SDValue();
6213}
6214
6215//===----------------------------------------------------------------------===//
6216// LoongArch Inline Assembly Support
6217//===----------------------------------------------------------------------===//
6218 
6219 TargetLowering::ConstraintType
6220 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6221 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6222 //
6223 // 'f': A floating-point register (if available).
6224 // 'k': A memory operand whose address is formed by a base register and
6225 // (optionally scaled) index register.
6226 // 'l': A signed 16-bit constant.
6227 // 'm': A memory operand whose address is formed by a base register and
6228 // offset that is suitable for use in instructions with the same
6229 // addressing mode as st.w and ld.w.
6230 // 'I': A signed 12-bit constant (for arithmetic instructions).
6231 // 'J': Integer zero.
6232 // 'K': An unsigned 12-bit constant (for logic instructions).
6233 // "ZB": An address that is held in a general-purpose register. The offset is
6234 // zero.
6235 // "ZC": A memory operand whose address is formed by a base register and
6236 // offset that is suitable for use in instructions with the same
6237 // addressing mode as ll.w and sc.w.
6238 if (Constraint.size() == 1) {
6239 switch (Constraint[0]) {
6240 default:
6241 break;
6242 case 'f':
6243 return C_RegisterClass;
6244 case 'l':
6245 case 'I':
6246 case 'J':
6247 case 'K':
6248 return C_Immediate;
6249 case 'k':
6250 return C_Memory;
6251 }
6252 }
6253
6254 if (Constraint == "ZC" || Constraint == "ZB")
6255 return C_Memory;
6256
6257 // 'm' is handled here.
6258 return TargetLowering::getConstraintType(Constraint);
6259}
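// Usage sketch (illustrative, not from the upstream source): in C,
//   asm("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(12));
// classifies "I" as C_Immediate here and validates the operand in
// LowerAsmOperandForConstraint below, while the "ZB"/"ZC" memory constraints
// route through getInlineAsmMemConstraint.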
6260
6261InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6262 StringRef ConstraintCode) const {
6263   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6264       .Case("k", InlineAsm::ConstraintCode::k)
6265       .Case("ZB", InlineAsm::ConstraintCode::ZB)
6266       .Case("ZC", InlineAsm::ConstraintCode::ZC)
6267       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6268 }
6269
6270std::pair<unsigned, const TargetRegisterClass *>
6271LoongArchTargetLowering::getRegForInlineAsmConstraint(
6272 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6273 // First, see if this is a constraint that directly corresponds to a LoongArch
6274 // register class.
6275 if (Constraint.size() == 1) {
6276 switch (Constraint[0]) {
6277 case 'r':
6278 // TODO: Support fixed vectors up to GRLen?
6279 if (VT.isVector())
6280 break;
6281 return std::make_pair(0U, &LoongArch::GPRRegClass);
6282 case 'f':
6283 if (Subtarget.hasBasicF() && VT == MVT::f32)
6284 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6285 if (Subtarget.hasBasicD() && VT == MVT::f64)
6286 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6287 if (Subtarget.hasExtLSX() &&
6288 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6289 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6290 if (Subtarget.hasExtLASX() &&
6291 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6292 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6293 break;
6294 default:
6295 break;
6296 }
6297 }
6298
6299 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6300 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6301 // constraints while the official register name is prefixed with a '$'. So we
6302 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
6303 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
6304 // case insensitive, so no need to convert the constraint to upper case here.
6305 //
6306 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6307 // decode the usage of register name aliases into their official names. And
6308 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6309 // official register names.
6310 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6311 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6312 bool IsFP = Constraint[2] == 'f';
6313 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6314     std::pair<unsigned, const TargetRegisterClass *> R;
6315     R = TargetLowering::getRegForInlineAsmConstraint(
6316         TRI, join_items("", Temp.first, Temp.second), VT);
6317 // Match those names to the widest floating point register type available.
6318 if (IsFP) {
6319 unsigned RegNo = R.first;
6320 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6321 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6322 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6323 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6324 }
6325 }
6326 }
6327 return R;
6328 }
6329
6330 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6331}
6332
6333void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6334 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6335 SelectionDAG &DAG) const {
6336 // Currently only support length 1 constraints.
6337 if (Constraint.size() == 1) {
6338 switch (Constraint[0]) {
6339 case 'l':
6340 // Validate & create a 16-bit signed immediate operand.
6341 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6342 uint64_t CVal = C->getSExtValue();
6343 if (isInt<16>(CVal))
6344 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6345 Subtarget.getGRLenVT()));
6346 }
6347 return;
6348 case 'I':
6349 // Validate & create a 12-bit signed immediate operand.
6350 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6351 uint64_t CVal = C->getSExtValue();
6352 if (isInt<12>(CVal))
6353 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6354 Subtarget.getGRLenVT()));
6355 }
6356 return;
6357 case 'J':
6358 // Validate & create an integer zero operand.
6359 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6360 if (C->getZExtValue() == 0)
6361 Ops.push_back(
6362 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6363 return;
6364 case 'K':
6365 // Validate & create a 12-bit unsigned immediate operand.
6366 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6367 uint64_t CVal = C->getZExtValue();
6368 if (isUInt<12>(CVal))
6369 Ops.push_back(
6370 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6371 }
6372 return;
6373 default:
6374 break;
6375 }
6376 }
6377 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6378}
6379
6380#define GET_REGISTER_MATCHER
6381#include "LoongArchGenAsmMatcher.inc"
6382 
6383 Register
6384 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
6385                                            const MachineFunction &MF) const {
6386 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6387 std::string NewRegName = Name.second.str();
6388 Register Reg = MatchRegisterAltName(NewRegName);
6389 if (Reg == LoongArch::NoRegister)
6390 Reg = MatchRegisterName(NewRegName);
6391   if (Reg == LoongArch::NoRegister)
6392     report_fatal_error(
6393         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6394 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6395 if (!ReservedRegs.test(Reg))
6396 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6397 StringRef(RegName) + "\"."));
6398 return Reg;
6399}
6400 
6401 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
6402                                                      EVT VT, SDValue C) const {
6403 // TODO: Support vectors.
6404 if (!VT.isScalarInteger())
6405 return false;
6406
6407 // Omit the optimization if the data size exceeds GRLen.
6408 if (VT.getSizeInBits() > Subtarget.getGRLen())
6409 return false;
6410
6411 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6412 const APInt &Imm = ConstNode->getAPIntValue();
6413 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6414 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6415 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6416 return true;
6417 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6418 if (ConstNode->hasOneUse() &&
6419 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6420 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6421 return true;
6422 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6423     // in which the immediate has two set bits. Or break (MUL x, imm)
6424     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6425     // equals (1 << s0) - (1 << s1).
6426 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6427 unsigned Shifts = Imm.countr_zero();
6428       // Reject immediates which can be composed via a single LU12I.W.
6429 if (Shifts >= 12)
6430 return false;
6431       // Reject multiplications that can be optimized to
6432 // (SLLI (ALSL x, x, 1/2/3/4), s).
6433 APInt ImmPop = Imm.ashr(Shifts);
6434 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6435 return false;
6436 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6437       // since it needs one more instruction than the other 3 cases.
6438 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6439 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6440 (ImmSmall - Imm).isPowerOf2())
6441 return true;
6442 }
6443 }
6444
6445 return false;
6446}
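// Worked examples (illustrative): 9 * x = (x << 3) + x matches the
// (Imm - 1).isPowerOf2() test and becomes a single ALSL, and 15 * x =
// (x << 4) - x matches (Imm + 1).isPowerOf2(), which is why such immediates
// are reported as profitable to decompose.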
6447 
6448 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
6449                                                     const AddrMode &AM,
6450 Type *Ty, unsigned AS,
6451 Instruction *I) const {
6452 // LoongArch has four basic addressing modes:
6453 // 1. reg
6454 // 2. reg + 12-bit signed offset
6455 // 3. reg + 14-bit signed offset left-shifted by 2
6456 // 4. reg1 + reg2
6457   // TODO: Add more checks after supporting the vector extension.
6458
6459 // No global is ever allowed as a base.
6460 if (AM.BaseGV)
6461 return false;
6462
6463 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6464 // with `UAL` feature.
6465 if (!isInt<12>(AM.BaseOffs) &&
6466 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6467 return false;
6468
6469 switch (AM.Scale) {
6470 case 0:
6471 // "r+i" or just "i", depending on HasBaseReg.
6472 break;
6473 case 1:
6474 // "r+r+i" is not allowed.
6475 if (AM.HasBaseReg && AM.BaseOffs)
6476 return false;
6477 // Otherwise we have "r+r" or "r+i".
6478 break;
6479 case 2:
6480 // "2*r+r" or "2*r+i" is not allowed.
6481 if (AM.HasBaseReg || AM.BaseOffs)
6482 return false;
6483 // Allow "2*r" as "r+r".
6484 break;
6485 default:
6486 return false;
6487 }
6488
6489 return true;
6490}
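// Examples (illustrative): `ld.w $a0, $a1, 40` fits mode 2 (reg + si12),
// `ldptr.w $a0, $a1, 4096` fits mode 3 (reg + si14 << 2, UAL permitting), and
// `ldx.w $a0, $a1, $a2` fits mode 4 (reg + reg); a "reg + reg + 8" form is
// rejected by the Scale checks above.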
6491 
6492 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6493   return isInt<12>(Imm);
6494}
6495 
6496 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6497   return isInt<12>(Imm);
6498}
6499 
6500 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6501   // Zexts are free if they can be combined with a load.
6502 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6503 // poorly with type legalization of compares preferring sext.
6504 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6505 EVT MemVT = LD->getMemoryVT();
6506 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6507 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6508 LD->getExtensionType() == ISD::ZEXTLOAD))
6509 return true;
6510 }
6511
6512 return TargetLowering::isZExtFree(Val, VT2);
6513}
6514 
6515 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6516                                                     EVT DstVT) const {
6517 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6518}
6519 
6520 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6521   return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6522}
6523 
6524 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
6525   // TODO: Support vectors.
6526 if (Y.getValueType().isVector())
6527 return false;
6528
6529 return !isa<ConstantSDNode>(Y);
6530}
6531 
6532 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6533   // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6534 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6535}
6536 
6537 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6538     Type *Ty, bool IsSigned) const {
6539 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6540 return true;
6541
6542 return IsSigned;
6543}
6544 
6545 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6546   // Return false to suppress the unnecessary extensions if the LibCall
6547 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
6548 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6549 Type.getSizeInBits() < Subtarget.getGRLen()))
6550 return false;
6551 return true;
6552}
6553
6554 // memcpy and other memory intrinsics typically try to use wider load/store
6555 // operations if the source/dest is aligned and the copy size is large enough.
6556 // We therefore want to align such objects passed to memory intrinsics.
6557 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6558                                                      unsigned &MinSize,
6559 Align &PrefAlign) const {
6560 if (!isa<MemIntrinsic>(CI))
6561 return false;
6562
6563 if (Subtarget.is64Bit()) {
6564 MinSize = 8;
6565 PrefAlign = Align(8);
6566 } else {
6567 MinSize = 4;
6568 PrefAlign = Align(4);
6569 }
6570
6571 return true;
6572}
Lower VECTOR_SHUFFLE into VILVH (if possible).
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, bool IsRet, Type *OrigTy)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:594
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
Value * getPointerOperand()
Definition: Instructions.h:870
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:861
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool isDSOLocal() const
Definition: GlobalValue.h:306
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2045
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1480
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:900
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1757
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1387
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2152
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1459
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2033
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1518
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2142
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2449
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1862
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2019
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1540
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2705
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:70
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:586
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:802
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:761
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:857
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:797
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ Entry
Definition: COFF.h:844
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:236
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1494
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1292
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1319
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1610
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1590
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
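A minimal fragment showing how these selectors are used; the value types are placeholders and the surrounding lowering context is assumed:

// Pick the runtime-library routine for an i64 -> f32 signed conversion.
// UNKNOWN_LIBCALL signals that no such routine exists for this type pair.
RTLIB::Libcall LC = RTLIB::getSINTTOFP(MVT::i64, MVT::f32);
if (LC == RTLIB::UNKNOWN_LIBCALL)
  report_fatal_error("unable to lower the conversion to a libcall");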
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
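A minimal sketch of the range form; the container and predicate are illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static bool allEven() {
  SmallVector<int, 4> Vals = {2, 4, 6, 8};
  // No begin()/end() pair needed: the whole range is passed directly.
  return all_of(Vals, [](int V) { return V % 2 == 0; }); // true for this data
}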
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
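A hedged fragment, assuming a MachineBasicBlock MBB, an insertion iterator MI and a DebugLoc DL are in scope; the opcode and registers are illustrative choices, not taken from this file:

// Emit `addi.d $a0, $a1, 0` in front of MI.
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
BuildMI(MBB, MI, DL, TII->get(LoongArch::ADDI_D), LoongArch::R4) // $a0
    .addReg(LoongArch::R5)                                       // $a1
    .addImm(0);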
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition: MathExtras.h:287
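A minimal self-contained illustration; since the function is constexpr, the results can be checked at compile time:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(isShiftedMask_64(0x00FF0000));  // one contiguous run of ones
static_assert(!isShiftedMask_64(0x00FF00F0)); // two separate runs of ones
static_assert(!isShiftedMask_64(0));          // the run must be non-empty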
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
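The matching existential query, again a minimal sketch with illustrative data:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static bool anyLarge() {
  SmallVector<int, 4> Vals = {2, 4, 6, 8};
  return any_of(Vals, [](int V) { return V > 7; }); // true: 8 > 7
}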
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
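A minimal fragment; the debug tag is a hypothetical placeholder. The message is printed only in assertion-enabled builds when -debug or the matching -debug-only= option is given:

#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "my-debug-tag" // hypothetical; selects -debug-only=my-debug-tag

LLVM_DEBUG(dbgs() << "visiting node\n");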
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:195
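A minimal self-contained illustration; backends commonly use this kind of check to decide whether a constant fits an instruction's unsigned immediate field:

#include "llvm/Support/MathExtras.h"
using namespace llvm;

static_assert(isUInt<12>(4095));  // 0xfff is the largest unsigned 12-bit value
static_assert(!isUInt<12>(4096)); // one past the maximum does not fit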
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
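A hedged fragment, assuming a SelectionDAG lowering context in which DAG, DL, Chain, a frame index FI and its address StackSlot already exist:

// Reload an i64 from the fixed stack object FI; the MachinePointerInfo lets
// later passes reason about exactly which memory the load touches.
SDValue Load =
    DAG.getLoad(MVT::i64, DL, Chain, StackSlot,
                MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));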
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)
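A hedged fragment showing the usual pairing with makeLibCall, assumed to run inside a TargetLowering member with DAG, DL and a conversion node Op in scope:

RTLIB::Libcall LC =
    RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
MakeLibCallOptions CallOptions;
// Record the operand/result types as they were before FP softening, so the
// libcall lowering can still see them.
CallOptions.setTypeListBeforeSoften(Op.getOperand(0).getValueType(),
                                    Op.getValueType(), true);
std::pair<SDValue, SDValue> Tmp =
    makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), CallOptions, DL);
SDValue Result = Tmp.first; // Tmp.second is the output chain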