LoongArchISelLowering.cpp
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/Statistic.h"
26#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/IntrinsicsLoongArch.h"
30#include "llvm/Support/Debug.h"
34
35using namespace llvm;
36
37#define DEBUG_TYPE "loongarch-isel-lowering"
38
39STATISTIC(NumTailCalls, "Number of tail calls");
40
41static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42 cl::desc("Trap on integer division by zero."),
43 cl::init(false));
44
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46 const LoongArchSubtarget &STI)
47 : TargetLowering(TM), Subtarget(STI) {
48
49 MVT GRLenVT = Subtarget.getGRLenVT();
50
51 // Set up the register classes.
52
53 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54 if (Subtarget.hasBasicF())
55 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56 if (Subtarget.hasBasicD())
57 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59 static const MVT::SimpleValueType LSXVTs[] = {
60 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61 static const MVT::SimpleValueType LASXVTs[] = {
62 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64 if (Subtarget.hasExtLSX())
65 for (MVT VT : LSXVTs)
66 addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68 if (Subtarget.hasExtLASX())
69 for (MVT VT : LASXVTs)
70 addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72 // Set operations for LA32 and LA64.
73
75 MVT::i1, Promote);
76
83
86 GRLenVT, Custom);
87
89
94
97
101
102 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
103 // we get to know which of sll and revb.2h is faster.
106
107 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109 // and i32 could still be byte-swapped relatively cheaply.
111
117
120
121 // Set operations for LA64 only.
122
123 if (Subtarget.is64Bit()) {
141
145 Custom);
147 }
148
149 // Set operations for LA32 only.
150
151 if (!Subtarget.is64Bit()) {
157 }
158
160
161 static const ISD::CondCode FPCCToExpand[] = {
164
165 // Set operations for 'F' feature.
166
167 if (Subtarget.hasBasicF()) {
168 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
169 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
170 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
171
187
188 if (Subtarget.is64Bit())
190
191 if (!Subtarget.hasBasicD()) {
193 if (Subtarget.is64Bit()) {
196 }
197 }
198 }
199
200 // Set operations for 'D' feature.
201
202 if (Subtarget.hasBasicD()) {
203 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
204 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
205 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208
224
225 if (Subtarget.is64Bit())
227 }
228
229 // Set operations for 'LSX' feature.
230
231 if (Subtarget.hasExtLSX()) {
233 // Expand all truncating stores and extending loads.
234 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
235 setTruncStoreAction(VT, InnerVT, Expand);
238 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
239 }
240 // By default everything must be expanded. Then we will selectively turn
241 // on ones that can be effectively codegen'd.
242 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
244 }
245
246 for (MVT VT : LSXVTs) {
250
254
258 }
259 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
262 Legal);
264 VT, Legal);
271 Expand);
272 }
273 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
275 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
277 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
280 }
281 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
289 VT, Expand);
290 }
292 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
293 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
294 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
295 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
296 }
297
298 // Set operations for 'LASX' feature.
299
300 if (Subtarget.hasExtLASX()) {
301 for (MVT VT : LASXVTs) {
305
310
314 }
315 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
318 Legal);
320 VT, Legal);
327 Expand);
328 }
329 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
331 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
333 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
336 }
337 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
345 VT, Expand);
346 }
347 }
348
349 // Set DAG combine for LA32 and LA64.
350
355
356 // Set DAG combine for 'LSX' feature.
357
358 if (Subtarget.hasExtLSX())
360
361 // Compute derived properties from the register classes.
363
365
368
370
372
373 // Function alignments.
375 // Set preferred alignments.
379
380 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
381 if (Subtarget.hasLAMCAS())
383}
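// Illustrative note (not part of the original file): the constructor body
// consists mostly of TargetLowering registration calls of the general shape
// sketched below. The opcodes, types and actions shown are examples only,
// chosen to illustrate the three common action kinds, not a quotation of this
// file:
//
//   // Route the node through LoongArchTargetLowering::LowerOperation():
//   setOperationAction(ISD::GlobalAddress, GRLenVT, Custom);
//   // Ask generic legalization to rewrite the node in terms of other nodes:
//   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
//   // State that the node maps directly onto a native instruction:
//   setOperationAction(ISD::FADD, MVT::f32, Legal);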
384
385 bool LoongArchTargetLowering::isOffsetFoldingLegal(
386 const GlobalAddressSDNode *GA) const {
387 // In order to maximise the opportunity for common subexpression elimination,
388 // keep a separate ADD node for the global address offset instead of folding
389 // it in the global address node. Later peephole optimisations may choose to
390 // fold it back in when profitable.
391 return false;
392}
393
394 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
395 SelectionDAG &DAG) const {
396 switch (Op.getOpcode()) {
397 case ISD::ATOMIC_FENCE:
398 return lowerATOMIC_FENCE(Op, DAG);
399 case ISD::EH_DWARF_CFA:
400 return lowerEH_DWARF_CFA(Op, DAG);
401 case ISD::GlobalAddress:
402 return lowerGlobalAddress(Op, DAG);
403 case ISD::GlobalTLSAddress:
404 return lowerGlobalTLSAddress(Op, DAG);
405 case ISD::INTRINSIC_WO_CHAIN:
406 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
407 case ISD::INTRINSIC_W_CHAIN:
408 return lowerINTRINSIC_W_CHAIN(Op, DAG);
409 case ISD::INTRINSIC_VOID:
410 return lowerINTRINSIC_VOID(Op, DAG);
411 case ISD::BlockAddress:
412 return lowerBlockAddress(Op, DAG);
413 case ISD::JumpTable:
414 return lowerJumpTable(Op, DAG);
415 case ISD::SHL_PARTS:
416 return lowerShiftLeftParts(Op, DAG);
417 case ISD::SRA_PARTS:
418 return lowerShiftRightParts(Op, DAG, true);
419 case ISD::SRL_PARTS:
420 return lowerShiftRightParts(Op, DAG, false);
421 case ISD::ConstantPool:
422 return lowerConstantPool(Op, DAG);
423 case ISD::FP_TO_SINT:
424 return lowerFP_TO_SINT(Op, DAG);
425 case ISD::BITCAST:
426 return lowerBITCAST(Op, DAG);
427 case ISD::UINT_TO_FP:
428 return lowerUINT_TO_FP(Op, DAG);
429 case ISD::SINT_TO_FP:
430 return lowerSINT_TO_FP(Op, DAG);
431 case ISD::VASTART:
432 return lowerVASTART(Op, DAG);
433 case ISD::FRAMEADDR:
434 return lowerFRAMEADDR(Op, DAG);
435 case ISD::RETURNADDR:
436 return lowerRETURNADDR(Op, DAG);
437 case ISD::WRITE_REGISTER:
438 return lowerWRITE_REGISTER(Op, DAG);
439 case ISD::INSERT_VECTOR_ELT:
440 return lowerINSERT_VECTOR_ELT(Op, DAG);
441 case ISD::EXTRACT_VECTOR_ELT:
442 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
443 case ISD::BUILD_VECTOR:
444 return lowerBUILD_VECTOR(Op, DAG);
445 case ISD::VECTOR_SHUFFLE:
446 return lowerVECTOR_SHUFFLE(Op, DAG);
447 case ISD::BITREVERSE:
448 return lowerBITREVERSE(Op, DAG);
449 }
450 return SDValue();
451}
452
453SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
454 SelectionDAG &DAG) const {
455 EVT ResTy = Op->getValueType(0);
456 SDValue Src = Op->getOperand(0);
457 SDLoc DL(Op);
458
459 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
460 unsigned int OrigEltNum = ResTy.getVectorNumElements();
461 unsigned int NewEltNum = NewVT.getVectorNumElements();
462
463 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
464
466 for (unsigned int i = 0; i < NewEltNum; i++) {
467 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
468 DAG.getConstant(i, DL, MVT::i64));
469 SDValue RevOp = DAG.getNode((ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
472 DL, MVT::i64, Op);
473 Ops.push_back(RevOp);
474 }
475 SDValue Res =
476 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
477
478 switch (ResTy.getSimpleVT().SimpleTy) {
479 default:
480 return SDValue();
481 case MVT::v16i8:
482 case MVT::v32i8:
483 return Res;
484 case MVT::v8i16:
485 case MVT::v16i16:
486 case MVT::v4i32:
487 case MVT::v8i32: {
489 for (unsigned int i = 0; i < NewEltNum; i++)
490 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
491 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
492 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
493 }
494 }
495}
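// Illustrative standalone sketch (not part of the original file; the helper
// name is hypothetical): the index arithmetic behind the shuffle mask built
// above. The per-64-bit bit reversal also reverses element order inside each
// 64-bit chunk, so re-reversing the element order leaves only the bits within
// each original element reversed.
static void buildBitreverseLaneMask(unsigned OrigEltNum, unsigned NewEltNum,
                                    int *Mask /* OrigEltNum entries */) {
  unsigned Pos = 0;
  for (unsigned i = 0; i < NewEltNum; i++)
    for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
      Mask[Pos++] = j + (OrigEltNum / NewEltNum) * i;
  // e.g. v8i16: OrigEltNum = 8, NewEltNum = 2 -> {3, 2, 1, 0, 7, 6, 5, 4}.
}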
496
497/// Determine whether a range fits a regular pattern of values.
498/// This function accounts for the possibility of jumping over the End iterator.
499template <typename ValType>
500 static bool
501 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
502 unsigned CheckStride,
503 typename SmallVectorImpl<ValType>::const_iterator End,
504 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
505 auto &I = Begin;
506
507 while (I != End) {
508 if (*I != -1 && *I != ExpectedIndex)
509 return false;
510 ExpectedIndex += ExpectedIndexStride;
511
512 // Incrementing past End is undefined behaviour so we must increment one
513 // step at a time and check for End at each step.
514 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
515 ; // Empty loop body.
516 }
517 return true;
518}
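// Illustrative usage notes (not part of the original file). The helper visits
// every CheckStride-th element starting at Begin and requires each visited
// element to be -1 (undef) or equal to ExpectedIndex, which grows by
// ExpectedIndexStride per visited element. With the mask values {0, -1, 4, 6}
// held in a SmallVector<int> M:
//   fitsRegularPattern<int>(M.begin(), 1, M.end(), 0, 2)  // true: 0, 2, 4, 6
//   fitsRegularPattern<int>(M.begin(), 2, M.end(), 0, 4)  // true: 0, 4 at even slots
//   fitsRegularPattern<int>(M.begin(), 1, M.end(), 0, 1)  // false: 4 != 2
// The shuffle lowerings below use exactly these kinds of queries to recognise
// the VPACKEV/VPACKOD/VILVL/VILVH/VPICKEV/VPICKOD element patterns.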
519
520/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
521///
522/// VREPLVEI performs vector broadcast based on an element specified by an
523/// integer immediate, with its mask being similar to:
524/// <x, x, x, ...>
525/// where x is any valid index.
526///
527/// When undef's appear in the mask they are treated as if they were whatever
528/// value is necessary in order to fit the above form.
529 static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
530 MVT VT, SDValue V1, SDValue V2,
531 SelectionDAG &DAG) {
532 int SplatIndex = -1;
533 for (const auto &M : Mask) {
534 if (M != -1) {
535 SplatIndex = M;
536 break;
537 }
538 }
539
540 if (SplatIndex == -1)
541 return DAG.getUNDEF(VT);
542
543 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
544 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
545 APInt Imm(64, SplatIndex);
546 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
547 DAG.getConstant(Imm, DL, MVT::i64));
548 }
549
550 return SDValue();
551}
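// Illustrative standalone sketch (not part of the original file; the helper
// name is hypothetical): the splat test above in plain integer form. Every
// non-undef (-1) element must equal the first non-undef element for a
// VREPLVEI broadcast to be usable.
static bool isVREPLVEISplat(const int *Mask, unsigned Size, int &SplatIndex) {
  SplatIndex = -1;
  for (unsigned i = 0; i < Size; ++i) {
    if (Mask[i] == -1)
      continue;
    if (SplatIndex == -1)
      SplatIndex = Mask[i];
    else if (Mask[i] != SplatIndex)
      return false; // not a splat; the real code then tries other patterns
  }
  return true; // e.g. {-1, 2, 2, -1} -> SplatIndex == 2; all-undef stays -1
}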
552
553/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
554///
555/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
556/// elements according to a <4 x i2> constant (encoded as an integer immediate).
557///
558/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
559/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
560/// When undef's appear they are treated as if they were whatever value is
561/// necessary in order to fit the above forms.
562///
563/// For example:
564/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
565/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
566/// i32 7, i32 6, i32 5, i32 4>
567/// is lowered to:
568/// (VSHUF4I_H $v0, $v1, 27)
569/// where the 27 comes from:
570/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
571 static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
572 MVT VT, SDValue V1, SDValue V2,
573 SelectionDAG &DAG) {
574
575 // When the size is less than 4, lower cost instructions may be used.
576 if (Mask.size() < 4)
577 return SDValue();
578
579 int SubMask[4] = {-1, -1, -1, -1};
580 for (unsigned i = 0; i < 4; ++i) {
581 for (unsigned j = i; j < Mask.size(); j += 4) {
582 int Idx = Mask[j];
583
584 // Convert from vector index to 4-element subvector index
585 // If an index refers to an element outside of the subvector then give up
586 if (Idx != -1) {
587 Idx -= 4 * (j / 4);
588 if (Idx < 0 || Idx >= 4)
589 return SDValue();
590 }
591
592 // If the mask has an undef, replace it with the current index.
593 // Note that it might still be undef if the current index is also undef
594 if (SubMask[i] == -1)
595 SubMask[i] = Idx;
596 // Check that non-undef values are the same as in the mask. If they
597 // aren't then give up
598 else if (Idx != -1 && Idx != SubMask[i])
599 return SDValue();
600 }
601 }
602
603 // Calculate the immediate. Replace any remaining undefs with zero
604 APInt Imm(64, 0);
605 for (int i = 3; i >= 0; --i) {
606 int Idx = SubMask[i];
607
608 if (Idx == -1)
609 Idx = 0;
610
611 Imm <<= 2;
612 Imm |= Idx & 0x3;
613 }
614
615 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
616 DAG.getConstant(Imm, DL, MVT::i64));
617}
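// Illustrative standalone sketch (not part of the original file; the helper
// name is hypothetical): the immediate packing performed above, two bits per
// sub-mask element with element 0 in the lowest bits and remaining undefs
// treated as 0.
static unsigned packVSHUF4IImm(const int SubMask[4]) {
  unsigned Imm = 0;
  for (int i = 3; i >= 0; --i) {
    int Idx = SubMask[i] == -1 ? 0 : SubMask[i];
    Imm = (Imm << 2) | (Idx & 0x3);
  }
  return Imm; // {3, 2, 1, 0} -> 27, matching the example in the comment above.
}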
618
619/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
620///
621/// VPACKEV interleaves the even elements from each vector.
622///
623/// It is possible to lower into VPACKEV when the mask consists of two of the
624/// following forms interleaved:
625/// <0, 2, 4, ...>
626/// <n, n+2, n+4, ...>
627/// where n is the number of elements in the vector.
628/// For example:
629/// <0, 0, 2, 2, 4, 4, ...>
630/// <0, n, 2, n+2, 4, n+4, ...>
631///
632/// When undef's appear in the mask they are treated as if they were whatever
633/// value is necessary in order to fit the above forms.
634 static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
635 MVT VT, SDValue V1, SDValue V2,
636 SelectionDAG &DAG) {
637
638 const auto &Begin = Mask.begin();
639 const auto &End = Mask.end();
640 SDValue OriV1 = V1, OriV2 = V2;
641
642 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
643 V1 = OriV1;
644 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
645 V1 = OriV2;
646 else
647 return SDValue();
648
649 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
650 V2 = OriV1;
651 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
652 V2 = OriV2;
653 else
654 return SDValue();
655
656 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
657}
658
659/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
660///
661/// VPACKOD interleaves the odd elements from each vector.
662///
663/// It is possible to lower into VPACKOD when the mask consists of two of the
664/// following forms interleaved:
665/// <1, 3, 5, ...>
666/// <n+1, n+3, n+5, ...>
667/// where n is the number of elements in the vector.
668/// For example:
669/// <1, 1, 3, 3, 5, 5, ...>
670/// <1, n+1, 3, n+3, 5, n+5, ...>
671///
672/// When undef's appear in the mask they are treated as if they were whatever
673/// value is necessary in order to fit the above forms.
674 static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
675 MVT VT, SDValue V1, SDValue V2,
676 SelectionDAG &DAG) {
677
678 const auto &Begin = Mask.begin();
679 const auto &End = Mask.end();
680 SDValue OriV1 = V1, OriV2 = V2;
681
682 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
683 V1 = OriV1;
684 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
685 V1 = OriV2;
686 else
687 return SDValue();
688
689 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
690 V2 = OriV1;
691 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
692 V2 = OriV2;
693 else
694 return SDValue();
695
696 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
697}
698
699/// Lower VECTOR_SHUFFLE into VILVH (if possible).
700///
701/// VILVH interleaves consecutive elements from the left (highest-indexed) half
702/// of each vector.
703///
704/// It is possible to lower into VILVH when the mask consists of two of the
705/// following forms interleaved:
706/// <x, x+1, x+2, ...>
707/// <n+x, n+x+1, n+x+2, ...>
708/// where n is the number of elements in the vector and x is half n.
709/// For example:
710/// <x, x, x+1, x+1, x+2, x+2, ...>
711/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
712///
713/// When undef's appear in the mask they are treated as if they were whatever
714/// value is necessary in order to fit the above forms.
715 static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
716 MVT VT, SDValue V1, SDValue V2,
717 SelectionDAG &DAG) {
718
719 const auto &Begin = Mask.begin();
720 const auto &End = Mask.end();
721 unsigned HalfSize = Mask.size() / 2;
722 SDValue OriV1 = V1, OriV2 = V2;
723
724 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
725 V1 = OriV1;
726 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
727 V1 = OriV2;
728 else
729 return SDValue();
730
731 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
732 V2 = OriV1;
733 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
734 1))
735 V2 = OriV2;
736 else
737 return SDValue();
738
739 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
740}
741
742/// Lower VECTOR_SHUFFLE into VILVL (if possible).
743///
744/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
745/// of each vector.
746///
747/// It is possible to lower into VILVL when the mask consists of two of the
748/// following forms interleaved:
749/// <0, 1, 2, ...>
750/// <n, n+1, n+2, ...>
751/// where n is the number of elements in the vector.
752/// For example:
753/// <0, 0, 1, 1, 2, 2, ...>
754/// <0, n, 1, n+1, 2, n+2, ...>
755///
756/// When undef's appear in the mask they are treated as if they were whatever
757/// value is necessary in order to fit the above forms.
758 static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
759 MVT VT, SDValue V1, SDValue V2,
760 SelectionDAG &DAG) {
761
762 const auto &Begin = Mask.begin();
763 const auto &End = Mask.end();
764 SDValue OriV1 = V1, OriV2 = V2;
765
766 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
767 V1 = OriV1;
768 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
769 V1 = OriV2;
770 else
771 return SDValue();
772
773 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
774 V2 = OriV1;
775 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
776 V2 = OriV2;
777 else
778 return SDValue();
779
780 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
781}
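// Worked example (not part of the original file): for a v4i32 shuffle of V1
// and V2 with mask <0, 4, 1, 5>, the even positions hold {0, 1} (start 0,
// stride 1, taken from V1) and the odd positions hold {4, 5} (start
// Mask.size() = 4, stride 1, taken from V2), so the code above selects
// (VILVL V2, V1), which interleaves the lowest-indexed halves of the two
// registers.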
782
783/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
784///
785/// VPICKEV copies the even elements of each vector into the result vector.
786///
787/// It is possible to lower into VPICKEV when the mask consists of two of the
788/// following forms concatenated:
789/// <0, 2, 4, ...>
790/// <n, n+2, n+4, ...>
791/// where n is the number of elements in the vector.
792/// For example:
793/// <0, 2, 4, ..., 0, 2, 4, ...>
794/// <0, 2, 4, ..., n, n+2, n+4, ...>
795///
796/// When undef's appear in the mask they are treated as if they were whatever
797/// value is necessary in order to fit the above forms.
798 static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
799 MVT VT, SDValue V1, SDValue V2,
800 SelectionDAG &DAG) {
801
802 const auto &Begin = Mask.begin();
803 const auto &Mid = Mask.begin() + Mask.size() / 2;
804 const auto &End = Mask.end();
805 SDValue OriV1 = V1, OriV2 = V2;
806
807 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
808 V1 = OriV1;
809 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
810 V1 = OriV2;
811 else
812 return SDValue();
813
814 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
815 V2 = OriV1;
816 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
817 V2 = OriV2;
818
819 else
820 return SDValue();
821
822 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
823}
824
825/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
826///
827/// VPICKOD copies the odd elements of each vector into the result vector.
828///
829/// It is possible to lower into VPICKOD when the mask consists of two of the
830/// following forms concatenated:
831/// <1, 3, 5, ...>
832/// <n+1, n+3, n+5, ...>
833/// where n is the number of elements in the vector.
834/// For example:
835/// <1, 3, 5, ..., 1, 3, 5, ...>
836/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
837///
838/// When undef's appear in the mask they are treated as if they were whatever
839/// value is necessary in order to fit the above forms.
840 static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
841 MVT VT, SDValue V1, SDValue V2,
842 SelectionDAG &DAG) {
843
844 const auto &Begin = Mask.begin();
845 const auto &Mid = Mask.begin() + Mask.size() / 2;
846 const auto &End = Mask.end();
847 SDValue OriV1 = V1, OriV2 = V2;
848
849 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
850 V1 = OriV1;
851 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
852 V1 = OriV2;
853 else
854 return SDValue();
855
856 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
857 V2 = OriV1;
858 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
859 V2 = OriV2;
860 else
861 return SDValue();
862
863 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
864}
865
866/// Lower VECTOR_SHUFFLE into VSHUF.
867///
868/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
869/// adding it as an operand to the resulting VSHUF.
870 static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
871 MVT VT, SDValue V1, SDValue V2,
872 SelectionDAG &DAG) {
873
875 for (auto M : Mask)
876 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
877
878 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
879 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
880
881 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
882 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
883 // VSHUF concatenates the vectors in a bitwise fashion:
884 // <0b00, 0b01> + <0b10, 0b11> ->
885 // 0b0100 + 0b1110 -> 0b01001110
886 // <0b10, 0b11, 0b00, 0b01>
887 // We must therefore swap the operands to get the correct result.
888 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
889}
890
891/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
892///
893/// This routine breaks down the specific type of 128-bit shuffle and
894/// dispatches to the lowering routines accordingly.
895 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
896 SDValue V1, SDValue V2, SelectionDAG &DAG) {
897 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
898 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
899 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
900 "Vector type is unsupported for lsx!");
901 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
902 "Two operands have different types!");
903 assert(VT.getVectorNumElements() == Mask.size() &&
904 "Unexpected mask size for shuffle!");
905 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
906
907 SDValue Result;
908 // TODO: Add more comparison patterns.
909 if (V2.isUndef()) {
910 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
911 return Result;
912 if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
913 return Result;
914
915 // TODO: This comment may be enabled in the future to better match the
916 // pattern for instruction selection.
917 /* V2 = V1; */
918 }
919
920 // It is recommended not to change the pattern comparison order for better
921 // performance.
922 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
923 return Result;
924 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
925 return Result;
926 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
927 return Result;
928 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
929 return Result;
930 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
931 return Result;
932 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
933 return Result;
934 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
935 return Result;
936
937 return SDValue();
938}
939
940/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
941///
942/// It is a XVREPLVEI when the mask is:
943/// <x, x, x, ..., x+n, x+n, x+n, ...>
944/// where the number of x is equal to n and n is half the length of vector.
945///
946/// When undef's appear in the mask they are treated as if they were whatever
947/// value is necessary in order to fit the above form.
948 static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
949 ArrayRef<int> Mask, MVT VT,
950 SDValue V1, SDValue V2,
951 SelectionDAG &DAG) {
952 int SplatIndex = -1;
953 for (const auto &M : Mask) {
954 if (M != -1) {
955 SplatIndex = M;
956 break;
957 }
958 }
959
960 if (SplatIndex == -1)
961 return DAG.getUNDEF(VT);
962
963 const auto &Begin = Mask.begin();
964 const auto &End = Mask.end();
965 unsigned HalfSize = Mask.size() / 2;
966
967 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
968 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
969 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
970 0)) {
971 APInt Imm(64, SplatIndex);
972 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
973 DAG.getConstant(Imm, DL, MVT::i64));
974 }
975
976 return SDValue();
977}
978
979/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
980 static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
981 MVT VT, SDValue V1, SDValue V2,
982 SelectionDAG &DAG) {
983 // When the size is less than or equal to 4, lower cost instructions may be
984 // used.
985 if (Mask.size() <= 4)
986 return SDValue();
987 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
988}
989
990/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
991 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
992 MVT VT, SDValue V1, SDValue V2,
993 SelectionDAG &DAG) {
994 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
995}
996
997/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
998 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
999 MVT VT, SDValue V1, SDValue V2,
1000 SelectionDAG &DAG) {
1001 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
1002}
1003
1004/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
1005 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
1006 MVT VT, SDValue V1, SDValue V2,
1007 SelectionDAG &DAG) {
1008
1009 const auto &Begin = Mask.begin();
1010 const auto &End = Mask.end();
1011 unsigned HalfSize = Mask.size() / 2;
1012 unsigned LeftSize = HalfSize / 2;
1013 SDValue OriV1 = V1, OriV2 = V2;
1014
1015 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
1016 1) &&
1017 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
1018 V1 = OriV1;
1019 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
1020 Mask.size() + HalfSize - LeftSize, 1) &&
1021 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1022 Mask.size() + HalfSize + LeftSize, 1))
1023 V1 = OriV2;
1024 else
1025 return SDValue();
1026
1027 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
1028 1) &&
1029 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
1030 1))
1031 V2 = OriV1;
1032 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
1033 Mask.size() + HalfSize - LeftSize, 1) &&
1034 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1035 Mask.size() + HalfSize + LeftSize, 1))
1036 V2 = OriV2;
1037 else
1038 return SDValue();
1039
1040 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1041}
1042
1043/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
1044 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
1045 MVT VT, SDValue V1, SDValue V2,
1046 SelectionDAG &DAG) {
1047
1048 const auto &Begin = Mask.begin();
1049 const auto &End = Mask.end();
1050 unsigned HalfSize = Mask.size() / 2;
1051 SDValue OriV1 = V1, OriV2 = V2;
1052
1053 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
1054 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
1055 V1 = OriV1;
1056 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
1057 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
1058 Mask.size() + HalfSize, 1))
1059 V1 = OriV2;
1060 else
1061 return SDValue();
1062
1063 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
1064 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1065 V2 = OriV1;
1066 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1067 1) &&
1068 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1069 Mask.size() + HalfSize, 1))
1070 V2 = OriV2;
1071 else
1072 return SDValue();
1073
1074 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1075}
1076
1077/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1078 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1079 MVT VT, SDValue V1, SDValue V2,
1080 SelectionDAG &DAG) {
1081
1082 const auto &Begin = Mask.begin();
1083 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1084 const auto &Mid = Mask.begin() + Mask.size() / 2;
1085 const auto &RightMid = Mask.end() - Mask.size() / 4;
1086 const auto &End = Mask.end();
1087 unsigned HalfSize = Mask.size() / 2;
1088 SDValue OriV1 = V1, OriV2 = V2;
1089
1090 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1091 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1092 V1 = OriV1;
1093 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1094 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1095 V1 = OriV2;
1096 else
1097 return SDValue();
1098
1099 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1100 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1101 V2 = OriV1;
1102 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1103 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1104 V2 = OriV2;
1105
1106 else
1107 return SDValue();
1108
1109 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1110}
1111
1112/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1113 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1114 MVT VT, SDValue V1, SDValue V2,
1115 SelectionDAG &DAG) {
1116
1117 const auto &Begin = Mask.begin();
1118 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1119 const auto &Mid = Mask.begin() + Mask.size() / 2;
1120 const auto &RightMid = Mask.end() - Mask.size() / 4;
1121 const auto &End = Mask.end();
1122 unsigned HalfSize = Mask.size() / 2;
1123 SDValue OriV1 = V1, OriV2 = V2;
1124
1125 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1126 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1127 V1 = OriV1;
1128 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1129 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1130 2))
1131 V1 = OriV2;
1132 else
1133 return SDValue();
1134
1135 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1136 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1137 V2 = OriV1;
1138 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1139 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1140 2))
1141 V2 = OriV2;
1142 else
1143 return SDValue();
1144
1145 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1146}
1147
1148/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1149 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1150 MVT VT, SDValue V1, SDValue V2,
1151 SelectionDAG &DAG) {
1152
1153 int MaskSize = Mask.size();
1154 int HalfSize = Mask.size() / 2;
1155 const auto &Begin = Mask.begin();
1156 const auto &Mid = Mask.begin() + HalfSize;
1157 const auto &End = Mask.end();
1158
1159 // VECTOR_SHUFFLE concatenates the vectors:
1160 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1161 // shuffling ->
1162 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1163 //
1164 // XVSHUF concatenates the vectors:
1165 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1166 // shuffling ->
1167 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1168 SmallVector<SDValue, 8> MaskAlloc;
1169 for (auto it = Begin; it < Mid; it++) {
1170 if (*it < 0) // UNDEF
1171 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1172 else if ((*it >= 0 && *it < HalfSize) ||
1173 (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1174 int M = *it < HalfSize ? *it : *it - HalfSize;
1175 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1176 } else
1177 return SDValue();
1178 }
1179 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1180
1181 for (auto it = Mid; it < End; it++) {
1182 if (*it < 0) // UNDEF
1183 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1184 else if ((*it >= HalfSize && *it < MaskSize) ||
1185 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1186 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1187 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1188 } else
1189 return SDValue();
1190 }
1191 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1192
1193 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1194 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1195 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1196}
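// Illustrative standalone sketch (not part of the original file; the helper
// name is hypothetical): the mask rewrite above in plain integer form. XVSHUF
// indexes each 128-bit lane separately, so every VECTOR_SHUFFLE index must be
// rebased into its own lane, and indices that would cross lanes make the
// pattern unusable.
static bool convertToXVSHUFMask(const int *Mask, int MaskSize, int *Out) {
  int HalfSize = MaskSize / 2;
  for (int i = 0; i < MaskSize; ++i) {
    int M = Mask[i];
    if (M < 0) { // undef
      Out[i] = 0;
      continue;
    }
    if (i < HalfSize) { // first half of the result: low lanes of V1 / V2 only
      if (!((M >= 0 && M < HalfSize) ||
            (M >= MaskSize && M < MaskSize + HalfSize)))
        return false;
      Out[i] = M < HalfSize ? M : M - HalfSize;
    } else { // second half of the result: high lanes of V1 / V2 only
      if (!((M >= HalfSize && M < MaskSize) ||
            (M >= MaskSize + HalfSize && M < MaskSize * 2)))
        return false;
      Out[i] = M < MaskSize ? M - HalfSize : M - MaskSize;
    }
  }
  // e.g. for v8i32: {0, 8, 1, 9, 4, 12, 5, 13} -> {0, 4, 1, 5, 0, 4, 1, 5}.
  return true;
}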
1197
1198/// Shuffle vectors by lane to generate more optimized instructions.
1199/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1200///
1201/// Therefore, except for the following four cases, other cases are regarded
1202/// as cross-lane shuffles, where optimization is relatively limited.
1203///
1204 /// - Shuffle high, low lanes of the two input vectors
1205 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1206 /// - Shuffle low, high lanes of the two input vectors
1207 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1208 /// - Shuffle low, low lanes of the two input vectors
1209 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1210 /// - Shuffle high, high lanes of the two input vectors
1211 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1212///
1213/// The first case is the closest to LoongArch instructions and the other
1214/// cases need to be converted to it for processing.
1215///
1216/// This function may modify V1, V2 and Mask
1217 static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1218 MutableArrayRef<int> Mask, MVT VT,
1219 SDValue &V1, SDValue &V2,
1220 SelectionDAG &DAG) {
1221
1222 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1223
1224 int MaskSize = Mask.size();
1225 int HalfSize = Mask.size() / 2;
1226
1227 HalfMaskType preMask = None, postMask = None;
1228
1229 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1230 return M < 0 || (M >= 0 && M < HalfSize) ||
1231 (M >= MaskSize && M < MaskSize + HalfSize);
1232 }))
1233 preMask = HighLaneTy;
1234 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1235 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1236 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1237 }))
1238 preMask = LowLaneTy;
1239
1240 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1241 return M < 0 || (M >= 0 && M < HalfSize) ||
1242 (M >= MaskSize && M < MaskSize + HalfSize);
1243 }))
1244 postMask = HighLaneTy;
1245 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1246 return M < 0 || (M >= HalfSize && M < MaskSize) ||
1247 (M >= MaskSize + HalfSize && M < MaskSize * 2);
1248 }))
1249 postMask = LowLaneTy;
1250
1251 // The pre-half of mask is high lane type, and the post-half of mask
1252 // is low lane type, which is closest to the LoongArch instructions.
1253 //
1254 // Note: In the LoongArch architecture, the high lane of the mask corresponds
1255 // to the lower 128 bits of the vector register, and the low lane of the mask
1256 // corresponds to the higher 128 bits of the vector register.
1257 if (preMask == HighLaneTy && postMask == LowLaneTy) {
1258 return;
1259 }
1260 if (preMask == LowLaneTy && postMask == HighLaneTy) {
1261 V1 = DAG.getBitcast(MVT::v4i64, V1);
1262 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1263 DAG.getConstant(0b01001110, DL, MVT::i64));
1264 V1 = DAG.getBitcast(VT, V1);
1265
1266 if (!V2.isUndef()) {
1267 V2 = DAG.getBitcast(MVT::v4i64, V2);
1268 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1269 DAG.getConstant(0b01001110, DL, MVT::i64));
1270 V2 = DAG.getBitcast(VT, V2);
1271 }
1272
1273 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1274 *it = *it < 0 ? *it : *it - HalfSize;
1275 }
1276 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1277 *it = *it < 0 ? *it : *it + HalfSize;
1278 }
1279 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1280 V1 = DAG.getBitcast(MVT::v4i64, V1);
1281 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1282 DAG.getConstant(0b11101110, DL, MVT::i64));
1283 V1 = DAG.getBitcast(VT, V1);
1284
1285 if (!V2.isUndef()) {
1286 V2 = DAG.getBitcast(MVT::v4i64, V2);
1287 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1288 DAG.getConstant(0b11101110, DL, MVT::i64));
1289 V2 = DAG.getBitcast(VT, V2);
1290 }
1291
1292 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1293 *it = *it < 0 ? *it : *it - HalfSize;
1294 }
1295 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1296 V1 = DAG.getBitcast(MVT::v4i64, V1);
1297 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1298 DAG.getConstant(0b01000100, DL, MVT::i64));
1299 V1 = DAG.getBitcast(VT, V1);
1300
1301 if (!V2.isUndef()) {
1302 V2 = DAG.getBitcast(MVT::v4i64, V2);
1303 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1304 DAG.getConstant(0b01000100, DL, MVT::i64));
1305 V2 = DAG.getBitcast(VT, V2);
1306 }
1307
1308 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1309 *it = *it < 0 ? *it : *it + HalfSize;
1310 }
1311 } else { // cross-lane
1312 return;
1313 }
1314}
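// Illustrative example (not part of the original file): for a v8i32
// single-input shuffle with mask <4, 5, 6, 7, 4, 5, 6, 7>, both halves of the
// mask reference only the high 128-bit lane of V1, so preMask == postMask ==
// LowLaneTy. The code above then emits XVPERMI(V1, 0b11101110), which copies
// V1's high 128-bit lane into both lanes, and rebases the first half of the
// mask by -HalfSize, leaving <0, 1, 2, 3, 4, 5, 6, 7>. Later lowering can
// match that mask without any cross-lane movement.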
1315
1316/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1317///
1318/// This routine breaks down the specific type of 256-bit shuffle and
1319/// dispatches to the lowering routines accordingly.
1320 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1321 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1322 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1323 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1324 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1325 "Vector type is unsupported for lasx!");
1326 assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1327 "Two operands have different types!");
1328 assert(VT.getVectorNumElements() == Mask.size() &&
1329 "Unexpected mask size for shuffle!");
1330 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1331 assert(Mask.size() >= 4 && "Mask size is less than 4.");
1332
1333 // Canonicalize non-cross-lane shuffle vectors.
1334 SmallVector<int> NewMask(Mask);
1335 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1336
1337 SDValue Result;
1338 // TODO: Add more comparison patterns.
1339 if (V2.isUndef()) {
1340 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1341 return Result;
1342 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1343 return Result;
1344
1345 // TODO: This comment may be enabled in the future to better match the
1346 // pattern for instruction selection.
1347 /* V2 = V1; */
1348 }
1349
1350 // It is recommended not to change the pattern comparison order for better
1351 // performance.
1352 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1353 return Result;
1354 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1355 return Result;
1356 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1357 return Result;
1358 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1359 return Result;
1360 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1361 return Result;
1362 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1363 return Result;
1364 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1365 return Result;
1366
1367 return SDValue();
1368}
1369
1370SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1371 SelectionDAG &DAG) const {
1372 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1373 ArrayRef<int> OrigMask = SVOp->getMask();
1374 SDValue V1 = Op.getOperand(0);
1375 SDValue V2 = Op.getOperand(1);
1376 MVT VT = Op.getSimpleValueType();
1377 int NumElements = VT.getVectorNumElements();
1378 SDLoc DL(Op);
1379
1380 bool V1IsUndef = V1.isUndef();
1381 bool V2IsUndef = V2.isUndef();
1382 if (V1IsUndef && V2IsUndef)
1383 return DAG.getUNDEF(VT);
1384
1385 // When we create a shuffle node we put the UNDEF node to second operand,
1386 // but in some cases the first operand may be transformed to UNDEF.
1387 // In this case we should just commute the node.
1388 if (V1IsUndef)
1389 return DAG.getCommutedVectorShuffle(*SVOp);
1390
1391 // Check for non-undef masks pointing at an undef vector and make the masks
1392 // undef as well. This makes it easier to match the shuffle based solely on
1393 // the mask.
1394 if (V2IsUndef &&
1395 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1396 SmallVector<int, 8> NewMask(OrigMask);
1397 for (int &M : NewMask)
1398 if (M >= NumElements)
1399 M = -1;
1400 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1401 }
1402
1403 // Check for illegal shuffle mask element index values.
1404 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1405 (void)MaskUpperLimit;
1406 assert(llvm::all_of(OrigMask,
1407 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1408 "Out of bounds shuffle index");
1409
1410 // For each vector width, delegate to a specialized lowering routine.
1411 if (VT.is128BitVector())
1412 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1413
1414 if (VT.is256BitVector())
1415 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1416
1417 return SDValue();
1418}
1419
1420static bool isConstantOrUndef(const SDValue Op) {
1421 if (Op->isUndef())
1422 return true;
1423 if (isa<ConstantSDNode>(Op))
1424 return true;
1425 if (isa<ConstantFPSDNode>(Op))
1426 return true;
1427 return false;
1428}
1429
1430 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1431 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1432 if (isConstantOrUndef(Op->getOperand(i)))
1433 return true;
1434 return false;
1435}
1436
1437SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1438 SelectionDAG &DAG) const {
1439 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1440 EVT ResTy = Op->getValueType(0);
1441 SDLoc DL(Op);
1442 APInt SplatValue, SplatUndef;
1443 unsigned SplatBitSize;
1444 bool HasAnyUndefs;
1445 bool Is128Vec = ResTy.is128BitVector();
1446 bool Is256Vec = ResTy.is256BitVector();
1447
1448 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1449 (!Subtarget.hasExtLASX() || !Is256Vec))
1450 return SDValue();
1451
1452 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1453 /*MinSplatBits=*/8) &&
1454 SplatBitSize <= 64) {
1455 // We can only cope with 8, 16, 32, or 64-bit elements.
1456 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1457 SplatBitSize != 64)
1458 return SDValue();
1459
1460 EVT ViaVecTy;
1461
1462 switch (SplatBitSize) {
1463 default:
1464 return SDValue();
1465 case 8:
1466 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1467 break;
1468 case 16:
1469 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1470 break;
1471 case 32:
1472 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1473 break;
1474 case 64:
1475 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1476 break;
1477 }
1478
1479 // SelectionDAG::getConstant will promote SplatValue appropriately.
1480 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1481
1482 // Bitcast to the type we originally wanted.
1483 if (ViaVecTy != ResTy)
1484 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1485
1486 return Result;
1487 }
1488
1489 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1490 return Op;
1491
1492 if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1493 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
1494 // The resulting code is the same length as the expansion, but it doesn't
1495 // use memory operations.
1496 EVT ResTy = Node->getValueType(0);
1497
1498 assert(ResTy.isVector());
1499
1500 unsigned NumElts = ResTy.getVectorNumElements();
1501 SDValue Vector = DAG.getUNDEF(ResTy);
1502 for (unsigned i = 0; i < NumElts; ++i) {
1502 Vector =
1503 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1504 Node->getOperand(i),
1505 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1506 }
1507 return Vector;
1508 }
1509
1510 return SDValue();
1511}
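// Illustrative walk-through (not part of the original file) of the
// constant-splat path above: for a v8i16 BUILD_VECTOR whose operands are all
// the constant 42, isConstantSplat reports SplatBitSize == 16, so ViaVecTy is
// chosen as v8i16 (128-bit) or v16i16 (256-bit), DAG.getConstant materialises
// the splat directly on that type, and the final bitcast is a no-op because
// ViaVecTy already equals ResTy.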
1512
1513SDValue
1514LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 EVT VecTy = Op->getOperand(0)->getValueType(0);
1517 SDValue Idx = Op->getOperand(1);
1518 EVT EltTy = VecTy.getVectorElementType();
1519 unsigned NumElts = VecTy.getVectorNumElements();
1520
1521 if (isa<ConstantSDNode>(Idx) &&
1522 (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1523 EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1524 return Op;
1525
1526 return SDValue();
1527}
1528
1529SDValue
1530LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1531 SelectionDAG &DAG) const {
1532 if (isa<ConstantSDNode>(Op->getOperand(2)))
1533 return Op;
1534 return SDValue();
1535}
1536
1537SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1538 SelectionDAG &DAG) const {
1539 SDLoc DL(Op);
1540 SyncScope::ID FenceSSID =
1541 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1542
1543 // singlethread fences only synchronize with signal handlers on the same
1544 // thread and thus only need to preserve instruction order, not actually
1545 // enforce memory ordering.
1546 if (FenceSSID == SyncScope::SingleThread)
1547 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1548 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1549
1550 return Op;
1551}
1552
1553SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1554 SelectionDAG &DAG) const {
1555
1556 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1557 DAG.getContext()->emitError(
1558 "On LA64, only 64-bit registers can be written.");
1559 return Op.getOperand(0);
1560 }
1561
1562 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1563 DAG.getContext()->emitError(
1564 "On LA32, only 32-bit registers can be written.");
1565 return Op.getOperand(0);
1566 }
1567
1568 return Op;
1569}
1570
1571SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1572 SelectionDAG &DAG) const {
1573 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1574 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1575 "be a constant integer");
1576 return SDValue();
1577 }
1578
1581 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1582 EVT VT = Op.getValueType();
1583 SDLoc DL(Op);
1584 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1585 unsigned Depth = Op.getConstantOperandVal(0);
1586 int GRLenInBytes = Subtarget.getGRLen() / 8;
1587
1588 while (Depth--) {
1589 int Offset = -(GRLenInBytes * 2);
1590 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1591 DAG.getSignedConstant(Offset, DL, VT));
1592 FrameAddr =
1593 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1594 }
1595 return FrameAddr;
1596}
1597
1598SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1599 SelectionDAG &DAG) const {
1601 return SDValue();
1602
1603 // Currently, lowering the return address is only supported for the current frame.
1604 if (Op.getConstantOperandVal(0) != 0) {
1605 DAG.getContext()->emitError(
1606 "return address can only be determined for the current frame");
1607 return SDValue();
1608 }
1609
1612 MVT GRLenVT = Subtarget.getGRLenVT();
1613
1614 // Return the value of the return address register, marking it an implicit
1615 // live-in.
1616 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1617 getRegClassFor(GRLenVT));
1618 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1619}
1620
1621SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1622 SelectionDAG &DAG) const {
1623 MachineFunction &MF = DAG.getMachineFunction();
1624 auto Size = Subtarget.getGRLen() / 8;
1625 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1626 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1627}
1628
1629SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1630 SelectionDAG &DAG) const {
1631 MachineFunction &MF = DAG.getMachineFunction();
1632 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1633
1634 SDLoc DL(Op);
1635 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1637
1638 // vastart just stores the address of the VarArgsFrameIndex slot into the
1639 // memory location argument.
1640 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1641 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1642 MachinePointerInfo(SV));
1643}
1644
1645SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1646 SelectionDAG &DAG) const {
1647 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1648 !Subtarget.hasBasicD() && "unexpected target features");
1649
1650 SDLoc DL(Op);
1651 SDValue Op0 = Op.getOperand(0);
1652 if (Op0->getOpcode() == ISD::AND) {
1653 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1654 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1655 return Op;
1656 }
1657
1658 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1659 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
1660 Op0.getConstantOperandVal(2) == UINT64_C(0))
1661 return Op;
1662
1663 if (Op0.getOpcode() == ISD::AssertZext &&
1664 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1665 return Op;
1666
1667 EVT OpVT = Op0.getValueType();
1668 EVT RetVT = Op.getValueType();
1669 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1670 MakeLibCallOptions CallOptions;
1671 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1672 SDValue Chain = SDValue();
1674 std::tie(Result, Chain) =
1675 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1676 return Result;
1677}
1678
1679SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1680 SelectionDAG &DAG) const {
1681 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1682 !Subtarget.hasBasicD() && "unexpected target features");
1683
1684 SDLoc DL(Op);
1685 SDValue Op0 = Op.getOperand(0);
1686
1687 if ((Op0.getOpcode() == ISD::AssertSext ||
1689 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1690 return Op;
1691
1692 EVT OpVT = Op0.getValueType();
1693 EVT RetVT = Op.getValueType();
1694 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1695 MakeLibCallOptions CallOptions;
1696 CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1697 SDValue Chain = SDValue();
1699 std::tie(Result, Chain) =
1700 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1701 return Result;
1702}
1703
1704SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1705 SelectionDAG &DAG) const {
1706
1707 SDLoc DL(Op);
1708 SDValue Op0 = Op.getOperand(0);
1709
1710 if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1711 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1712 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1713 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1714 }
1715 return Op;
1716}
1717
1718SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1719 SelectionDAG &DAG) const {
1720
1721 SDLoc DL(Op);
1722 SDValue Op0 = Op.getOperand(0);
1723
1724 if (Op0.getValueType() == MVT::f16)
1725 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
1726
1727 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1728 !Subtarget.hasBasicD()) {
1729 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
1730 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1731 }
1732
1733 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1734 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
1735 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1736}
1737
1738 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1739 SelectionDAG &DAG, unsigned Flags) {
1740 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1741}
1742
1743 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1744 SelectionDAG &DAG, unsigned Flags) {
1745 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1746 Flags);
1747}
1748
1749 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1750 SelectionDAG &DAG, unsigned Flags) {
1751 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1752 N->getOffset(), Flags);
1753}
1754
1755 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1756 SelectionDAG &DAG, unsigned Flags) {
1757 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1758}
1759
1760template <class NodeTy>
1761SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1762 CodeModel::Model M,
1763 bool IsLocal) const {
1764 SDLoc DL(N);
1765 EVT Ty = getPointerTy(DAG.getDataLayout());
1766 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1767 SDValue Load;
1768
1769 switch (M) {
1770 default:
1771 report_fatal_error("Unsupported code model");
1772
1773 case CodeModel::Large: {
1774 assert(Subtarget.is64Bit() && "Large code model requires LA64");
1775
1776 // This is not actually used, but is necessary for successfully matching
1777 // the PseudoLA_*_LARGE nodes.
1778 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1779 if (IsLocal) {
1780 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
1781 // eventually becomes the desired 5-insn code sequence.
1782 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1783 Tmp, Addr),
1784 0);
1785 } else {
1786 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
1787 // eventually becomes the desired 5-insn code sequence.
1788 Load = SDValue(
1789 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1790 0);
1791 }
1792 break;
1793 }
1794
1795 case CodeModel::Small:
1796 case CodeModel::Medium:
1797 if (IsLocal) {
1798 // This generates the pattern (PseudoLA_PCREL sym), which expands to
1799 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1800 Load = SDValue(
1801 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1802 } else {
1803 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1804 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1805 Load =
1806 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1807 }
1808 }
1809
1810 if (!IsLocal) {
1811 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1817 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1818 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1819 }
1820
1821 return Load;
1822}
1823
1824SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1825 SelectionDAG &DAG) const {
1826 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1827 DAG.getTarget().getCodeModel());
1828}
1829
1830SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1831 SelectionDAG &DAG) const {
1832 return getAddr(cast<JumpTableSDNode>(Op), DAG,
1833 DAG.getTarget().getCodeModel());
1834}
1835
1836SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1837 SelectionDAG &DAG) const {
1838 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1839 DAG.getTarget().getCodeModel());
1840}
1841
1842SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1843 SelectionDAG &DAG) const {
1844 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1845 assert(N->getOffset() == 0 && "unexpected offset in global node");
1846 auto CM = DAG.getTarget().getCodeModel();
1847 const GlobalValue *GV = N->getGlobal();
1848
1849 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1850 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1851 CM = *GCM;
1852 }
1853
1854 return getAddr(N, DAG, CM, GV->isDSOLocal());
1855}
1856
1857SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1858 SelectionDAG &DAG,
1859 unsigned Opc, bool UseGOT,
1860 bool Large) const {
1861 SDLoc DL(N);
1862 EVT Ty = getPointerTy(DAG.getDataLayout());
1863 MVT GRLenVT = Subtarget.getGRLenVT();
1864
1865 // This is not actually used, but is necessary for successfully matching the
1866 // PseudoLA_*_LARGE nodes.
1867 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1868 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1869
1870 // Only IE needs an extra argument for large code model.
1871 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
1872 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1873 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1874
1875 // If it is LE for normal/medium code model, the add tp operation will occur
1876 // during the pseudo-instruction expansion.
1877 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
1878 return Offset;
1879
1880 if (UseGOT) {
1881 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
1887 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1888 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1889 }
1890
1891 // Add the thread pointer.
1892 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1893 DAG.getRegister(LoongArch::R2, GRLenVT));
1894}
1895
1896SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1897 SelectionDAG &DAG,
1898 unsigned Opc,
1899 bool Large) const {
1900 SDLoc DL(N);
1901 EVT Ty = getPointerTy(DAG.getDataLayout());
1902 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1903
1904 // This is not actually used, but is necessary for successfully matching the
1905 // PseudoLA_*_LARGE nodes.
1906 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1907
1908 // Use a PC-relative addressing mode to access the dynamic GOT address.
1909 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1910 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1911 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1912
1913 // Prepare argument list to generate call.
1915 ArgListEntry Entry;
1916 Entry.Node = Load;
1917 Entry.Ty = CallTy;
1918 Args.push_back(Entry);
1919
1920 // Setup call to __tls_get_addr.
1922 CLI.setDebugLoc(DL)
1923 .setChain(DAG.getEntryNode())
1924 .setLibCallee(CallingConv::C, CallTy,
1925 DAG.getExternalSymbol("__tls_get_addr", Ty),
1926 std::move(Args));
1927
1928 return LowerCallTo(CLI).first;
1929}
1930
1931SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1932 SelectionDAG &DAG, unsigned Opc,
1933 bool Large) const {
1934 SDLoc DL(N);
1935 EVT Ty = getPointerTy(DAG.getDataLayout());
1936 const GlobalValue *GV = N->getGlobal();
1937
1938 // This is not actually used, but is necessary for successfully matching the
1939 // PseudoLA_*_LARGE nodes.
1940 SDValue Tmp = DAG.getConstant(0, DL, Ty);
1941
1942 // Use a PC-relative addressing mode to access the global dynamic GOT address.
1943 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1944 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1945 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1946 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1947}
1948
1949SDValue
1950LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1951 SelectionDAG &DAG) const {
1952  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1953      CallingConv::GHC)
1954    report_fatal_error("In GHC calling convention TLS is not supported");
1955
1956 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1957 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1958
1959 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1960 assert(N->getOffset() == 0 && "unexpected offset in global node");
1961
1962 if (DAG.getTarget().useEmulatedTLS())
1963 report_fatal_error("the emulated TLS is prohibited",
1964 /*GenCrashDiag=*/false);
1965
1966 bool IsDesc = DAG.getTarget().useTLSDESC();
1967
1968 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1969  case TLSModel::GeneralDynamic:
1970    // In this model, application code calls the dynamic linker function
1971 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1972 // runtime.
1973 if (!IsDesc)
1974 return getDynamicTLSAddr(N, DAG,
1975 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1976 : LoongArch::PseudoLA_TLS_GD,
1977 Large);
1978 break;
1979  case TLSModel::LocalDynamic:
1980    // Same as GeneralDynamic, except for assembly modifiers and relocation
1981 // records.
1982 if (!IsDesc)
1983 return getDynamicTLSAddr(N, DAG,
1984 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1985 : LoongArch::PseudoLA_TLS_LD,
1986 Large);
1987 break;
1988  case TLSModel::InitialExec:
1989    // This model uses the GOT to resolve TLS offsets.
1990 return getStaticTLSAddr(N, DAG,
1991 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1992 : LoongArch::PseudoLA_TLS_IE,
1993 /*UseGOT=*/true, Large);
1994  case TLSModel::LocalExec:
1995    // This model is used when static linking as the TLS offsets are resolved
1996 // during program linking.
1997 //
1998 // This node doesn't need an extra argument for the large code model.
1999 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
2000 /*UseGOT=*/false, Large);
2001 }
2002
2003 return getTLSDescAddr(N, DAG,
2004 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
2005 : LoongArch::PseudoLA_TLS_DESC,
2006 Large);
2007}
2008
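// Checks that the immediate operand at index ImmOp of the intrinsic fits in N
// bits (signed when IsSigned). On violation an error is emitted and UNDEF is
// returned; on success an empty SDValue is returned so the default lowering
// proceeds.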
2009template <unsigned N>
2010static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
2011                                    SelectionDAG &DAG, bool IsSigned = false) {
2012 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
2013 // Check the ImmArg.
2014 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2015 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2016 DAG.getContext()->emitError(Op->getOperationName(0) +
2017 ": argument out of range.");
2018 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
2019 }
2020 return SDValue();
2021}
2022
2023SDValue
2024LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
2025 SelectionDAG &DAG) const {
2026 SDLoc DL(Op);
2027 switch (Op.getConstantOperandVal(0)) {
2028 default:
2029 return SDValue(); // Don't custom lower most intrinsics.
2030 case Intrinsic::thread_pointer: {
2031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2032 return DAG.getRegister(LoongArch::R2, PtrVT);
2033 }
2034 case Intrinsic::loongarch_lsx_vpickve2gr_d:
2035 case Intrinsic::loongarch_lsx_vpickve2gr_du:
2036 case Intrinsic::loongarch_lsx_vreplvei_d:
2037 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
2038 return checkIntrinsicImmArg<1>(Op, 2, DAG);
2039 case Intrinsic::loongarch_lsx_vreplvei_w:
2040 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
2041 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
2042 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
2043 case Intrinsic::loongarch_lasx_xvpickve_d:
2044 case Intrinsic::loongarch_lasx_xvpickve_d_f:
2045 return checkIntrinsicImmArg<2>(Op, 2, DAG);
2046 case Intrinsic::loongarch_lasx_xvinsve0_d:
2047 return checkIntrinsicImmArg<2>(Op, 3, DAG);
2048 case Intrinsic::loongarch_lsx_vsat_b:
2049 case Intrinsic::loongarch_lsx_vsat_bu:
2050 case Intrinsic::loongarch_lsx_vrotri_b:
2051 case Intrinsic::loongarch_lsx_vsllwil_h_b:
2052 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
2053 case Intrinsic::loongarch_lsx_vsrlri_b:
2054 case Intrinsic::loongarch_lsx_vsrari_b:
2055 case Intrinsic::loongarch_lsx_vreplvei_h:
2056 case Intrinsic::loongarch_lasx_xvsat_b:
2057 case Intrinsic::loongarch_lasx_xvsat_bu:
2058 case Intrinsic::loongarch_lasx_xvrotri_b:
2059 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
2060 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
2061 case Intrinsic::loongarch_lasx_xvsrlri_b:
2062 case Intrinsic::loongarch_lasx_xvsrari_b:
2063 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
2064 case Intrinsic::loongarch_lasx_xvpickve_w:
2065 case Intrinsic::loongarch_lasx_xvpickve_w_f:
2066 return checkIntrinsicImmArg<3>(Op, 2, DAG);
2067 case Intrinsic::loongarch_lasx_xvinsve0_w:
2068 return checkIntrinsicImmArg<3>(Op, 3, DAG);
2069 case Intrinsic::loongarch_lsx_vsat_h:
2070 case Intrinsic::loongarch_lsx_vsat_hu:
2071 case Intrinsic::loongarch_lsx_vrotri_h:
2072 case Intrinsic::loongarch_lsx_vsllwil_w_h:
2073 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
2074 case Intrinsic::loongarch_lsx_vsrlri_h:
2075 case Intrinsic::loongarch_lsx_vsrari_h:
2076 case Intrinsic::loongarch_lsx_vreplvei_b:
2077 case Intrinsic::loongarch_lasx_xvsat_h:
2078 case Intrinsic::loongarch_lasx_xvsat_hu:
2079 case Intrinsic::loongarch_lasx_xvrotri_h:
2080 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2081 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2082 case Intrinsic::loongarch_lasx_xvsrlri_h:
2083 case Intrinsic::loongarch_lasx_xvsrari_h:
2084 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2085 return checkIntrinsicImmArg<4>(Op, 2, DAG);
2086 case Intrinsic::loongarch_lsx_vsrlni_b_h:
2087 case Intrinsic::loongarch_lsx_vsrani_b_h:
2088 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2089 case Intrinsic::loongarch_lsx_vsrarni_b_h:
2090 case Intrinsic::loongarch_lsx_vssrlni_b_h:
2091 case Intrinsic::loongarch_lsx_vssrani_b_h:
2092 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2093 case Intrinsic::loongarch_lsx_vssrani_bu_h:
2094 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2095 case Intrinsic::loongarch_lsx_vssrarni_b_h:
2096 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2097 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2098 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2099 case Intrinsic::loongarch_lasx_xvsrani_b_h:
2100 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2101 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2102 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2103 case Intrinsic::loongarch_lasx_xvssrani_b_h:
2104 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2105 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2106 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2107 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2108 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2109 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2110 return checkIntrinsicImmArg<4>(Op, 3, DAG);
2111 case Intrinsic::loongarch_lsx_vsat_w:
2112 case Intrinsic::loongarch_lsx_vsat_wu:
2113 case Intrinsic::loongarch_lsx_vrotri_w:
2114 case Intrinsic::loongarch_lsx_vsllwil_d_w:
2115 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2116 case Intrinsic::loongarch_lsx_vsrlri_w:
2117 case Intrinsic::loongarch_lsx_vsrari_w:
2118 case Intrinsic::loongarch_lsx_vslei_bu:
2119 case Intrinsic::loongarch_lsx_vslei_hu:
2120 case Intrinsic::loongarch_lsx_vslei_wu:
2121 case Intrinsic::loongarch_lsx_vslei_du:
2122 case Intrinsic::loongarch_lsx_vslti_bu:
2123 case Intrinsic::loongarch_lsx_vslti_hu:
2124 case Intrinsic::loongarch_lsx_vslti_wu:
2125 case Intrinsic::loongarch_lsx_vslti_du:
2126 case Intrinsic::loongarch_lsx_vbsll_v:
2127 case Intrinsic::loongarch_lsx_vbsrl_v:
2128 case Intrinsic::loongarch_lasx_xvsat_w:
2129 case Intrinsic::loongarch_lasx_xvsat_wu:
2130 case Intrinsic::loongarch_lasx_xvrotri_w:
2131 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2132 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2133 case Intrinsic::loongarch_lasx_xvsrlri_w:
2134 case Intrinsic::loongarch_lasx_xvsrari_w:
2135 case Intrinsic::loongarch_lasx_xvslei_bu:
2136 case Intrinsic::loongarch_lasx_xvslei_hu:
2137 case Intrinsic::loongarch_lasx_xvslei_wu:
2138 case Intrinsic::loongarch_lasx_xvslei_du:
2139 case Intrinsic::loongarch_lasx_xvslti_bu:
2140 case Intrinsic::loongarch_lasx_xvslti_hu:
2141 case Intrinsic::loongarch_lasx_xvslti_wu:
2142 case Intrinsic::loongarch_lasx_xvslti_du:
2143 case Intrinsic::loongarch_lasx_xvbsll_v:
2144 case Intrinsic::loongarch_lasx_xvbsrl_v:
2145 return checkIntrinsicImmArg<5>(Op, 2, DAG);
2146 case Intrinsic::loongarch_lsx_vseqi_b:
2147 case Intrinsic::loongarch_lsx_vseqi_h:
2148 case Intrinsic::loongarch_lsx_vseqi_w:
2149 case Intrinsic::loongarch_lsx_vseqi_d:
2150 case Intrinsic::loongarch_lsx_vslei_b:
2151 case Intrinsic::loongarch_lsx_vslei_h:
2152 case Intrinsic::loongarch_lsx_vslei_w:
2153 case Intrinsic::loongarch_lsx_vslei_d:
2154 case Intrinsic::loongarch_lsx_vslti_b:
2155 case Intrinsic::loongarch_lsx_vslti_h:
2156 case Intrinsic::loongarch_lsx_vslti_w:
2157 case Intrinsic::loongarch_lsx_vslti_d:
2158 case Intrinsic::loongarch_lasx_xvseqi_b:
2159 case Intrinsic::loongarch_lasx_xvseqi_h:
2160 case Intrinsic::loongarch_lasx_xvseqi_w:
2161 case Intrinsic::loongarch_lasx_xvseqi_d:
2162 case Intrinsic::loongarch_lasx_xvslei_b:
2163 case Intrinsic::loongarch_lasx_xvslei_h:
2164 case Intrinsic::loongarch_lasx_xvslei_w:
2165 case Intrinsic::loongarch_lasx_xvslei_d:
2166 case Intrinsic::loongarch_lasx_xvslti_b:
2167 case Intrinsic::loongarch_lasx_xvslti_h:
2168 case Intrinsic::loongarch_lasx_xvslti_w:
2169 case Intrinsic::loongarch_lasx_xvslti_d:
2170 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2171 case Intrinsic::loongarch_lsx_vsrlni_h_w:
2172 case Intrinsic::loongarch_lsx_vsrani_h_w:
2173 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2174 case Intrinsic::loongarch_lsx_vsrarni_h_w:
2175 case Intrinsic::loongarch_lsx_vssrlni_h_w:
2176 case Intrinsic::loongarch_lsx_vssrani_h_w:
2177 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2178 case Intrinsic::loongarch_lsx_vssrani_hu_w:
2179 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2180 case Intrinsic::loongarch_lsx_vssrarni_h_w:
2181 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2182 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2183 case Intrinsic::loongarch_lsx_vfrstpi_b:
2184 case Intrinsic::loongarch_lsx_vfrstpi_h:
2185 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2186 case Intrinsic::loongarch_lasx_xvsrani_h_w:
2187 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2188 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2189 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2190 case Intrinsic::loongarch_lasx_xvssrani_h_w:
2191 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2192 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2193 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2194 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2195 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2196 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2197 case Intrinsic::loongarch_lasx_xvfrstpi_b:
2198 case Intrinsic::loongarch_lasx_xvfrstpi_h:
2199 return checkIntrinsicImmArg<5>(Op, 3, DAG);
2200 case Intrinsic::loongarch_lsx_vsat_d:
2201 case Intrinsic::loongarch_lsx_vsat_du:
2202 case Intrinsic::loongarch_lsx_vrotri_d:
2203 case Intrinsic::loongarch_lsx_vsrlri_d:
2204 case Intrinsic::loongarch_lsx_vsrari_d:
2205 case Intrinsic::loongarch_lasx_xvsat_d:
2206 case Intrinsic::loongarch_lasx_xvsat_du:
2207 case Intrinsic::loongarch_lasx_xvrotri_d:
2208 case Intrinsic::loongarch_lasx_xvsrlri_d:
2209 case Intrinsic::loongarch_lasx_xvsrari_d:
2210 return checkIntrinsicImmArg<6>(Op, 2, DAG);
2211 case Intrinsic::loongarch_lsx_vsrlni_w_d:
2212 case Intrinsic::loongarch_lsx_vsrani_w_d:
2213 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2214 case Intrinsic::loongarch_lsx_vsrarni_w_d:
2215 case Intrinsic::loongarch_lsx_vssrlni_w_d:
2216 case Intrinsic::loongarch_lsx_vssrani_w_d:
2217 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2218 case Intrinsic::loongarch_lsx_vssrani_wu_d:
2219 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2220 case Intrinsic::loongarch_lsx_vssrarni_w_d:
2221 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2222 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2223 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2224 case Intrinsic::loongarch_lasx_xvsrani_w_d:
2225 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2226 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2227 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2228 case Intrinsic::loongarch_lasx_xvssrani_w_d:
2229 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2230 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2231 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2232 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2233 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2234 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2235 return checkIntrinsicImmArg<6>(Op, 3, DAG);
2236 case Intrinsic::loongarch_lsx_vsrlni_d_q:
2237 case Intrinsic::loongarch_lsx_vsrani_d_q:
2238 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2239 case Intrinsic::loongarch_lsx_vsrarni_d_q:
2240 case Intrinsic::loongarch_lsx_vssrlni_d_q:
2241 case Intrinsic::loongarch_lsx_vssrani_d_q:
2242 case Intrinsic::loongarch_lsx_vssrlni_du_q:
2243 case Intrinsic::loongarch_lsx_vssrani_du_q:
2244 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2245 case Intrinsic::loongarch_lsx_vssrarni_d_q:
2246 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2247 case Intrinsic::loongarch_lsx_vssrarni_du_q:
2248 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2249 case Intrinsic::loongarch_lasx_xvsrani_d_q:
2250 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2251 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2252 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2253 case Intrinsic::loongarch_lasx_xvssrani_d_q:
2254 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2255 case Intrinsic::loongarch_lasx_xvssrani_du_q:
2256 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2257 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2258 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2259 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2260 return checkIntrinsicImmArg<7>(Op, 3, DAG);
2261 case Intrinsic::loongarch_lsx_vnori_b:
2262 case Intrinsic::loongarch_lsx_vshuf4i_b:
2263 case Intrinsic::loongarch_lsx_vshuf4i_h:
2264 case Intrinsic::loongarch_lsx_vshuf4i_w:
2265 case Intrinsic::loongarch_lasx_xvnori_b:
2266 case Intrinsic::loongarch_lasx_xvshuf4i_b:
2267 case Intrinsic::loongarch_lasx_xvshuf4i_h:
2268 case Intrinsic::loongarch_lasx_xvshuf4i_w:
2269 case Intrinsic::loongarch_lasx_xvpermi_d:
2270 return checkIntrinsicImmArg<8>(Op, 2, DAG);
2271 case Intrinsic::loongarch_lsx_vshuf4i_d:
2272 case Intrinsic::loongarch_lsx_vpermi_w:
2273 case Intrinsic::loongarch_lsx_vbitseli_b:
2274 case Intrinsic::loongarch_lsx_vextrins_b:
2275 case Intrinsic::loongarch_lsx_vextrins_h:
2276 case Intrinsic::loongarch_lsx_vextrins_w:
2277 case Intrinsic::loongarch_lsx_vextrins_d:
2278 case Intrinsic::loongarch_lasx_xvshuf4i_d:
2279 case Intrinsic::loongarch_lasx_xvpermi_w:
2280 case Intrinsic::loongarch_lasx_xvpermi_q:
2281 case Intrinsic::loongarch_lasx_xvbitseli_b:
2282 case Intrinsic::loongarch_lasx_xvextrins_b:
2283 case Intrinsic::loongarch_lasx_xvextrins_h:
2284 case Intrinsic::loongarch_lasx_xvextrins_w:
2285 case Intrinsic::loongarch_lasx_xvextrins_d:
2286 return checkIntrinsicImmArg<8>(Op, 3, DAG);
2287 case Intrinsic::loongarch_lsx_vrepli_b:
2288 case Intrinsic::loongarch_lsx_vrepli_h:
2289 case Intrinsic::loongarch_lsx_vrepli_w:
2290 case Intrinsic::loongarch_lsx_vrepli_d:
2291 case Intrinsic::loongarch_lasx_xvrepli_b:
2292 case Intrinsic::loongarch_lasx_xvrepli_h:
2293 case Intrinsic::loongarch_lasx_xvrepli_w:
2294 case Intrinsic::loongarch_lasx_xvrepli_d:
2295 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2296 case Intrinsic::loongarch_lsx_vldi:
2297 case Intrinsic::loongarch_lasx_xvldi:
2298 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2299 }
2300}
2301
2302// Helper function that emits an error message for intrinsics with chain and
2303// returns the merged values of a UNDEF and the chain.
2304static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2305                                                  StringRef ErrorMsg,
2306 SelectionDAG &DAG) {
2307 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2308 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2309 SDLoc(Op));
2310}
2311
2312SDValue
2313LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2314 SelectionDAG &DAG) const {
2315 SDLoc DL(Op);
2316 MVT GRLenVT = Subtarget.getGRLenVT();
2317 EVT VT = Op.getValueType();
2318 SDValue Chain = Op.getOperand(0);
2319 const StringRef ErrorMsgOOR = "argument out of range";
2320 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2321 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2322
2323 switch (Op.getConstantOperandVal(1)) {
2324 default:
2325 return Op;
2326 case Intrinsic::loongarch_crc_w_b_w:
2327 case Intrinsic::loongarch_crc_w_h_w:
2328 case Intrinsic::loongarch_crc_w_w_w:
2329 case Intrinsic::loongarch_crc_w_d_w:
2330 case Intrinsic::loongarch_crcc_w_b_w:
2331 case Intrinsic::loongarch_crcc_w_h_w:
2332 case Intrinsic::loongarch_crcc_w_w_w:
2333 case Intrinsic::loongarch_crcc_w_d_w:
2334 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2335 case Intrinsic::loongarch_csrrd_w:
2336 case Intrinsic::loongarch_csrrd_d: {
2337 unsigned Imm = Op.getConstantOperandVal(2);
2338 return !isUInt<14>(Imm)
2339 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2340 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2341 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2342 }
2343 case Intrinsic::loongarch_csrwr_w:
2344 case Intrinsic::loongarch_csrwr_d: {
2345 unsigned Imm = Op.getConstantOperandVal(3);
2346 return !isUInt<14>(Imm)
2347 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2348 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2349 {Chain, Op.getOperand(2),
2350 DAG.getConstant(Imm, DL, GRLenVT)});
2351 }
2352 case Intrinsic::loongarch_csrxchg_w:
2353 case Intrinsic::loongarch_csrxchg_d: {
2354 unsigned Imm = Op.getConstantOperandVal(4);
2355 return !isUInt<14>(Imm)
2356 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2357 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2358 {Chain, Op.getOperand(2), Op.getOperand(3),
2359 DAG.getConstant(Imm, DL, GRLenVT)});
2360 }
2361 case Intrinsic::loongarch_iocsrrd_d: {
2362 return DAG.getNode(
2363 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2364 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2365 }
2366#define IOCSRRD_CASE(NAME, NODE) \
2367 case Intrinsic::loongarch_##NAME: { \
2368 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2369 {Chain, Op.getOperand(2)}); \
2370 }
2371 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2372 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2373 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2374#undef IOCSRRD_CASE
2375 case Intrinsic::loongarch_cpucfg: {
2376 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2377 {Chain, Op.getOperand(2)});
2378 }
2379 case Intrinsic::loongarch_lddir_d: {
2380 unsigned Imm = Op.getConstantOperandVal(3);
2381 return !isUInt<8>(Imm)
2382 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2383 : Op;
2384 }
2385 case Intrinsic::loongarch_movfcsr2gr: {
2386 if (!Subtarget.hasBasicF())
2387 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2388 unsigned Imm = Op.getConstantOperandVal(2);
2389 return !isUInt<2>(Imm)
2390 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2391 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2392 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2393 }
2394 case Intrinsic::loongarch_lsx_vld:
2395 case Intrinsic::loongarch_lsx_vldrepl_b:
2396 case Intrinsic::loongarch_lasx_xvld:
2397 case Intrinsic::loongarch_lasx_xvldrepl_b:
2398 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2399 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2400 : SDValue();
2401 case Intrinsic::loongarch_lsx_vldrepl_h:
2402 case Intrinsic::loongarch_lasx_xvldrepl_h:
2403 return !isShiftedInt<11, 1>(
2404 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2405               ? emitIntrinsicWithChainErrorMessage(
2406                     Op, "argument out of range or not a multiple of 2", DAG)
2407 : SDValue();
2408 case Intrinsic::loongarch_lsx_vldrepl_w:
2409 case Intrinsic::loongarch_lasx_xvldrepl_w:
2410 return !isShiftedInt<10, 2>(
2411 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2412               ? emitIntrinsicWithChainErrorMessage(
2413                     Op, "argument out of range or not a multiple of 4", DAG)
2414 : SDValue();
2415 case Intrinsic::loongarch_lsx_vldrepl_d:
2416 case Intrinsic::loongarch_lasx_xvldrepl_d:
2417 return !isShiftedInt<9, 3>(
2418 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2419               ? emitIntrinsicWithChainErrorMessage(
2420                     Op, "argument out of range or not a multiple of 8", DAG)
2421 : SDValue();
2422 }
2423}
2424
2425// Helper function that emits an error message for intrinsics with a void return
2426// value and returns the chain.
2427static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2428                                         SelectionDAG &DAG) {
2429
2430 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2431 return Op.getOperand(0);
2432}
2433
2434SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2435 SelectionDAG &DAG) const {
2436 SDLoc DL(Op);
2437 MVT GRLenVT = Subtarget.getGRLenVT();
2438 SDValue Chain = Op.getOperand(0);
2439 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2440 SDValue Op2 = Op.getOperand(2);
2441 const StringRef ErrorMsgOOR = "argument out of range";
2442 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2443 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2444 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2445
2446 switch (IntrinsicEnum) {
2447 default:
2448 // TODO: Add more Intrinsics.
2449 return SDValue();
2450 case Intrinsic::loongarch_cacop_d:
2451 case Intrinsic::loongarch_cacop_w: {
2452 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2453 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2454 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2455 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2456 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2457 unsigned Imm1 = Op2->getAsZExtVal();
2458 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2459 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2460 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2461 return Op;
2462 }
2463 case Intrinsic::loongarch_dbar: {
2464 unsigned Imm = Op2->getAsZExtVal();
2465 return !isUInt<15>(Imm)
2466 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2467 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2468 DAG.getConstant(Imm, DL, GRLenVT));
2469 }
2470 case Intrinsic::loongarch_ibar: {
2471 unsigned Imm = Op2->getAsZExtVal();
2472 return !isUInt<15>(Imm)
2473 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2474 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2475 DAG.getConstant(Imm, DL, GRLenVT));
2476 }
2477 case Intrinsic::loongarch_break: {
2478 unsigned Imm = Op2->getAsZExtVal();
2479 return !isUInt<15>(Imm)
2480 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2481 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2482 DAG.getConstant(Imm, DL, GRLenVT));
2483 }
2484 case Intrinsic::loongarch_movgr2fcsr: {
2485 if (!Subtarget.hasBasicF())
2486 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2487 unsigned Imm = Op2->getAsZExtVal();
2488 return !isUInt<2>(Imm)
2489 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2490 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2491 DAG.getConstant(Imm, DL, GRLenVT),
2492 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2493 Op.getOperand(3)));
2494 }
2495 case Intrinsic::loongarch_syscall: {
2496 unsigned Imm = Op2->getAsZExtVal();
2497 return !isUInt<15>(Imm)
2498 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2499 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2500 DAG.getConstant(Imm, DL, GRLenVT));
2501 }
2502#define IOCSRWR_CASE(NAME, NODE) \
2503 case Intrinsic::loongarch_##NAME: { \
2504 SDValue Op3 = Op.getOperand(3); \
2505 return Subtarget.is64Bit() \
2506 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2507 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2508 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2509 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2510 Op3); \
2511 }
2512 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2513 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2514 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2515#undef IOCSRWR_CASE
2516 case Intrinsic::loongarch_iocsrwr_d: {
2517 return !Subtarget.is64Bit()
2518 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2519 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2520 Op2,
2521 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2522 Op.getOperand(3)));
2523 }
2524#define ASRT_LE_GT_CASE(NAME) \
2525 case Intrinsic::loongarch_##NAME: { \
2526 return !Subtarget.is64Bit() \
2527 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2528 : Op; \
2529 }
2530 ASRT_LE_GT_CASE(asrtle_d)
2531 ASRT_LE_GT_CASE(asrtgt_d)
2532#undef ASRT_LE_GT_CASE
2533 case Intrinsic::loongarch_ldpte_d: {
2534 unsigned Imm = Op.getConstantOperandVal(3);
2535 return !Subtarget.is64Bit()
2536 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2537 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2538 : Op;
2539 }
2540 case Intrinsic::loongarch_lsx_vst:
2541 case Intrinsic::loongarch_lasx_xvst:
2542 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2543 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2544 : SDValue();
2545 case Intrinsic::loongarch_lasx_xvstelm_b:
2546 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2547 !isUInt<5>(Op.getConstantOperandVal(5)))
2548 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2549 : SDValue();
2550 case Intrinsic::loongarch_lsx_vstelm_b:
2551 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2552 !isUInt<4>(Op.getConstantOperandVal(5)))
2553 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2554 : SDValue();
2555 case Intrinsic::loongarch_lasx_xvstelm_h:
2556 return (!isShiftedInt<8, 1>(
2557 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2558 !isUInt<4>(Op.getConstantOperandVal(5)))
2559               ? emitIntrinsicErrorMessage(
2560                     Op, "argument out of range or not a multiple of 2", DAG)
2561 : SDValue();
2562 case Intrinsic::loongarch_lsx_vstelm_h:
2563 return (!isShiftedInt<8, 1>(
2564 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2565 !isUInt<3>(Op.getConstantOperandVal(5)))
2566               ? emitIntrinsicErrorMessage(
2567                     Op, "argument out of range or not a multiple of 2", DAG)
2568 : SDValue();
2569 case Intrinsic::loongarch_lasx_xvstelm_w:
2570 return (!isShiftedInt<8, 2>(
2571 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2572 !isUInt<3>(Op.getConstantOperandVal(5)))
2573               ? emitIntrinsicErrorMessage(
2574                     Op, "argument out of range or not a multiple of 4", DAG)
2575 : SDValue();
2576 case Intrinsic::loongarch_lsx_vstelm_w:
2577 return (!isShiftedInt<8, 2>(
2578 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2579 !isUInt<2>(Op.getConstantOperandVal(5)))
2580               ? emitIntrinsicErrorMessage(
2581                     Op, "argument out of range or not a multiple of 4", DAG)
2582 : SDValue();
2583 case Intrinsic::loongarch_lasx_xvstelm_d:
2584 return (!isShiftedInt<8, 3>(
2585 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2586 !isUInt<2>(Op.getConstantOperandVal(5)))
2587               ? emitIntrinsicErrorMessage(
2588                     Op, "argument out of range or not a multiple of 8", DAG)
2589 : SDValue();
2590 case Intrinsic::loongarch_lsx_vstelm_d:
2591 return (!isShiftedInt<8, 3>(
2592 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2593 !isUInt<1>(Op.getConstantOperandVal(5)))
2594               ? emitIntrinsicErrorMessage(
2595                     Op, "argument out of range or not a multiple of 8", DAG)
2596 : SDValue();
2597 }
2598}
2599
2600SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2601 SelectionDAG &DAG) const {
2602 SDLoc DL(Op);
2603 SDValue Lo = Op.getOperand(0);
2604 SDValue Hi = Op.getOperand(1);
2605 SDValue Shamt = Op.getOperand(2);
2606 EVT VT = Lo.getValueType();
2607
2608 // if Shamt-GRLen < 0: // Shamt < GRLen
2609 // Lo = Lo << Shamt
2610 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2611 // else:
2612 // Lo = 0
2613 // Hi = Lo << (Shamt-GRLen)
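  //
  // Note that for 0 <= Shamt < GRLen, (GRLen-1 ^ Shamt) == (GRLen-1) - Shamt,
  // so ((Lo >>u 1) >>u (GRLen-1 ^ Shamt)) equals Lo >>u (GRLen - Shamt) while
  // keeping every shift amount strictly smaller than GRLen (a direct shift by
  // GRLen - Shamt would be a shift by GRLen when Shamt is 0).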
2614
2615 SDValue Zero = DAG.getConstant(0, DL, VT);
2616 SDValue One = DAG.getConstant(1, DL, VT);
2617 SDValue MinusGRLen =
2618 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2619 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2620 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2621 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2622
2623 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2624 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2625 SDValue ShiftRightLo =
2626 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2627 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2628 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2629 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2630
2631 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2632
2633 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2634 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2635
2636 SDValue Parts[2] = {Lo, Hi};
2637 return DAG.getMergeValues(Parts, DL);
2638}
2639
2640SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2641 SelectionDAG &DAG,
2642 bool IsSRA) const {
2643 SDLoc DL(Op);
2644 SDValue Lo = Op.getOperand(0);
2645 SDValue Hi = Op.getOperand(1);
2646 SDValue Shamt = Op.getOperand(2);
2647 EVT VT = Lo.getValueType();
2648
2649 // SRA expansion:
2650 // if Shamt-GRLen < 0: // Shamt < GRLen
2651 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2652 // Hi = Hi >>s Shamt
2653 // else:
2654 // Lo = Hi >>s (Shamt-GRLen);
2655 // Hi = Hi >>s (GRLen-1)
2656 //
2657 // SRL expansion:
2658 // if Shamt-GRLen < 0: // Shamt < GRLen
2659 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2660 // Hi = Hi >>u Shamt
2661 // else:
2662 // Lo = Hi >>u (Shamt-GRLen);
2663 // Hi = 0;
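  //
  // For example, with GRLen=32, Shamt=4 and an arithmetic shift:
  //   Lo = (Lo >>u 4) | ((Hi << 1) << 27) = (Lo >>u 4) | (Hi << 28)
  //   Hi = Hi >>s 4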
2664
2665 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2666
2667 SDValue Zero = DAG.getConstant(0, DL, VT);
2668 SDValue One = DAG.getConstant(1, DL, VT);
2669 SDValue MinusGRLen =
2670 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
2671 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2672 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2673 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2674
2675 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2676 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2677 SDValue ShiftLeftHi =
2678 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2679 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2680 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2681 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2682 SDValue HiFalse =
2683 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2684
2685 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2686
2687 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2688 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2689
2690 SDValue Parts[2] = {Lo, Hi};
2691 return DAG.getMergeValues(Parts, DL);
2692}
2693
2694// Returns the opcode of the target-specific SDNode that implements the 32-bit
2695// form of the given Opcode.
2696static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2697  switch (Opcode) {
2698 default:
2699 llvm_unreachable("Unexpected opcode");
2700 case ISD::SDIV:
2701 return LoongArchISD::DIV_W;
2702 case ISD::UDIV:
2703 return LoongArchISD::DIV_WU;
2704 case ISD::SREM:
2705 return LoongArchISD::MOD_W;
2706 case ISD::UREM:
2707 return LoongArchISD::MOD_WU;
2708 case ISD::SHL:
2709 return LoongArchISD::SLL_W;
2710 case ISD::SRA:
2711 return LoongArchISD::SRA_W;
2712 case ISD::SRL:
2713 return LoongArchISD::SRL_W;
2714 case ISD::ROTL:
2715 case ISD::ROTR:
2716 return LoongArchISD::ROTR_W;
2717 case ISD::CTTZ:
2718 return LoongArchISD::CTZ_W;
2719 case ISD::CTLZ:
2720 return LoongArchISD::CLZ_W;
2721 }
2722}
2723
2724// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2725// node. Because i8/i16/i32 aren't legal types for LA64, these operations would
2726// otherwise be promoted to i64, making it difficult to select the
2727// SLL_W/.../*W later on, because the fact that the operation was originally of
2728// type i8/i16/i32 is lost.
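// For example, an i32 shift-right-arithmetic on LA64 becomes
//   (trunc i32 (LoongArchISD::SRA_W (any_ext i64 $a), (any_ext i64 $b)))
// which is then matched to sra.w.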
2729static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2730                                   unsigned ExtOpc = ISD::ANY_EXTEND) {
2731 SDLoc DL(N);
2732 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2733 SDValue NewOp0, NewRes;
2734
2735 switch (NumOp) {
2736 default:
2737 llvm_unreachable("Unexpected NumOp");
2738 case 1: {
2739 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2740 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2741 break;
2742 }
2743 case 2: {
2744 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2745 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2746 if (N->getOpcode() == ISD::ROTL) {
2747 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2748 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2749 }
2750 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2751 break;
2752 }
2753  // TODO: Handle more NumOp.
2754 }
2755
2756 // ReplaceNodeResults requires we maintain the same type for the return
2757 // value.
2758 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2759}
2760
2761// Converts the given 32-bit operation to an i64 operation with sign-extension
2762// semantics to reduce the number of sign-extension instructions.
2763static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2764  SDLoc DL(N);
2765 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2766 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2767 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2768 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2769 DAG.getValueType(MVT::i32));
2770 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2771}
2772
2773// Helper function that emits an error message for intrinsics with or without a
2774// chain and returns a UNDEF, or a UNDEF and the chain, as the results.
2775static void emitErrorAndReplaceIntrinsicResults(
2776    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2777    StringRef ErrorMsg, bool WithChain = true) {
2778 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2779 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2780 if (!WithChain)
2781 return;
2782 Results.push_back(N->getOperand(0));
2783}
2784
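// Replaces a [x]vpickve2gr intrinsic result: the element index (operand 2) must
// fit in N unsigned bits; the element is extracted with ResOp on the GRLen-wide
// type and truncated back to the node's original result type.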
2785template <unsigned N>
2786static void
2787replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2788                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2789 unsigned ResOp) {
2790 const StringRef ErrorMsgOOR = "argument out of range";
2791 unsigned Imm = Node->getConstantOperandVal(2);
2792 if (!isUInt<N>(Imm)) {
2793    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2794                                        /*WithChain=*/false);
2795 return;
2796 }
2797 SDLoc DL(Node);
2798 SDValue Vec = Node->getOperand(1);
2799
2800 SDValue PickElt =
2801 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2802 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2803                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
2804  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2805 PickElt.getValue(0)));
2806}
2807
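// Replaces the scalar result of a vector set-condition intrinsic (the bz/bnz
// families) with the given LoongArchISD node computed on the GRLen-wide type,
// truncated back to the node's original result type.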
2808static void replaceVecCondBranchResults(SDNode *N,
2809                                        SmallVectorImpl<SDValue> &Results,
2810                                        SelectionDAG &DAG,
2811 const LoongArchSubtarget &Subtarget,
2812 unsigned ResOp) {
2813 SDLoc DL(N);
2814 SDValue Vec = N->getOperand(1);
2815
2816 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2817 Results.push_back(
2818 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2819}
2820
2821static void
2822replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2823                                 SelectionDAG &DAG,
2824 const LoongArchSubtarget &Subtarget) {
2825 switch (N->getConstantOperandVal(0)) {
2826 default:
2827 llvm_unreachable("Unexpected Intrinsic.");
2828 case Intrinsic::loongarch_lsx_vpickve2gr_b:
2829 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2830                                LoongArchISD::VPICK_SEXT_ELT);
2831    break;
2832 case Intrinsic::loongarch_lsx_vpickve2gr_h:
2833 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2834 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2835                                LoongArchISD::VPICK_SEXT_ELT);
2836    break;
2837 case Intrinsic::loongarch_lsx_vpickve2gr_w:
2838 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2839                                LoongArchISD::VPICK_SEXT_ELT);
2840    break;
2841 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2842 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2843                                LoongArchISD::VPICK_ZEXT_ELT);
2844    break;
2845 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2846 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2847 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2848                                LoongArchISD::VPICK_ZEXT_ELT);
2849    break;
2850 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2851 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2852                                LoongArchISD::VPICK_ZEXT_ELT);
2853    break;
2854 case Intrinsic::loongarch_lsx_bz_b:
2855 case Intrinsic::loongarch_lsx_bz_h:
2856 case Intrinsic::loongarch_lsx_bz_w:
2857 case Intrinsic::loongarch_lsx_bz_d:
2858 case Intrinsic::loongarch_lasx_xbz_b:
2859 case Intrinsic::loongarch_lasx_xbz_h:
2860 case Intrinsic::loongarch_lasx_xbz_w:
2861 case Intrinsic::loongarch_lasx_xbz_d:
2862 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2863                                LoongArchISD::VANY_ZERO);
2864    break;
2865 case Intrinsic::loongarch_lsx_bz_v:
2866 case Intrinsic::loongarch_lasx_xbz_v:
2867 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2868                                LoongArchISD::VALL_ZERO);
2869    break;
2870 case Intrinsic::loongarch_lsx_bnz_b:
2871 case Intrinsic::loongarch_lsx_bnz_h:
2872 case Intrinsic::loongarch_lsx_bnz_w:
2873 case Intrinsic::loongarch_lsx_bnz_d:
2874 case Intrinsic::loongarch_lasx_xbnz_b:
2875 case Intrinsic::loongarch_lasx_xbnz_h:
2876 case Intrinsic::loongarch_lasx_xbnz_w:
2877 case Intrinsic::loongarch_lasx_xbnz_d:
2878 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2879                                LoongArchISD::VALL_NONZERO);
2880    break;
2881 case Intrinsic::loongarch_lsx_bnz_v:
2882 case Intrinsic::loongarch_lasx_xbnz_v:
2883 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2884                                LoongArchISD::VANY_NONZERO);
2885    break;
2886 }
2887}
2888
2889void LoongArchTargetLowering::ReplaceNodeResults(
2890    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2891  SDLoc DL(N);
2892 EVT VT = N->getValueType(0);
2893 switch (N->getOpcode()) {
2894 default:
2895 llvm_unreachable("Don't know how to legalize this operation");
2896 case ISD::ADD:
2897 case ISD::SUB:
2898 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2899 "Unexpected custom legalisation");
2900 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2901 break;
2902 case ISD::SDIV:
2903 case ISD::UDIV:
2904 case ISD::SREM:
2905 case ISD::UREM:
2906 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2907 "Unexpected custom legalisation");
2908 Results.push_back(customLegalizeToWOp(N, DAG, 2,
2909 Subtarget.hasDiv32() && VT == MVT::i32
2910                                              ? ISD::ANY_EXTEND
2911                                              : ISD::SIGN_EXTEND));
2912 break;
2913 case ISD::SHL:
2914 case ISD::SRA:
2915 case ISD::SRL:
2916 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2917 "Unexpected custom legalisation");
2918 if (N->getOperand(1).getOpcode() != ISD::Constant) {
2919 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2920 break;
2921 }
2922 break;
2923 case ISD::ROTL:
2924 case ISD::ROTR:
2925 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2926 "Unexpected custom legalisation");
2927 Results.push_back(customLegalizeToWOp(N, DAG, 2));
2928 break;
2929 case ISD::FP_TO_SINT: {
2930 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2931 "Unexpected custom legalisation");
2932 SDValue Src = N->getOperand(0);
2933 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2934 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2935        TargetLowering::TypeSoftenFloat) {
2936      if (Src.getValueType() == MVT::f16)
2937 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
2938 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2939 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2940 return;
2941 }
2942 // If the FP type needs to be softened, emit a library call using the 'si'
2943 // version. If we left it to default legalization we'd end up with 'di'.
2944 RTLIB::Libcall LC;
2945 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2946 MakeLibCallOptions CallOptions;
2947 EVT OpVT = Src.getValueType();
2948 CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2949 SDValue Chain = SDValue();
2950 SDValue Result;
2951 std::tie(Result, Chain) =
2952 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2953 Results.push_back(Result);
2954 break;
2955 }
2956 case ISD::BITCAST: {
2957 SDValue Src = N->getOperand(0);
2958 EVT SrcVT = Src.getValueType();
2959 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2960 Subtarget.hasBasicF()) {
2961 SDValue Dst =
2962 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2963 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2964 }
2965 break;
2966 }
2967 case ISD::FP_TO_UINT: {
2968 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2969 "Unexpected custom legalisation");
2970 auto &TLI = DAG.getTargetLoweringInfo();
2971 SDValue Tmp1, Tmp2;
2972 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2973 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2974 break;
2975 }
2976 case ISD::BSWAP: {
2977 SDValue Src = N->getOperand(0);
2978 assert((VT == MVT::i16 || VT == MVT::i32) &&
2979 "Unexpected custom legalization");
2980 MVT GRLenVT = Subtarget.getGRLenVT();
2981 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2982 SDValue Tmp;
2983 switch (VT.getSizeInBits()) {
2984 default:
2985 llvm_unreachable("Unexpected operand width");
2986 case 16:
2987 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2988 break;
2989 case 32:
2990      // Only LA64 will get to here due to the size mismatch between VT and
2991      // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2992 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2993 break;
2994 }
2995 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2996 break;
2997 }
2998 case ISD::BITREVERSE: {
2999 SDValue Src = N->getOperand(0);
3000 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
3001 "Unexpected custom legalization");
3002 MVT GRLenVT = Subtarget.getGRLenVT();
3003 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
3004 SDValue Tmp;
3005 switch (VT.getSizeInBits()) {
3006 default:
3007 llvm_unreachable("Unexpected operand width");
3008 case 8:
3009 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
3010 break;
3011 case 32:
3012 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
3013 break;
3014 }
3015 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
3016 break;
3017 }
3018 case ISD::CTLZ:
3019 case ISD::CTTZ: {
3020 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
3021 "Unexpected custom legalisation");
3022 Results.push_back(customLegalizeToWOp(N, DAG, 1));
3023 break;
3024 }
3025  case ISD::INTRINSIC_W_CHAIN: {
3026    SDValue Chain = N->getOperand(0);
3027 SDValue Op2 = N->getOperand(2);
3028 MVT GRLenVT = Subtarget.getGRLenVT();
3029 const StringRef ErrorMsgOOR = "argument out of range";
3030 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3031 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3032
3033 switch (N->getConstantOperandVal(1)) {
3034 default:
3035 llvm_unreachable("Unexpected Intrinsic.");
3036 case Intrinsic::loongarch_movfcsr2gr: {
3037 if (!Subtarget.hasBasicF()) {
3038 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
3039 return;
3040 }
3041 unsigned Imm = Op2->getAsZExtVal();
3042 if (!isUInt<2>(Imm)) {
3043 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3044 return;
3045 }
3046 SDValue MOVFCSR2GRResults = DAG.getNode(
3047 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
3048 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3049 Results.push_back(
3050 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
3051 Results.push_back(MOVFCSR2GRResults.getValue(1));
3052 break;
3053 }
3054#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
3055 case Intrinsic::loongarch_##NAME: { \
3056 SDValue NODE = DAG.getNode( \
3057 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3058 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3059 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3060 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3061 Results.push_back(NODE.getValue(1)); \
3062 break; \
3063 }
3064 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
3065 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
3066 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
3067 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
3068 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
3069 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
3070#undef CRC_CASE_EXT_BINARYOP
3071
3072#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
3073 case Intrinsic::loongarch_##NAME: { \
3074 SDValue NODE = DAG.getNode( \
3075 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3076 {Chain, Op2, \
3077 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
3078 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
3079 Results.push_back(NODE.getValue(1)); \
3080 break; \
3081 }
3082 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
3083 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
3084#undef CRC_CASE_EXT_UNARYOP
3085#define CSR_CASE(ID) \
3086 case Intrinsic::loongarch_##ID: { \
3087 if (!Subtarget.is64Bit()) \
3088 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3089 break; \
3090 }
3091 CSR_CASE(csrrd_d);
3092 CSR_CASE(csrwr_d);
3093 CSR_CASE(csrxchg_d);
3094 CSR_CASE(iocsrrd_d);
3095#undef CSR_CASE
3096 case Intrinsic::loongarch_csrrd_w: {
3097 unsigned Imm = Op2->getAsZExtVal();
3098 if (!isUInt<14>(Imm)) {
3099 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3100 return;
3101 }
3102 SDValue CSRRDResults =
3103 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3104 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3105 Results.push_back(
3106 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3107 Results.push_back(CSRRDResults.getValue(1));
3108 break;
3109 }
3110 case Intrinsic::loongarch_csrwr_w: {
3111 unsigned Imm = N->getConstantOperandVal(3);
3112 if (!isUInt<14>(Imm)) {
3113 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3114 return;
3115 }
3116 SDValue CSRWRResults =
3117 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3118 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3119 DAG.getConstant(Imm, DL, GRLenVT)});
3120 Results.push_back(
3121 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3122 Results.push_back(CSRWRResults.getValue(1));
3123 break;
3124 }
3125 case Intrinsic::loongarch_csrxchg_w: {
3126 unsigned Imm = N->getConstantOperandVal(4);
3127 if (!isUInt<14>(Imm)) {
3128 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3129 return;
3130 }
3131 SDValue CSRXCHGResults = DAG.getNode(
3132 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3133 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3134 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3135 DAG.getConstant(Imm, DL, GRLenVT)});
3136 Results.push_back(
3137 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3138 Results.push_back(CSRXCHGResults.getValue(1));
3139 break;
3140 }
3141#define IOCSRRD_CASE(NAME, NODE) \
3142 case Intrinsic::loongarch_##NAME: { \
3143 SDValue IOCSRRDResults = \
3144 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3145 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3146 Results.push_back( \
3147 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3148 Results.push_back(IOCSRRDResults.getValue(1)); \
3149 break; \
3150 }
3151 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3152 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3153 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3154#undef IOCSRRD_CASE
3155 case Intrinsic::loongarch_cpucfg: {
3156 SDValue CPUCFGResults =
3157 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3158 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3159 Results.push_back(
3160 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3161 Results.push_back(CPUCFGResults.getValue(1));
3162 break;
3163 }
3164 case Intrinsic::loongarch_lddir_d: {
3165 if (!Subtarget.is64Bit()) {
3166 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3167 return;
3168 }
3169 break;
3170 }
3171 }
3172 break;
3173 }
3174 case ISD::READ_REGISTER: {
3175 if (Subtarget.is64Bit())
3176 DAG.getContext()->emitError(
3177 "On LA64, only 64-bit registers can be read.");
3178 else
3179 DAG.getContext()->emitError(
3180 "On LA32, only 32-bit registers can be read.");
3181 Results.push_back(DAG.getUNDEF(VT));
3182 Results.push_back(N->getOperand(0));
3183 break;
3184 }
3185  case ISD::INTRINSIC_WO_CHAIN: {
3186    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3187 break;
3188 }
3189 case ISD::LROUND: {
3190 SDValue Op0 = N->getOperand(0);
3191 EVT OpVT = Op0.getValueType();
3192 RTLIB::Libcall LC =
3193 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
3194 MakeLibCallOptions CallOptions;
3195 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
3196 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
3197 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
3198 Results.push_back(Result);
3199 break;
3200 }
3201 }
3202}
3203
3204static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3205                                 TargetLowering::DAGCombinerInfo &DCI,
3206                                 const LoongArchSubtarget &Subtarget) {
3207 if (DCI.isBeforeLegalizeOps())
3208 return SDValue();
3209
3210 SDValue FirstOperand = N->getOperand(0);
3211 SDValue SecondOperand = N->getOperand(1);
3212 unsigned FirstOperandOpc = FirstOperand.getOpcode();
3213 EVT ValTy = N->getValueType(0);
3214 SDLoc DL(N);
3215 uint64_t lsb, msb;
3216 unsigned SMIdx, SMLen;
3217 ConstantSDNode *CN;
3218 SDValue NewOperand;
3219 MVT GRLenVT = Subtarget.getGRLenVT();
3220
3221 // Op's second operand must be a shifted mask.
3222 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3223 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3224 return SDValue();
3225
3226 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3227 // Pattern match BSTRPICK.
3228 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3229 // => BSTRPICK $dst, $src, msb, lsb
3230 // where msb = lsb + len - 1
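    // For example, (and (srl $src, 8), 255) becomes (BSTRPICK $src, 15, 8).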
3231
3232 // The second operand of the shift must be an immediate.
3233 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3234 return SDValue();
3235
3236 lsb = CN->getZExtValue();
3237
3238 // Return if the shifted mask does not start at bit 0 or the sum of its
3239 // length and lsb exceeds the word's size.
3240 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3241 return SDValue();
3242
3243 NewOperand = FirstOperand.getOperand(0);
3244 } else {
3245 // Pattern match BSTRPICK.
3246    // $dst = and $src, (2**len - 1), if len > 12
3247 // => BSTRPICK $dst, $src, msb, lsb
3248 // where lsb = 0 and msb = len - 1
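    // For example, (and $src, 0xffff) becomes (BSTRPICK $src, 15, 0).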
3249
3250 // If the mask is <= 0xfff, andi can be used instead.
3251 if (CN->getZExtValue() <= 0xfff)
3252 return SDValue();
3253
3254    // Return if the MSB position exceeds the value's bit width.
3255 if (SMIdx + SMLen > ValTy.getSizeInBits())
3256 return SDValue();
3257
3258 if (SMIdx > 0) {
3259      // Omit if the constant has more than 2 uses. This is a conservative
3260      // decision. Whether it is a win depends on the HW microarchitecture.
3261      // However, it should always be better for 1 and 2 uses.
3262 if (CN->use_size() > 2)
3263 return SDValue();
3264 // Return if the constant can be composed by a single LU12I.W.
3265 if ((CN->getZExtValue() & 0xfff) == 0)
3266 return SDValue();
3267      // Return if the constant can be composed by a single ADDI with
3268 // the zero register.
3269 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3270 return SDValue();
3271 }
3272
3273 lsb = SMIdx;
3274 NewOperand = FirstOperand;
3275 }
3276
3277 msb = lsb + SMLen - 1;
3278 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3279 DAG.getConstant(msb, DL, GRLenVT),
3280 DAG.getConstant(lsb, DL, GRLenVT));
3281 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3282 return NR0;
3283 // Try to optimize to
3284 // bstrpick $Rd, $Rs, msb, lsb
3285 // slli $Rd, $Rd, lsb
3286 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3287 DAG.getConstant(lsb, DL, GRLenVT));
3288}
3289
3290static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3291                                 TargetLowering::DAGCombinerInfo &DCI,
3292                                 const LoongArchSubtarget &Subtarget) {
3293 if (DCI.isBeforeLegalizeOps())
3294 return SDValue();
3295
3296 // $dst = srl (and $src, Mask), Shamt
3297 // =>
3298 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3299 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
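  // For example, (srl (and $src, 0xff00), 8) becomes (BSTRPICK $src, 15, 8).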
3300 //
3301
3302 SDValue FirstOperand = N->getOperand(0);
3303 ConstantSDNode *CN;
3304 EVT ValTy = N->getValueType(0);
3305 SDLoc DL(N);
3306 MVT GRLenVT = Subtarget.getGRLenVT();
3307 unsigned MaskIdx, MaskLen;
3308 uint64_t Shamt;
3309
3310 // The first operand must be an AND and the second operand of the AND must be
3311 // a shifted mask.
3312 if (FirstOperand.getOpcode() != ISD::AND ||
3313 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3314 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3315 return SDValue();
3316
3317 // The second operand (shift amount) must be an immediate.
3318 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3319 return SDValue();
3320
3321 Shamt = CN->getZExtValue();
3322 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3323 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3324 FirstOperand->getOperand(0),
3325 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3326 DAG.getConstant(Shamt, DL, GRLenVT));
3327
3328 return SDValue();
3329}
3330
3331static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3332                                TargetLowering::DAGCombinerInfo &DCI,
3333                                const LoongArchSubtarget &Subtarget) {
3334 MVT GRLenVT = Subtarget.getGRLenVT();
3335 EVT ValTy = N->getValueType(0);
3336 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3337 ConstantSDNode *CN0, *CN1;
3338 SDLoc DL(N);
3339 unsigned ValBits = ValTy.getSizeInBits();
3340 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3341 unsigned Shamt;
3342 bool SwapAndRetried = false;
3343
3344 if (DCI.isBeforeLegalizeOps())
3345 return SDValue();
3346
3347 if (ValBits != 32 && ValBits != 64)
3348 return SDValue();
3349
3350Retry:
3351 // 1st pattern to match BSTRINS:
3352 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
3353 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3354 // =>
3355 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
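  //
  // For example, with 32-bit operands,
  //   (or (and X, 0xffff00ff), (and (shl Y, 8), 0xff00))
  // becomes (BSTRINS X, Y, 15, 8).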
3356 if (N0.getOpcode() == ISD::AND &&
3357 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3358 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3359 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3360 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3361 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3362 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3363 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3364 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3365 (MaskIdx0 + MaskLen0 <= ValBits)) {
3366 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3367 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3368 N1.getOperand(0).getOperand(0),
3369 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3370 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3371 }
3372
3373 // 2nd pattern to match BSTRINS:
3374 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
3375 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3376 // =>
3377 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3378 if (N0.getOpcode() == ISD::AND &&
3379 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3380 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3381 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3382 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3383 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3384 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3385 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3386 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3387 (MaskIdx0 + MaskLen0 <= ValBits)) {
3388 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3389 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3390 N1.getOperand(0).getOperand(0),
3391 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3392 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3393 }
3394
3395 // 3rd pattern to match BSTRINS:
3396 // R = or (and X, mask0), (and Y, mask1)
3397 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3398 // =>
3399 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3400 // where msb = lsb + size - 1
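  // For example, with mask0 = 0xff00ffff (~mask0 = 0x00ff0000, so lsb = 16 and
  // size = 8) and mask1 = 0x00ff0000 on an i32 value,
  //   R = or (and X, 0xff00ffff), (and Y, 0x00ff0000)
  // becomes R = BSTRINS X, (shr (and Y, 0x00ff0000), 16), 23, 16.
  // The `MaskLen0 & 31` below appears to compensate for a 32-bit mask whose
  // sign extension to 64 bits makes the inverted mask run all the way to
  // bit 63.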
3401 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3402 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3403 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3404 (MaskIdx0 + MaskLen0 <= 64) &&
3405 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3406 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3407 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3408 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3409 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3410 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3411 DAG.getConstant(ValBits == 32
3412 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3413 : (MaskIdx0 + MaskLen0 - 1),
3414 DL, GRLenVT),
3415 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3416 }
3417
3418 // 4th pattern to match BSTRINS:
3419 // R = or (and X, mask), (shl Y, shamt)
3420 // where mask = (2**shamt - 1)
3421 // =>
3422 // R = BSTRINS X, Y, ValBits - 1, shamt
3423 // where ValBits = 32 or 64
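  // For example, with shamt = 12 on an i64 value, mask is 0xfff, so
  //   R = or (and X, 0xfff), (shl Y, 12)
  // becomes R = BSTRINS X, Y, 63, 12.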
3424 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3425 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3426 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3427 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
3429 (MaskIdx0 + MaskLen0 <= ValBits)) {
3430 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3431 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3432 N1.getOperand(0),
3433 DAG.getConstant((ValBits - 1), DL, GRLenVT),
3434 DAG.getConstant(Shamt, DL, GRLenVT));
3435 }
3436
3437 // 5th pattern to match BSTRINS:
3438 // R = or (and X, mask), const
3439 // where ~mask = (2**size - 1) << lsb, mask & const = 0
3440 // =>
3441 // R = BSTRINS X, (const >> lsb), msb, lsb
3442 // where msb = lsb + size - 1
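  // For example, with mask = 0xff00ffff (~mask = 0x00ff0000, so lsb = 16 and
  // size = 8) and const = 0x00ab0000 on an i32 value,
  //   R = or (and X, 0xff00ffff), 0x00ab0000
  // becomes R = BSTRINS X, 0xab, 23, 16.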
3443 if (N0.getOpcode() == ISD::AND &&
3444 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3445 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3446 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3447 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3448 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3449 return DAG.getNode(
3450 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3451 DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3452 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3453 : (MaskIdx0 + MaskLen0 - 1),
3454 DL, GRLenVT),
3455 DAG.getConstant(MaskIdx0, DL, GRLenVT));
3456 }
3457
3458 // 6th pattern.
3459 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3460 // by the incoming bits are known to be zero.
3461 // =>
3462 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3463 //
3464  // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
3465 // pattern is more common than the 1st. So we put the 1st before the 6th in
3466 // order to match as many nodes as possible.
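  // For example, with mask = 0xff and shamt = 8, and bits [15:8] of b known
  // to be zero, a = b | ((c & 0xff) << 8) becomes a = BSTRINS b, c, 15, 8.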
3467 ConstantSDNode *CNMask, *CNShamt;
3468 unsigned MaskIdx, MaskLen;
3469 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3470 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3471 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3472 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3473 CNShamt->getZExtValue() + MaskLen <= ValBits) {
3474 Shamt = CNShamt->getZExtValue();
3475 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3476 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3477 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3478 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3479 N1.getOperand(0).getOperand(0),
3480 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3481 DAG.getConstant(Shamt, DL, GRLenVT));
3482 }
3483 }
3484
3485 // 7th pattern.
3486 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3487 // overwritten by the incoming bits are known to be zero.
3488 // =>
3489 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3490 //
3491 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3492 // before the 7th in order to match as many nodes as possible.
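  // For example, with shifted_mask = 0x00ff0000 (MaskIdx = 16, MaskLen = 8)
  // and shamt = 16, and bits [23:16] of b known to be zero,
  // a = b | ((c << 16) & 0x00ff0000) becomes a = BSTRINS b, c, 23, 16.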
3493 if (N1.getOpcode() == ISD::AND &&
3494 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3495 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3496 N1.getOperand(0).getOpcode() == ISD::SHL &&
3497 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3498 CNShamt->getZExtValue() == MaskIdx) {
3499 APInt ShMask(ValBits, CNMask->getZExtValue());
3500 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3501 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3502 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3503 N1.getOperand(0).getOperand(0),
3504 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3505 DAG.getConstant(MaskIdx, DL, GRLenVT));
3506 }
3507 }
3508
3509 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3510 if (!SwapAndRetried) {
3511 std::swap(N0, N1);
3512 SwapAndRetried = true;
3513 goto Retry;
3514 }
3515
3516 SwapAndRetried = false;
3517Retry2:
3518 // 8th pattern.
3519 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
3520 // the incoming bits are known to be zero.
3521 // =>
3522 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3523 //
3524 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3525 // we put it here in order to match as many nodes as possible or generate fewer
3526 // instructions.
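  // For example, with shifted_mask = 0x00ff0000 (MaskIdx = 16, MaskLen = 8)
  // and bits [23:16] of b known to be zero, a = b | (c & 0x00ff0000) becomes
  // a = BSTRINS b, (c >> 16), 23, 16.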
3527 if (N1.getOpcode() == ISD::AND &&
3528 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3529 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3530 APInt ShMask(ValBits, CNMask->getZExtValue());
3531 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3532 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3533 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3534 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3535 N1->getOperand(0),
3536 DAG.getConstant(MaskIdx, DL, GRLenVT)),
3537 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3538 DAG.getConstant(MaskIdx, DL, GRLenVT));
3539 }
3540 }
3541 // Swap N0/N1 and retry.
3542 if (!SwapAndRetried) {
3543 std::swap(N0, N1);
3544 SwapAndRetried = true;
3545 goto Retry2;
3546 }
3547
3548 return SDValue();
3549}
3550
3551static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3552 ExtType = ISD::NON_EXTLOAD;
3553
3554 switch (V.getNode()->getOpcode()) {
3555 case ISD::LOAD: {
3556 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3557 if ((LoadNode->getMemoryVT() == MVT::i8) ||
3558 (LoadNode->getMemoryVT() == MVT::i16)) {
3559 ExtType = LoadNode->getExtensionType();
3560 return true;
3561 }
3562 return false;
3563 }
3564 case ISD::AssertSext: {
3565 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3566 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3567 ExtType = ISD::SEXTLOAD;
3568 return true;
3569 }
3570 return false;
3571 }
3572 case ISD::AssertZext: {
3573 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3574 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3575 ExtType = ISD::ZEXTLOAD;
3576 return true;
3577 }
3578 return false;
3579 }
3580 default:
3581 return false;
3582 }
3583
3584 return false;
3585}
3586
3587// Eliminate redundant truncation and zero-extension nodes.
3588// * Case 1:
3589// +------------+ +------------+ +------------+
3590// | Input1 | | Input2 | | CC |
3591// +------------+ +------------+ +------------+
3592// | | |
3593// V V +----+
3594// +------------+ +------------+ |
3595// | TRUNCATE | | TRUNCATE | |
3596// +------------+ +------------+ |
3597// | | |
3598// V V |
3599// +------------+ +------------+ |
3600// | ZERO_EXT | | ZERO_EXT | |
3601// +------------+ +------------+ |
3602// | | |
3603// | +-------------+ |
3604// V V | |
3605// +----------------+ | |
3606// | AND | | |
3607// +----------------+ | |
3608// | | |
3609// +---------------+ | |
3610// | | |
3611// V V V
3612// +-------------+
3613// | CMP |
3614// +-------------+
3615// * Case 2:
3616// +------------+ +------------+ +-------------+ +------------+ +------------+
3617// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3618// +------------+ +------------+ +-------------+ +------------+ +------------+
3619// | | | | |
3620// V | | | |
3621// +------------+ | | | |
3622// | XOR |<---------------------+ | |
3623// +------------+ | | |
3624// | | | |
3625// V V +---------------+ |
3626// +------------+ +------------+ | |
3627// | TRUNCATE | | TRUNCATE | | +-------------------------+
3628// +------------+ +------------+ | |
3629// | | | |
3630// V V | |
3631// +------------+ +------------+ | |
3632// | ZERO_EXT | | ZERO_EXT | | |
3633// +------------+ +------------+ | |
3634// | | | |
3635// V V | |
3636// +----------------+ | |
3637// | AND | | |
3638// +----------------+ | |
3639// | | |
3640// +---------------+ | |
3641// | | |
3642// V V V
3643// +-------------+
3644// | CMP |
3645// +-------------+
3646static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3647                                   TargetLowering::DAGCombinerInfo &DCI,
3648                                   const LoongArchSubtarget &Subtarget) {
3649 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3650
3651 SDNode *AndNode = N->getOperand(0).getNode();
3652 if (AndNode->getOpcode() != ISD::AND)
3653 return SDValue();
3654
3655 SDValue AndInputValue2 = AndNode->getOperand(1);
3656 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3657 return SDValue();
3658
3659 SDValue CmpInputValue = N->getOperand(1);
3660 SDValue AndInputValue1 = AndNode->getOperand(0);
3661 if (AndInputValue1.getOpcode() == ISD::XOR) {
3662 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3663 return SDValue();
3664 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3665 if (!CN || CN->getSExtValue() != -1)
3666 return SDValue();
3667 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3668 if (!CN || CN->getSExtValue() != 0)
3669 return SDValue();
3670 AndInputValue1 = AndInputValue1.getOperand(0);
3671 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3672 return SDValue();
3673 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3674 if (AndInputValue2 != CmpInputValue)
3675 return SDValue();
3676 } else {
3677 return SDValue();
3678 }
3679
3680 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3681 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3682 return SDValue();
3683
3684 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3685 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3686 return SDValue();
3687
3688 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3689 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3690 ISD::LoadExtType ExtType1;
3691 ISD::LoadExtType ExtType2;
3692
3693 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3694 !checkValueWidth(TruncInputValue2, ExtType2))
3695 return SDValue();
3696
3697 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3698 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3699 return SDValue();
3700
3701 if ((ExtType2 != ISD::ZEXTLOAD) &&
3702 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3703 return SDValue();
3704
3705 // These truncation and zero-extension nodes are not necessary, remove them.
3706 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3707 TruncInputValue1, TruncInputValue2);
3708 SDValue NewSetCC =
3709 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3710 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3711 return SDValue(N, 0);
3712}
3713
3714// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
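// revb.2w swaps the bytes within each 32-bit word and bitrev.w then reverses
// all 32 bits, so the combined effect is to reverse the bits inside each byte
// while keeping the byte order, which is what a single bitrev.4b does.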
3715static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3716                                      TargetLowering::DAGCombinerInfo &DCI,
3717                                      const LoongArchSubtarget &Subtarget) {
3718 if (DCI.isBeforeLegalizeOps())
3719 return SDValue();
3720
3721 SDValue Src = N->getOperand(0);
3722 if (Src.getOpcode() != LoongArchISD::REVB_2W)
3723 return SDValue();
3724
3725 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3726 Src.getOperand(0));
3727}
3728
3729template <unsigned N>
3730static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3731                                       SelectionDAG &DAG,
3732 const LoongArchSubtarget &Subtarget,
3733 bool IsSigned = false) {
3734 SDLoc DL(Node);
3735 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3736 // Check the ImmArg.
3737 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3738 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3739 DAG.getContext()->emitError(Node->getOperationName(0) +
3740 ": argument out of range.");
3741 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3742 }
3743 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3744}
3745
3746template <unsigned N>
3747static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3748 SelectionDAG &DAG, bool IsSigned = false) {
3749 SDLoc DL(Node);
3750 EVT ResTy = Node->getValueType(0);
3751 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3752
3753 // Check the ImmArg.
3754 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3755 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3756 DAG.getContext()->emitError(Node->getOperationName(0) +
3757 ": argument out of range.");
3758 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3759 }
3760 return DAG.getConstant(
3761      APInt(ResTy.getScalarType().getSizeInBits(),
3762            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3763 DL, ResTy);
3764}
3765
3766static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3767  SDLoc DL(Node);
3768 EVT ResTy = Node->getValueType(0);
3769 SDValue Vec = Node->getOperand(2);
3770 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3771 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3772}
3773
3774static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3775  SDLoc DL(Node);
3776 EVT ResTy = Node->getValueType(0);
3777 SDValue One = DAG.getConstant(1, DL, ResTy);
3778 SDValue Bit =
3779 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3780
3781 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3782 DAG.getNOT(DL, Bit, ResTy));
3783}
3784
3785template <unsigned N>
3786static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3787  SDLoc DL(Node);
3788 EVT ResTy = Node->getValueType(0);
3789 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3790 // Check the unsigned ImmArg.
3791 if (!isUInt<N>(CImm->getZExtValue())) {
3792 DAG.getContext()->emitError(Node->getOperationName(0) +
3793 ": argument out of range.");
3794 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3795 }
3796
3797 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3798 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3799
3800 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3801}
3802
3803template <unsigned N>
3804static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3805  SDLoc DL(Node);
3806 EVT ResTy = Node->getValueType(0);
3807 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3808 // Check the unsigned ImmArg.
3809 if (!isUInt<N>(CImm->getZExtValue())) {
3810 DAG.getContext()->emitError(Node->getOperationName(0) +
3811 ": argument out of range.");
3812 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3813 }
3814
3815 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3816 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3817 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3818}
3819
3820template <unsigned N>
3821static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3822  SDLoc DL(Node);
3823 EVT ResTy = Node->getValueType(0);
3824 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3825 // Check the unsigned ImmArg.
3826 if (!isUInt<N>(CImm->getZExtValue())) {
3827 DAG.getContext()->emitError(Node->getOperationName(0) +
3828 ": argument out of range.");
3829 return DAG.getNode(ISD::UNDEF, DL, ResTy);
3830 }
3831
3832 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3833 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3834 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3835}
3836
3837static SDValue
3838performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3839                                 TargetLowering::DAGCombinerInfo &DCI,
3840                                 const LoongArchSubtarget &Subtarget) {
3841 SDLoc DL(N);
3842 switch (N->getConstantOperandVal(0)) {
3843 default:
3844 break;
3845 case Intrinsic::loongarch_lsx_vadd_b:
3846 case Intrinsic::loongarch_lsx_vadd_h:
3847 case Intrinsic::loongarch_lsx_vadd_w:
3848 case Intrinsic::loongarch_lsx_vadd_d:
3849 case Intrinsic::loongarch_lasx_xvadd_b:
3850 case Intrinsic::loongarch_lasx_xvadd_h:
3851 case Intrinsic::loongarch_lasx_xvadd_w:
3852 case Intrinsic::loongarch_lasx_xvadd_d:
3853 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3854 N->getOperand(2));
3855 case Intrinsic::loongarch_lsx_vaddi_bu:
3856 case Intrinsic::loongarch_lsx_vaddi_hu:
3857 case Intrinsic::loongarch_lsx_vaddi_wu:
3858 case Intrinsic::loongarch_lsx_vaddi_du:
3859 case Intrinsic::loongarch_lasx_xvaddi_bu:
3860 case Intrinsic::loongarch_lasx_xvaddi_hu:
3861 case Intrinsic::loongarch_lasx_xvaddi_wu:
3862 case Intrinsic::loongarch_lasx_xvaddi_du:
3863 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3864 lowerVectorSplatImm<5>(N, 2, DAG));
3865 case Intrinsic::loongarch_lsx_vsub_b:
3866 case Intrinsic::loongarch_lsx_vsub_h:
3867 case Intrinsic::loongarch_lsx_vsub_w:
3868 case Intrinsic::loongarch_lsx_vsub_d:
3869 case Intrinsic::loongarch_lasx_xvsub_b:
3870 case Intrinsic::loongarch_lasx_xvsub_h:
3871 case Intrinsic::loongarch_lasx_xvsub_w:
3872 case Intrinsic::loongarch_lasx_xvsub_d:
3873 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3874 N->getOperand(2));
3875 case Intrinsic::loongarch_lsx_vsubi_bu:
3876 case Intrinsic::loongarch_lsx_vsubi_hu:
3877 case Intrinsic::loongarch_lsx_vsubi_wu:
3878 case Intrinsic::loongarch_lsx_vsubi_du:
3879 case Intrinsic::loongarch_lasx_xvsubi_bu:
3880 case Intrinsic::loongarch_lasx_xvsubi_hu:
3881 case Intrinsic::loongarch_lasx_xvsubi_wu:
3882 case Intrinsic::loongarch_lasx_xvsubi_du:
3883 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3884 lowerVectorSplatImm<5>(N, 2, DAG));
3885 case Intrinsic::loongarch_lsx_vneg_b:
3886 case Intrinsic::loongarch_lsx_vneg_h:
3887 case Intrinsic::loongarch_lsx_vneg_w:
3888 case Intrinsic::loongarch_lsx_vneg_d:
3889 case Intrinsic::loongarch_lasx_xvneg_b:
3890 case Intrinsic::loongarch_lasx_xvneg_h:
3891 case Intrinsic::loongarch_lasx_xvneg_w:
3892 case Intrinsic::loongarch_lasx_xvneg_d:
3893 return DAG.getNode(
3894 ISD::SUB, DL, N->getValueType(0),
3895 DAG.getConstant(
3896 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3897 /*isSigned=*/true),
3898 SDLoc(N), N->getValueType(0)),
3899 N->getOperand(1));
3900 case Intrinsic::loongarch_lsx_vmax_b:
3901 case Intrinsic::loongarch_lsx_vmax_h:
3902 case Intrinsic::loongarch_lsx_vmax_w:
3903 case Intrinsic::loongarch_lsx_vmax_d:
3904 case Intrinsic::loongarch_lasx_xvmax_b:
3905 case Intrinsic::loongarch_lasx_xvmax_h:
3906 case Intrinsic::loongarch_lasx_xvmax_w:
3907 case Intrinsic::loongarch_lasx_xvmax_d:
3908 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3909 N->getOperand(2));
3910 case Intrinsic::loongarch_lsx_vmax_bu:
3911 case Intrinsic::loongarch_lsx_vmax_hu:
3912 case Intrinsic::loongarch_lsx_vmax_wu:
3913 case Intrinsic::loongarch_lsx_vmax_du:
3914 case Intrinsic::loongarch_lasx_xvmax_bu:
3915 case Intrinsic::loongarch_lasx_xvmax_hu:
3916 case Intrinsic::loongarch_lasx_xvmax_wu:
3917 case Intrinsic::loongarch_lasx_xvmax_du:
3918 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3919 N->getOperand(2));
3920 case Intrinsic::loongarch_lsx_vmaxi_b:
3921 case Intrinsic::loongarch_lsx_vmaxi_h:
3922 case Intrinsic::loongarch_lsx_vmaxi_w:
3923 case Intrinsic::loongarch_lsx_vmaxi_d:
3924 case Intrinsic::loongarch_lasx_xvmaxi_b:
3925 case Intrinsic::loongarch_lasx_xvmaxi_h:
3926 case Intrinsic::loongarch_lasx_xvmaxi_w:
3927 case Intrinsic::loongarch_lasx_xvmaxi_d:
3928 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3929 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3930 case Intrinsic::loongarch_lsx_vmaxi_bu:
3931 case Intrinsic::loongarch_lsx_vmaxi_hu:
3932 case Intrinsic::loongarch_lsx_vmaxi_wu:
3933 case Intrinsic::loongarch_lsx_vmaxi_du:
3934 case Intrinsic::loongarch_lasx_xvmaxi_bu:
3935 case Intrinsic::loongarch_lasx_xvmaxi_hu:
3936 case Intrinsic::loongarch_lasx_xvmaxi_wu:
3937 case Intrinsic::loongarch_lasx_xvmaxi_du:
3938 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3939 lowerVectorSplatImm<5>(N, 2, DAG));
3940 case Intrinsic::loongarch_lsx_vmin_b:
3941 case Intrinsic::loongarch_lsx_vmin_h:
3942 case Intrinsic::loongarch_lsx_vmin_w:
3943 case Intrinsic::loongarch_lsx_vmin_d:
3944 case Intrinsic::loongarch_lasx_xvmin_b:
3945 case Intrinsic::loongarch_lasx_xvmin_h:
3946 case Intrinsic::loongarch_lasx_xvmin_w:
3947 case Intrinsic::loongarch_lasx_xvmin_d:
3948 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3949 N->getOperand(2));
3950 case Intrinsic::loongarch_lsx_vmin_bu:
3951 case Intrinsic::loongarch_lsx_vmin_hu:
3952 case Intrinsic::loongarch_lsx_vmin_wu:
3953 case Intrinsic::loongarch_lsx_vmin_du:
3954 case Intrinsic::loongarch_lasx_xvmin_bu:
3955 case Intrinsic::loongarch_lasx_xvmin_hu:
3956 case Intrinsic::loongarch_lasx_xvmin_wu:
3957 case Intrinsic::loongarch_lasx_xvmin_du:
3958 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3959 N->getOperand(2));
3960 case Intrinsic::loongarch_lsx_vmini_b:
3961 case Intrinsic::loongarch_lsx_vmini_h:
3962 case Intrinsic::loongarch_lsx_vmini_w:
3963 case Intrinsic::loongarch_lsx_vmini_d:
3964 case Intrinsic::loongarch_lasx_xvmini_b:
3965 case Intrinsic::loongarch_lasx_xvmini_h:
3966 case Intrinsic::loongarch_lasx_xvmini_w:
3967 case Intrinsic::loongarch_lasx_xvmini_d:
3968 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3969 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3970 case Intrinsic::loongarch_lsx_vmini_bu:
3971 case Intrinsic::loongarch_lsx_vmini_hu:
3972 case Intrinsic::loongarch_lsx_vmini_wu:
3973 case Intrinsic::loongarch_lsx_vmini_du:
3974 case Intrinsic::loongarch_lasx_xvmini_bu:
3975 case Intrinsic::loongarch_lasx_xvmini_hu:
3976 case Intrinsic::loongarch_lasx_xvmini_wu:
3977 case Intrinsic::loongarch_lasx_xvmini_du:
3978 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3979 lowerVectorSplatImm<5>(N, 2, DAG));
3980 case Intrinsic::loongarch_lsx_vmul_b:
3981 case Intrinsic::loongarch_lsx_vmul_h:
3982 case Intrinsic::loongarch_lsx_vmul_w:
3983 case Intrinsic::loongarch_lsx_vmul_d:
3984 case Intrinsic::loongarch_lasx_xvmul_b:
3985 case Intrinsic::loongarch_lasx_xvmul_h:
3986 case Intrinsic::loongarch_lasx_xvmul_w:
3987 case Intrinsic::loongarch_lasx_xvmul_d:
3988 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3989 N->getOperand(2));
3990 case Intrinsic::loongarch_lsx_vmadd_b:
3991 case Intrinsic::loongarch_lsx_vmadd_h:
3992 case Intrinsic::loongarch_lsx_vmadd_w:
3993 case Intrinsic::loongarch_lsx_vmadd_d:
3994 case Intrinsic::loongarch_lasx_xvmadd_b:
3995 case Intrinsic::loongarch_lasx_xvmadd_h:
3996 case Intrinsic::loongarch_lasx_xvmadd_w:
3997 case Intrinsic::loongarch_lasx_xvmadd_d: {
3998 EVT ResTy = N->getValueType(0);
3999 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
4000 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4001 N->getOperand(3)));
4002 }
4003 case Intrinsic::loongarch_lsx_vmsub_b:
4004 case Intrinsic::loongarch_lsx_vmsub_h:
4005 case Intrinsic::loongarch_lsx_vmsub_w:
4006 case Intrinsic::loongarch_lsx_vmsub_d:
4007 case Intrinsic::loongarch_lasx_xvmsub_b:
4008 case Intrinsic::loongarch_lasx_xvmsub_h:
4009 case Intrinsic::loongarch_lasx_xvmsub_w:
4010 case Intrinsic::loongarch_lasx_xvmsub_d: {
4011 EVT ResTy = N->getValueType(0);
4012 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
4013 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
4014 N->getOperand(3)));
4015 }
4016 case Intrinsic::loongarch_lsx_vdiv_b:
4017 case Intrinsic::loongarch_lsx_vdiv_h:
4018 case Intrinsic::loongarch_lsx_vdiv_w:
4019 case Intrinsic::loongarch_lsx_vdiv_d:
4020 case Intrinsic::loongarch_lasx_xvdiv_b:
4021 case Intrinsic::loongarch_lasx_xvdiv_h:
4022 case Intrinsic::loongarch_lasx_xvdiv_w:
4023 case Intrinsic::loongarch_lasx_xvdiv_d:
4024 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
4025 N->getOperand(2));
4026 case Intrinsic::loongarch_lsx_vdiv_bu:
4027 case Intrinsic::loongarch_lsx_vdiv_hu:
4028 case Intrinsic::loongarch_lsx_vdiv_wu:
4029 case Intrinsic::loongarch_lsx_vdiv_du:
4030 case Intrinsic::loongarch_lasx_xvdiv_bu:
4031 case Intrinsic::loongarch_lasx_xvdiv_hu:
4032 case Intrinsic::loongarch_lasx_xvdiv_wu:
4033 case Intrinsic::loongarch_lasx_xvdiv_du:
4034 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
4035 N->getOperand(2));
4036 case Intrinsic::loongarch_lsx_vmod_b:
4037 case Intrinsic::loongarch_lsx_vmod_h:
4038 case Intrinsic::loongarch_lsx_vmod_w:
4039 case Intrinsic::loongarch_lsx_vmod_d:
4040 case Intrinsic::loongarch_lasx_xvmod_b:
4041 case Intrinsic::loongarch_lasx_xvmod_h:
4042 case Intrinsic::loongarch_lasx_xvmod_w:
4043 case Intrinsic::loongarch_lasx_xvmod_d:
4044 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
4045 N->getOperand(2));
4046 case Intrinsic::loongarch_lsx_vmod_bu:
4047 case Intrinsic::loongarch_lsx_vmod_hu:
4048 case Intrinsic::loongarch_lsx_vmod_wu:
4049 case Intrinsic::loongarch_lsx_vmod_du:
4050 case Intrinsic::loongarch_lasx_xvmod_bu:
4051 case Intrinsic::loongarch_lasx_xvmod_hu:
4052 case Intrinsic::loongarch_lasx_xvmod_wu:
4053 case Intrinsic::loongarch_lasx_xvmod_du:
4054 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
4055 N->getOperand(2));
4056 case Intrinsic::loongarch_lsx_vand_v:
4057 case Intrinsic::loongarch_lasx_xvand_v:
4058 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4059 N->getOperand(2));
4060 case Intrinsic::loongarch_lsx_vor_v:
4061 case Intrinsic::loongarch_lasx_xvor_v:
4062 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4063 N->getOperand(2));
4064 case Intrinsic::loongarch_lsx_vxor_v:
4065 case Intrinsic::loongarch_lasx_xvxor_v:
4066 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4067 N->getOperand(2));
4068 case Intrinsic::loongarch_lsx_vnor_v:
4069 case Intrinsic::loongarch_lasx_xvnor_v: {
4070 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4071 N->getOperand(2));
4072 return DAG.getNOT(DL, Res, Res->getValueType(0));
4073 }
4074 case Intrinsic::loongarch_lsx_vandi_b:
4075 case Intrinsic::loongarch_lasx_xvandi_b:
4076 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
4077 lowerVectorSplatImm<8>(N, 2, DAG));
4078 case Intrinsic::loongarch_lsx_vori_b:
4079 case Intrinsic::loongarch_lasx_xvori_b:
4080 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
4081 lowerVectorSplatImm<8>(N, 2, DAG));
4082 case Intrinsic::loongarch_lsx_vxori_b:
4083 case Intrinsic::loongarch_lasx_xvxori_b:
4084 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
4085 lowerVectorSplatImm<8>(N, 2, DAG));
4086 case Intrinsic::loongarch_lsx_vsll_b:
4087 case Intrinsic::loongarch_lsx_vsll_h:
4088 case Intrinsic::loongarch_lsx_vsll_w:
4089 case Intrinsic::loongarch_lsx_vsll_d:
4090 case Intrinsic::loongarch_lasx_xvsll_b:
4091 case Intrinsic::loongarch_lasx_xvsll_h:
4092 case Intrinsic::loongarch_lasx_xvsll_w:
4093 case Intrinsic::loongarch_lasx_xvsll_d:
4094 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4095 truncateVecElts(N, DAG));
4096 case Intrinsic::loongarch_lsx_vslli_b:
4097 case Intrinsic::loongarch_lasx_xvslli_b:
4098 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4099 lowerVectorSplatImm<3>(N, 2, DAG));
4100 case Intrinsic::loongarch_lsx_vslli_h:
4101 case Intrinsic::loongarch_lasx_xvslli_h:
4102 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4103 lowerVectorSplatImm<4>(N, 2, DAG));
4104 case Intrinsic::loongarch_lsx_vslli_w:
4105 case Intrinsic::loongarch_lasx_xvslli_w:
4106 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4107 lowerVectorSplatImm<5>(N, 2, DAG));
4108 case Intrinsic::loongarch_lsx_vslli_d:
4109 case Intrinsic::loongarch_lasx_xvslli_d:
4110 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4111 lowerVectorSplatImm<6>(N, 2, DAG));
4112 case Intrinsic::loongarch_lsx_vsrl_b:
4113 case Intrinsic::loongarch_lsx_vsrl_h:
4114 case Intrinsic::loongarch_lsx_vsrl_w:
4115 case Intrinsic::loongarch_lsx_vsrl_d:
4116 case Intrinsic::loongarch_lasx_xvsrl_b:
4117 case Intrinsic::loongarch_lasx_xvsrl_h:
4118 case Intrinsic::loongarch_lasx_xvsrl_w:
4119 case Intrinsic::loongarch_lasx_xvsrl_d:
4120 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4121 truncateVecElts(N, DAG));
4122 case Intrinsic::loongarch_lsx_vsrli_b:
4123 case Intrinsic::loongarch_lasx_xvsrli_b:
4124 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4125 lowerVectorSplatImm<3>(N, 2, DAG));
4126 case Intrinsic::loongarch_lsx_vsrli_h:
4127 case Intrinsic::loongarch_lasx_xvsrli_h:
4128 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4129 lowerVectorSplatImm<4>(N, 2, DAG));
4130 case Intrinsic::loongarch_lsx_vsrli_w:
4131 case Intrinsic::loongarch_lasx_xvsrli_w:
4132 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4133 lowerVectorSplatImm<5>(N, 2, DAG));
4134 case Intrinsic::loongarch_lsx_vsrli_d:
4135 case Intrinsic::loongarch_lasx_xvsrli_d:
4136 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4137 lowerVectorSplatImm<6>(N, 2, DAG));
4138 case Intrinsic::loongarch_lsx_vsra_b:
4139 case Intrinsic::loongarch_lsx_vsra_h:
4140 case Intrinsic::loongarch_lsx_vsra_w:
4141 case Intrinsic::loongarch_lsx_vsra_d:
4142 case Intrinsic::loongarch_lasx_xvsra_b:
4143 case Intrinsic::loongarch_lasx_xvsra_h:
4144 case Intrinsic::loongarch_lasx_xvsra_w:
4145 case Intrinsic::loongarch_lasx_xvsra_d:
4146 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4147 truncateVecElts(N, DAG));
4148 case Intrinsic::loongarch_lsx_vsrai_b:
4149 case Intrinsic::loongarch_lasx_xvsrai_b:
4150 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4151 lowerVectorSplatImm<3>(N, 2, DAG));
4152 case Intrinsic::loongarch_lsx_vsrai_h:
4153 case Intrinsic::loongarch_lasx_xvsrai_h:
4154 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4155 lowerVectorSplatImm<4>(N, 2, DAG));
4156 case Intrinsic::loongarch_lsx_vsrai_w:
4157 case Intrinsic::loongarch_lasx_xvsrai_w:
4158 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4159 lowerVectorSplatImm<5>(N, 2, DAG));
4160 case Intrinsic::loongarch_lsx_vsrai_d:
4161 case Intrinsic::loongarch_lasx_xvsrai_d:
4162 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4163 lowerVectorSplatImm<6>(N, 2, DAG));
4164 case Intrinsic::loongarch_lsx_vclz_b:
4165 case Intrinsic::loongarch_lsx_vclz_h:
4166 case Intrinsic::loongarch_lsx_vclz_w:
4167 case Intrinsic::loongarch_lsx_vclz_d:
4168 case Intrinsic::loongarch_lasx_xvclz_b:
4169 case Intrinsic::loongarch_lasx_xvclz_h:
4170 case Intrinsic::loongarch_lasx_xvclz_w:
4171 case Intrinsic::loongarch_lasx_xvclz_d:
4172 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4173 case Intrinsic::loongarch_lsx_vpcnt_b:
4174 case Intrinsic::loongarch_lsx_vpcnt_h:
4175 case Intrinsic::loongarch_lsx_vpcnt_w:
4176 case Intrinsic::loongarch_lsx_vpcnt_d:
4177 case Intrinsic::loongarch_lasx_xvpcnt_b:
4178 case Intrinsic::loongarch_lasx_xvpcnt_h:
4179 case Intrinsic::loongarch_lasx_xvpcnt_w:
4180 case Intrinsic::loongarch_lasx_xvpcnt_d:
4181 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4182 case Intrinsic::loongarch_lsx_vbitclr_b:
4183 case Intrinsic::loongarch_lsx_vbitclr_h:
4184 case Intrinsic::loongarch_lsx_vbitclr_w:
4185 case Intrinsic::loongarch_lsx_vbitclr_d:
4186 case Intrinsic::loongarch_lasx_xvbitclr_b:
4187 case Intrinsic::loongarch_lasx_xvbitclr_h:
4188 case Intrinsic::loongarch_lasx_xvbitclr_w:
4189 case Intrinsic::loongarch_lasx_xvbitclr_d:
4190 return lowerVectorBitClear(N, DAG);
4191 case Intrinsic::loongarch_lsx_vbitclri_b:
4192 case Intrinsic::loongarch_lasx_xvbitclri_b:
4193 return lowerVectorBitClearImm<3>(N, DAG);
4194 case Intrinsic::loongarch_lsx_vbitclri_h:
4195 case Intrinsic::loongarch_lasx_xvbitclri_h:
4196 return lowerVectorBitClearImm<4>(N, DAG);
4197 case Intrinsic::loongarch_lsx_vbitclri_w:
4198 case Intrinsic::loongarch_lasx_xvbitclri_w:
4199 return lowerVectorBitClearImm<5>(N, DAG);
4200 case Intrinsic::loongarch_lsx_vbitclri_d:
4201 case Intrinsic::loongarch_lasx_xvbitclri_d:
4202 return lowerVectorBitClearImm<6>(N, DAG);
4203 case Intrinsic::loongarch_lsx_vbitset_b:
4204 case Intrinsic::loongarch_lsx_vbitset_h:
4205 case Intrinsic::loongarch_lsx_vbitset_w:
4206 case Intrinsic::loongarch_lsx_vbitset_d:
4207 case Intrinsic::loongarch_lasx_xvbitset_b:
4208 case Intrinsic::loongarch_lasx_xvbitset_h:
4209 case Intrinsic::loongarch_lasx_xvbitset_w:
4210 case Intrinsic::loongarch_lasx_xvbitset_d: {
4211 EVT VecTy = N->getValueType(0);
4212 SDValue One = DAG.getConstant(1, DL, VecTy);
4213 return DAG.getNode(
4214 ISD::OR, DL, VecTy, N->getOperand(1),
4215 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4216 }
4217 case Intrinsic::loongarch_lsx_vbitseti_b:
4218 case Intrinsic::loongarch_lasx_xvbitseti_b:
4219 return lowerVectorBitSetImm<3>(N, DAG);
4220 case Intrinsic::loongarch_lsx_vbitseti_h:
4221 case Intrinsic::loongarch_lasx_xvbitseti_h:
4222 return lowerVectorBitSetImm<4>(N, DAG);
4223 case Intrinsic::loongarch_lsx_vbitseti_w:
4224 case Intrinsic::loongarch_lasx_xvbitseti_w:
4225 return lowerVectorBitSetImm<5>(N, DAG);
4226 case Intrinsic::loongarch_lsx_vbitseti_d:
4227 case Intrinsic::loongarch_lasx_xvbitseti_d:
4228 return lowerVectorBitSetImm<6>(N, DAG);
4229 case Intrinsic::loongarch_lsx_vbitrev_b:
4230 case Intrinsic::loongarch_lsx_vbitrev_h:
4231 case Intrinsic::loongarch_lsx_vbitrev_w:
4232 case Intrinsic::loongarch_lsx_vbitrev_d:
4233 case Intrinsic::loongarch_lasx_xvbitrev_b:
4234 case Intrinsic::loongarch_lasx_xvbitrev_h:
4235 case Intrinsic::loongarch_lasx_xvbitrev_w:
4236 case Intrinsic::loongarch_lasx_xvbitrev_d: {
4237 EVT VecTy = N->getValueType(0);
4238 SDValue One = DAG.getConstant(1, DL, VecTy);
4239 return DAG.getNode(
4240 ISD::XOR, DL, VecTy, N->getOperand(1),
4241 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4242 }
4243 case Intrinsic::loongarch_lsx_vbitrevi_b:
4244 case Intrinsic::loongarch_lasx_xvbitrevi_b:
4245 return lowerVectorBitRevImm<3>(N, DAG);
4246 case Intrinsic::loongarch_lsx_vbitrevi_h:
4247 case Intrinsic::loongarch_lasx_xvbitrevi_h:
4248 return lowerVectorBitRevImm<4>(N, DAG);
4249 case Intrinsic::loongarch_lsx_vbitrevi_w:
4250 case Intrinsic::loongarch_lasx_xvbitrevi_w:
4251 return lowerVectorBitRevImm<5>(N, DAG);
4252 case Intrinsic::loongarch_lsx_vbitrevi_d:
4253 case Intrinsic::loongarch_lasx_xvbitrevi_d:
4254 return lowerVectorBitRevImm<6>(N, DAG);
4255 case Intrinsic::loongarch_lsx_vfadd_s:
4256 case Intrinsic::loongarch_lsx_vfadd_d:
4257 case Intrinsic::loongarch_lasx_xvfadd_s:
4258 case Intrinsic::loongarch_lasx_xvfadd_d:
4259 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4260 N->getOperand(2));
4261 case Intrinsic::loongarch_lsx_vfsub_s:
4262 case Intrinsic::loongarch_lsx_vfsub_d:
4263 case Intrinsic::loongarch_lasx_xvfsub_s:
4264 case Intrinsic::loongarch_lasx_xvfsub_d:
4265 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4266 N->getOperand(2));
4267 case Intrinsic::loongarch_lsx_vfmul_s:
4268 case Intrinsic::loongarch_lsx_vfmul_d:
4269 case Intrinsic::loongarch_lasx_xvfmul_s:
4270 case Intrinsic::loongarch_lasx_xvfmul_d:
4271 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4272 N->getOperand(2));
4273 case Intrinsic::loongarch_lsx_vfdiv_s:
4274 case Intrinsic::loongarch_lsx_vfdiv_d:
4275 case Intrinsic::loongarch_lasx_xvfdiv_s:
4276 case Intrinsic::loongarch_lasx_xvfdiv_d:
4277 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4278 N->getOperand(2));
4279 case Intrinsic::loongarch_lsx_vfmadd_s:
4280 case Intrinsic::loongarch_lsx_vfmadd_d:
4281 case Intrinsic::loongarch_lasx_xvfmadd_s:
4282 case Intrinsic::loongarch_lasx_xvfmadd_d:
4283 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4284 N->getOperand(2), N->getOperand(3));
4285 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4286 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4287 N->getOperand(1), N->getOperand(2),
4288 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4289 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4290 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4291 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4292 N->getOperand(1), N->getOperand(2),
4293 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4294 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4295 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4296 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4297 N->getOperand(1), N->getOperand(2),
4298 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4299 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4300 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4301 N->getOperand(1), N->getOperand(2),
4302 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4303 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4304 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4305 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4306 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4307 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4308 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4309 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4310 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
4311 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
4312 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4313 N->getOperand(1)));
4314 case Intrinsic::loongarch_lsx_vreplve_b:
4315 case Intrinsic::loongarch_lsx_vreplve_h:
4316 case Intrinsic::loongarch_lsx_vreplve_w:
4317 case Intrinsic::loongarch_lsx_vreplve_d:
4318 case Intrinsic::loongarch_lasx_xvreplve_b:
4319 case Intrinsic::loongarch_lasx_xvreplve_h:
4320 case Intrinsic::loongarch_lasx_xvreplve_w:
4321 case Intrinsic::loongarch_lasx_xvreplve_d:
4322 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4323 N->getOperand(1),
4324 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4325 N->getOperand(2)));
4326 }
4327 return SDValue();
4328}
4329
4330SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4331                                                   DAGCombinerInfo &DCI) const {
4332 SelectionDAG &DAG = DCI.DAG;
4333 switch (N->getOpcode()) {
4334 default:
4335 break;
4336 case ISD::AND:
4337 return performANDCombine(N, DAG, DCI, Subtarget);
4338 case ISD::OR:
4339 return performORCombine(N, DAG, DCI, Subtarget);
4340 case ISD::SETCC:
4341 return performSETCCCombine(N, DAG, DCI, Subtarget);
4342 case ISD::SRL:
4343 return performSRLCombine(N, DAG, DCI, Subtarget);
4344  case LoongArchISD::BITREV_W:
4345    return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4346  case ISD::INTRINSIC_WO_CHAIN:
4347    return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4348 }
4349 return SDValue();
4350}
4351
4352static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4353                                              MachineBasicBlock *MBB) {
4354  if (!ZeroDivCheck)
4355 return MBB;
4356
4357 // Build instructions:
4358 // MBB:
4359 // div(or mod) $dst, $dividend, $divisor
4360 // bnez $divisor, SinkMBB
4361 // BreakMBB:
4362 // break 7 // BRK_DIVZERO
4363 // SinkMBB:
4364 // fallthrough
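  // For example, with the check enabled a 64-bit division becomes roughly:
  //   div.d $a0, $a0, $a1
  //   bnez $a1, .Lsink
  //   break 7
  // .Lsink:
  // (block label shown only for illustration)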
4365 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4366  MachineFunction::iterator It = ++MBB->getIterator();
4367  MachineFunction *MF = MBB->getParent();
4368 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4369 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4370 MF->insert(It, BreakMBB);
4371 MF->insert(It, SinkMBB);
4372
4373 // Transfer the remainder of MBB and its successor edges to SinkMBB.
4374 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4375 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4376
4377 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4378 DebugLoc DL = MI.getDebugLoc();
4379 MachineOperand &Divisor = MI.getOperand(2);
4380 Register DivisorReg = Divisor.getReg();
4381
4382 // MBB:
4383 BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4384 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4385 .addMBB(SinkMBB);
4386 MBB->addSuccessor(BreakMBB);
4387 MBB->addSuccessor(SinkMBB);
4388
4389 // BreakMBB:
4390 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
4391 // definition of BRK_DIVZERO.
4392 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4393 BreakMBB->addSuccessor(SinkMBB);
4394
4395 // Clear Divisor's kill flag.
4396 Divisor.setIsKill(false);
4397
4398 return SinkMBB;
4399}
4400
4401static MachineBasicBlock *
4402emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4403                        const LoongArchSubtarget &Subtarget) {
4404 unsigned CondOpc;
4405 switch (MI.getOpcode()) {
4406 default:
4407 llvm_unreachable("Unexpected opcode");
4408 case LoongArch::PseudoVBZ:
4409 CondOpc = LoongArch::VSETEQZ_V;
4410 break;
4411 case LoongArch::PseudoVBZ_B:
4412 CondOpc = LoongArch::VSETANYEQZ_B;
4413 break;
4414 case LoongArch::PseudoVBZ_H:
4415 CondOpc = LoongArch::VSETANYEQZ_H;
4416 break;
4417 case LoongArch::PseudoVBZ_W:
4418 CondOpc = LoongArch::VSETANYEQZ_W;
4419 break;
4420 case LoongArch::PseudoVBZ_D:
4421 CondOpc = LoongArch::VSETANYEQZ_D;
4422 break;
4423 case LoongArch::PseudoVBNZ:
4424 CondOpc = LoongArch::VSETNEZ_V;
4425 break;
4426 case LoongArch::PseudoVBNZ_B:
4427 CondOpc = LoongArch::VSETALLNEZ_B;
4428 break;
4429 case LoongArch::PseudoVBNZ_H:
4430 CondOpc = LoongArch::VSETALLNEZ_H;
4431 break;
4432 case LoongArch::PseudoVBNZ_W:
4433 CondOpc = LoongArch::VSETALLNEZ_W;
4434 break;
4435 case LoongArch::PseudoVBNZ_D:
4436 CondOpc = LoongArch::VSETALLNEZ_D;
4437 break;
4438 case LoongArch::PseudoXVBZ:
4439 CondOpc = LoongArch::XVSETEQZ_V;
4440 break;
4441 case LoongArch::PseudoXVBZ_B:
4442 CondOpc = LoongArch::XVSETANYEQZ_B;
4443 break;
4444 case LoongArch::PseudoXVBZ_H:
4445 CondOpc = LoongArch::XVSETANYEQZ_H;
4446 break;
4447 case LoongArch::PseudoXVBZ_W:
4448 CondOpc = LoongArch::XVSETANYEQZ_W;
4449 break;
4450 case LoongArch::PseudoXVBZ_D:
4451 CondOpc = LoongArch::XVSETANYEQZ_D;
4452 break;
4453 case LoongArch::PseudoXVBNZ:
4454 CondOpc = LoongArch::XVSETNEZ_V;
4455 break;
4456 case LoongArch::PseudoXVBNZ_B:
4457 CondOpc = LoongArch::XVSETALLNEZ_B;
4458 break;
4459 case LoongArch::PseudoXVBNZ_H:
4460 CondOpc = LoongArch::XVSETALLNEZ_H;
4461 break;
4462 case LoongArch::PseudoXVBNZ_W:
4463 CondOpc = LoongArch::XVSETALLNEZ_W;
4464 break;
4465 case LoongArch::PseudoXVBNZ_D:
4466 CondOpc = LoongArch::XVSETALLNEZ_D;
4467 break;
4468 }
4469
4470 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4471 const BasicBlock *LLVM_BB = BB->getBasicBlock();
4472 DebugLoc DL = MI.getDebugLoc();
4473  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4474  MachineFunction::iterator It = ++BB->getIterator();
4475
4476 MachineFunction *F = BB->getParent();
4477 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4478 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4479 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4480
4481 F->insert(It, FalseBB);
4482 F->insert(It, TrueBB);
4483 F->insert(It, SinkBB);
4484
4485 // Transfer the remainder of MBB and its successor edges to Sink.
4486 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4487  SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4488
4489 // Insert the real instruction to BB.
4490 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4491 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4492
4493 // Insert branch.
4494 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4495 BB->addSuccessor(FalseBB);
4496 BB->addSuccessor(TrueBB);
4497
4498 // FalseBB.
4499 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4500 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4501 .addReg(LoongArch::R0)
4502 .addImm(0);
4503 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4504 FalseBB->addSuccessor(SinkBB);
4505
4506 // TrueBB.
4507 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4508 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4509 .addReg(LoongArch::R0)
4510 .addImm(1);
4511 TrueBB->addSuccessor(SinkBB);
4512
4513 // SinkBB: merge the results.
4514 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4515 MI.getOperand(0).getReg())
4516 .addReg(RD1)
4517 .addMBB(FalseBB)
4518 .addReg(RD2)
4519 .addMBB(TrueBB);
4520
4521 // The pseudo instruction is gone now.
4522 MI.eraseFromParent();
4523 return SinkBB;
4524}
4525
4526static MachineBasicBlock *
4527emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4528                     const LoongArchSubtarget &Subtarget) {
4529 unsigned InsOp;
4530 unsigned HalfSize;
4531 switch (MI.getOpcode()) {
4532 default:
4533 llvm_unreachable("Unexpected opcode");
4534 case LoongArch::PseudoXVINSGR2VR_B:
4535 HalfSize = 16;
4536 InsOp = LoongArch::VINSGR2VR_B;
4537 break;
4538 case LoongArch::PseudoXVINSGR2VR_H:
4539 HalfSize = 8;
4540 InsOp = LoongArch::VINSGR2VR_H;
4541 break;
4542 }
4543 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4544 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4545 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4546 DebugLoc DL = MI.getDebugLoc();
4547  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4548  // XDst = vector_insert XSrc, Elt, Idx
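  // When Idx falls in the upper 128-bit half (Idx >= HalfSize), the insert is
  // done on a copy of that half: the first XVPERMI_Q moves the upper half into
  // the low 128 bits, VINSGR2VR inserts the element at Idx - HalfSize there,
  // and a second XVPERMI_Q writes the modified half back as the upper half of
  // XDst. Otherwise the element is inserted directly into the low half.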
4549 Register XDst = MI.getOperand(0).getReg();
4550 Register XSrc = MI.getOperand(1).getReg();
4551 Register Elt = MI.getOperand(2).getReg();
4552 unsigned Idx = MI.getOperand(3).getImm();
4553
4554 Register ScratchReg1 = XSrc;
4555 if (Idx >= HalfSize) {
4556 ScratchReg1 = MRI.createVirtualRegister(RC);
4557 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4558 .addReg(XSrc)
4559 .addReg(XSrc)
4560 .addImm(1);
4561 }
4562
4563 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4564 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4565 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4566 .addReg(ScratchReg1, 0, LoongArch::sub_128);
4567 BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4568 .addReg(ScratchSubReg1)
4569 .addReg(Elt)
4570 .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4571
4572 Register ScratchReg2 = XDst;
4573 if (Idx >= HalfSize)
4574 ScratchReg2 = MRI.createVirtualRegister(RC);
4575
4576 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4577 .addImm(0)
4578 .addReg(ScratchSubReg2)
4579 .addImm(LoongArch::sub_128);
4580
4581 if (Idx >= HalfSize)
4582 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4583 .addReg(XSrc)
4584 .addReg(ScratchReg2)
4585 .addImm(2);
4586
4587 MI.eraseFromParent();
4588 return BB;
4589}
4590
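// Scalar CTPOP is expanded with the LSX vector population count: the GPR
// operand is inserted into element 0 of a zeroed vector register, VPCNT_{W,D}
// counts the bits, and element 0 of the result is moved back to a GPR with
// VPICKVE2GR.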
4591static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
4592                                          MachineBasicBlock *BB,
4593                                          const LoongArchSubtarget &Subtarget) {
4594 assert(Subtarget.hasExtLSX());
4595 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4596 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
4597 DebugLoc DL = MI.getDebugLoc();
4598  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4599  Register Dst = MI.getOperand(0).getReg();
4600 Register Src = MI.getOperand(1).getReg();
4601 Register ScratchReg1 = MRI.createVirtualRegister(RC);
4602 Register ScratchReg2 = MRI.createVirtualRegister(RC);
4603 Register ScratchReg3 = MRI.createVirtualRegister(RC);
4604
4605 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
4606 BuildMI(*BB, MI, DL,
4607 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
4608 : LoongArch::VINSGR2VR_W),
4609 ScratchReg2)
4610 .addReg(ScratchReg1)
4611 .addReg(Src)
4612 .addImm(0);
4613 BuildMI(
4614 *BB, MI, DL,
4615 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
4616 ScratchReg3)
4617 .addReg(ScratchReg2);
4618 BuildMI(*BB, MI, DL,
4619 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
4620 : LoongArch::VPICKVE2GR_W),
4621 Dst)
4622 .addReg(ScratchReg3)
4623 .addImm(0);
4624
4625 MI.eraseFromParent();
4626 return BB;
4627}
4628
4629MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4630 MachineInstr &MI, MachineBasicBlock *BB) const {
4631 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4632 DebugLoc DL = MI.getDebugLoc();
4633
4634 switch (MI.getOpcode()) {
4635 default:
4636 llvm_unreachable("Unexpected instr type to insert");
4637 case LoongArch::DIV_W:
4638 case LoongArch::DIV_WU:
4639 case LoongArch::MOD_W:
4640 case LoongArch::MOD_WU:
4641 case LoongArch::DIV_D:
4642 case LoongArch::DIV_DU:
4643 case LoongArch::MOD_D:
4644 case LoongArch::MOD_DU:
4645 return insertDivByZeroTrap(MI, BB);
4646 break;
4647 case LoongArch::WRFCSR: {
4648 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4649 LoongArch::FCSR0 + MI.getOperand(0).getImm())
4650 .addReg(MI.getOperand(1).getReg());
4651 MI.eraseFromParent();
4652 return BB;
4653 }
4654 case LoongArch::RDFCSR: {
4655 MachineInstr *ReadFCSR =
4656 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4657 MI.getOperand(0).getReg())
4658 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4659 ReadFCSR->getOperand(1).setIsUndef();
4660 MI.eraseFromParent();
4661 return BB;
4662 }
4663 case LoongArch::PseudoVBZ:
4664 case LoongArch::PseudoVBZ_B:
4665 case LoongArch::PseudoVBZ_H:
4666 case LoongArch::PseudoVBZ_W:
4667 case LoongArch::PseudoVBZ_D:
4668 case LoongArch::PseudoVBNZ:
4669 case LoongArch::PseudoVBNZ_B:
4670 case LoongArch::PseudoVBNZ_H:
4671 case LoongArch::PseudoVBNZ_W:
4672 case LoongArch::PseudoVBNZ_D:
4673 case LoongArch::PseudoXVBZ:
4674 case LoongArch::PseudoXVBZ_B:
4675 case LoongArch::PseudoXVBZ_H:
4676 case LoongArch::PseudoXVBZ_W:
4677 case LoongArch::PseudoXVBZ_D:
4678 case LoongArch::PseudoXVBNZ:
4679 case LoongArch::PseudoXVBNZ_B:
4680 case LoongArch::PseudoXVBNZ_H:
4681 case LoongArch::PseudoXVBNZ_W:
4682 case LoongArch::PseudoXVBNZ_D:
4683 return emitVecCondBranchPseudo(MI, BB, Subtarget);
4684 case LoongArch::PseudoXVINSGR2VR_B:
4685 case LoongArch::PseudoXVINSGR2VR_H:
4686 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4687 case LoongArch::PseudoCTPOP:
4688 return emitPseudoCTPOP(MI, BB, Subtarget);
4689 case TargetOpcode::STATEPOINT:
4690 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
4691 // while bl call instruction (where statepoint will be lowered at the
4692 // end) has implicit def. This def is early-clobber as it will be set at
4693 // the moment of the call and earlier than any use is read.
4694 // Add this implicit dead def here as a workaround.
4695 MI.addOperand(*MI.getMF(),
4696                  MachineOperand::CreateReg(
4697                      LoongArch::R1, /*isDef*/ true,
4698 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
4699 /*isUndef*/ false, /*isEarlyClobber*/ true));
4700 if (!Subtarget.is64Bit())
4701 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
4702 return emitPatchPoint(MI, BB);
4703 }
4704}
4705
4706bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4707    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4708 unsigned *Fast) const {
4709 if (!Subtarget.hasUAL())
4710 return false;
4711
4712 // TODO: set reasonable speed number.
4713 if (Fast)
4714 *Fast = 1;
4715 return true;
4716}
4717
4718const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4719 switch ((LoongArchISD::NodeType)Opcode) {
4720  case LoongArchISD::FIRST_NUMBER:
4721    break;
4722
4723#define NODE_NAME_CASE(node) \
4724 case LoongArchISD::node: \
4725 return "LoongArchISD::" #node;
4726
4727 // TODO: Add more target-dependent nodes later.
4728 NODE_NAME_CASE(CALL)
4729 NODE_NAME_CASE(CALL_MEDIUM)
4730 NODE_NAME_CASE(CALL_LARGE)
4731 NODE_NAME_CASE(RET)
4732 NODE_NAME_CASE(TAIL)
4733 NODE_NAME_CASE(TAIL_MEDIUM)
4734 NODE_NAME_CASE(TAIL_LARGE)
4735 NODE_NAME_CASE(SLL_W)
4736 NODE_NAME_CASE(SRA_W)
4737 NODE_NAME_CASE(SRL_W)
4738 NODE_NAME_CASE(BSTRINS)
4739 NODE_NAME_CASE(BSTRPICK)
4740 NODE_NAME_CASE(MOVGR2FR_W_LA64)
4741 NODE_NAME_CASE(MOVFR2GR_S_LA64)
4742 NODE_NAME_CASE(FTINT)
4743 NODE_NAME_CASE(REVB_2H)
4744 NODE_NAME_CASE(REVB_2W)
4745 NODE_NAME_CASE(BITREV_4B)
4746 NODE_NAME_CASE(BITREV_8B)
4747 NODE_NAME_CASE(BITREV_W)
4748 NODE_NAME_CASE(ROTR_W)
4749 NODE_NAME_CASE(ROTL_W)
4750 NODE_NAME_CASE(DIV_W)
4751 NODE_NAME_CASE(DIV_WU)
4752 NODE_NAME_CASE(MOD_W)
4753 NODE_NAME_CASE(MOD_WU)
4754 NODE_NAME_CASE(CLZ_W)
4755 NODE_NAME_CASE(CTZ_W)
4756 NODE_NAME_CASE(DBAR)
4757 NODE_NAME_CASE(IBAR)
4758 NODE_NAME_CASE(BREAK)
4759 NODE_NAME_CASE(SYSCALL)
4760 NODE_NAME_CASE(CRC_W_B_W)
4761 NODE_NAME_CASE(CRC_W_H_W)
4762 NODE_NAME_CASE(CRC_W_W_W)
4763 NODE_NAME_CASE(CRC_W_D_W)
4764 NODE_NAME_CASE(CRCC_W_B_W)
4765 NODE_NAME_CASE(CRCC_W_H_W)
4766 NODE_NAME_CASE(CRCC_W_W_W)
4767 NODE_NAME_CASE(CRCC_W_D_W)
4768 NODE_NAME_CASE(CSRRD)
4769 NODE_NAME_CASE(CSRWR)
4770 NODE_NAME_CASE(CSRXCHG)
4771 NODE_NAME_CASE(IOCSRRD_B)
4772 NODE_NAME_CASE(IOCSRRD_H)
4773 NODE_NAME_CASE(IOCSRRD_W)
4774 NODE_NAME_CASE(IOCSRRD_D)
4775 NODE_NAME_CASE(IOCSRWR_B)
4776 NODE_NAME_CASE(IOCSRWR_H)
4777 NODE_NAME_CASE(IOCSRWR_W)
4778 NODE_NAME_CASE(IOCSRWR_D)
4779 NODE_NAME_CASE(CPUCFG)
4780 NODE_NAME_CASE(MOVGR2FCSR)
4781 NODE_NAME_CASE(MOVFCSR2GR)
4782 NODE_NAME_CASE(CACOP_D)
4783 NODE_NAME_CASE(CACOP_W)
4784 NODE_NAME_CASE(VSHUF)
4785 NODE_NAME_CASE(VPICKEV)
4786 NODE_NAME_CASE(VPICKOD)
4787 NODE_NAME_CASE(VPACKEV)
4788 NODE_NAME_CASE(VPACKOD)
4789 NODE_NAME_CASE(VILVL)
4790 NODE_NAME_CASE(VILVH)
4791 NODE_NAME_CASE(VSHUF4I)
4792 NODE_NAME_CASE(VREPLVEI)
4793 NODE_NAME_CASE(VREPLGR2VR)
4794 NODE_NAME_CASE(XVPERMI)
4795 NODE_NAME_CASE(VPICK_SEXT_ELT)
4796 NODE_NAME_CASE(VPICK_ZEXT_ELT)
4797 NODE_NAME_CASE(VREPLVE)
4798 NODE_NAME_CASE(VALL_ZERO)
4799 NODE_NAME_CASE(VANY_ZERO)
4800 NODE_NAME_CASE(VALL_NONZERO)
4801 NODE_NAME_CASE(VANY_NONZERO)
4802 NODE_NAME_CASE(FRECIPE)
4803 NODE_NAME_CASE(FRSQRTE)
4804 }
4805#undef NODE_NAME_CASE
4806 return nullptr;
4807}
4808
4809//===----------------------------------------------------------------------===//
4810// Calling Convention Implementation
4811//===----------------------------------------------------------------------===//
4812
4813// Eight general-purpose registers a0-a7 are used for passing integer arguments,
4814// with a0-a1 reused to return values. Generally, the GPRs are used to pass
4815// fixed-point arguments, and floating-point arguments when no FPR is available
4816// or with soft float ABI.
4817const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4818 LoongArch::R7, LoongArch::R8, LoongArch::R9,
4819 LoongArch::R10, LoongArch::R11};
4820// Eight floating-point registers fa0-fa7 are used for passing floating-point
4821// arguments, and fa0-fa1 are also used to return values.
4822const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4823 LoongArch::F3, LoongArch::F4, LoongArch::F5,
4824 LoongArch::F6, LoongArch::F7};
4825// FPR32 and FPR64 alias each other.
4826const MCPhysReg ArgFPR64s[] = {
4827    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4828 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4829
4830const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4831 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4832 LoongArch::VR6, LoongArch::VR7};
4833
4834const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4835 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4836 LoongArch::XR6, LoongArch::XR7};
4837
4838// Pass a 2*GRLen argument that has been split into two GRLen values through
4839// registers or the stack as necessary.
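// For example, on LA64 an i128 argument is split into two i64 halves: the
// first half takes the next free GPR if one is available (the second half may
// then spill to the stack); if no GPR is free, both halves go on the stack,
// the first one aligned to the original argument's alignment.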
4840static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4841 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4842 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4843 ISD::ArgFlagsTy ArgFlags2) {
4844 unsigned GRLenInBytes = GRLen / 8;
4845 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4846 // At least one half can be passed via register.
4847 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4848 VA1.getLocVT(), CCValAssign::Full));
4849 } else {
4850 // Both halves must be passed on the stack, with proper alignment.
4851 Align StackAlign =
4852 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4853 State.addLoc(
4854        CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4855                            State.AllocateStack(GRLenInBytes, StackAlign),
4856 VA1.getLocVT(), CCValAssign::Full));
4857    State.addLoc(CCValAssign::getMem(
4858        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4859 LocVT2, CCValAssign::Full));
4860 return false;
4861 }
4862 if (Register Reg = State.AllocateReg(ArgGPRs)) {
4863 // The second half can also be passed via register.
4864 State.addLoc(
4865 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4866 } else {
4867 // The second half is passed via the stack, without additional alignment.
4868    State.addLoc(CCValAssign::getMem(
4869        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4870 LocVT2, CCValAssign::Full));
4871 }
4872 return false;
4873}
4874
4875// Implements the LoongArch calling convention. Returns true upon failure.
4876static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4877                         unsigned ValNo, MVT ValVT,
4878 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4879 CCState &State, bool IsFixed, bool IsRet,
4880 Type *OrigTy) {
4881 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4882  assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4883 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4884 MVT LocVT = ValVT;
4885
4886 // Any return value split into more than two values can't be returned
4887 // directly.
4888 if (IsRet && ValNo > 1)
4889 return true;
4890
4891 // If passing a variadic argument, or if no FPR is available.
4892 bool UseGPRForFloat = true;
4893
4894 switch (ABI) {
4895 default:
4896 llvm_unreachable("Unexpected ABI");
4897    break;
4898  case LoongArchABI::ABI_ILP32F:
4899  case LoongArchABI::ABI_LP64F:
4900  case LoongArchABI::ABI_ILP32D:
4901  case LoongArchABI::ABI_LP64D:
4902    UseGPRForFloat = !IsFixed;
4903    break;
4904  case LoongArchABI::ABI_ILP32S:
4905  case LoongArchABI::ABI_LP64S:
4906    break;
4907 }
4908
4909 // FPR32 and FPR64 alias each other.
4910 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4911 UseGPRForFloat = true;
4912
4913 if (UseGPRForFloat && ValVT == MVT::f32) {
4914 LocVT = GRLenVT;
4915 LocInfo = CCValAssign::BCvt;
4916 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4917 LocVT = MVT::i64;
4918 LocInfo = CCValAssign::BCvt;
4919 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4920 // TODO: Handle passing f64 on LA32 with D feature.
4921 report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4922 }
4923
4924 // If this is a variadic argument, the LoongArch calling convention requires
4925 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4926 // byte alignment. An aligned register should be used regardless of whether
4927 // the original argument was split during legalisation or not. The argument
4928 // will not be passed by registers if the original type is larger than
4929 // 2*GRLen, so the register alignment rule does not apply.
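  // For example (illustrative only): when a variadic __int128 is passed on
  // LA64 and the next free GPR is the odd-numbered $a5, the code below
  // allocates and skips $a5 so that the value starts in the even/aligned
  // pair $a6:$a7.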
4930 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4931 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4932 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4933 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4934 // Skip 'odd' register if necessary.
4935 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4936 State.AllocateReg(ArgGPRs);
4937 }
4938
4939 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4940 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4941 State.getPendingArgFlags();
4942
4943 assert(PendingLocs.size() == PendingArgFlags.size() &&
4944 "PendingLocs and PendingArgFlags out of sync");
4945
4946 // Split arguments might be passed indirectly, so keep track of the pending
4947 // values.
4948 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4949 LocVT = GRLenVT;
4950 LocInfo = CCValAssign::Indirect;
4951 PendingLocs.push_back(
4952 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4953 PendingArgFlags.push_back(ArgFlags);
4954 if (!ArgFlags.isSplitEnd()) {
4955 return false;
4956 }
4957 }
4958
4959 // If the split argument only had two elements, it should be passed directly
4960 // in registers or on the stack.
4961 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4962 PendingLocs.size() <= 2) {
4963 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4964 // Apply the normal calling convention rules to the first half of the
4965 // split argument.
4966 CCValAssign VA = PendingLocs[0];
4967 ISD::ArgFlagsTy AF = PendingArgFlags[0];
4968 PendingLocs.clear();
4969 PendingArgFlags.clear();
4970 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4971 ArgFlags);
4972 }
4973
4974 // Allocate to a register if possible, or else a stack slot.
4975 Register Reg;
4976 unsigned StoreSizeBytes = GRLen / 8;
4977 Align StackAlign = Align(GRLen / 8);
4978
4979 if (ValVT == MVT::f32 && !UseGPRForFloat)
4980 Reg = State.AllocateReg(ArgFPR32s);
4981 else if (ValVT == MVT::f64 && !UseGPRForFloat)
4982 Reg = State.AllocateReg(ArgFPR64s);
4983 else if (ValVT.is128BitVector())
4984 Reg = State.AllocateReg(ArgVRs);
4985 else if (ValVT.is256BitVector())
4986 Reg = State.AllocateReg(ArgXRs);
4987 else
4988 Reg = State.AllocateReg(ArgGPRs);
4989
4990 unsigned StackOffset =
4991 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4992
4993 // If we reach this point and PendingLocs is non-empty, we must be at the
4994 // end of a split argument that must be passed indirectly.
4995 if (!PendingLocs.empty()) {
4996 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4997 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4998 for (auto &It : PendingLocs) {
4999 if (Reg)
5000 It.convertToReg(Reg);
5001 else
5002 It.convertToMem(StackOffset);
5003 State.addLoc(It);
5004 }
5005 PendingLocs.clear();
5006 PendingArgFlags.clear();
5007 return false;
5008 }
5009 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
5010         "Expected a GRLenVT at this stage");
5011
5012 if (Reg) {
5013 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5014 return false;
5015 }
5016
5017 // When a floating-point value is passed on the stack, no bit-cast is needed.
5018 if (ValVT.isFloatingPoint()) {
5019 LocVT = ValVT;
5020 LocInfo = CCValAssign::Full;
5021 }
5022
5023 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
5024 return false;
5025}
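// Worked example (illustrative, not from the upstream source): for
// `double g(double x)` under the lp64d ABI, CC_LoongArch allocates $fa0 for x
// and for the return value; under lp64s (soft float) UseGPRForFloat stays
// true, so the f64 is bit-cast to i64 (CCValAssign::BCvt) and passed in $a0.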
5026
5027void LoongArchTargetLowering::analyzeInputArgs(
5028 MachineFunction &MF, CCState &CCInfo,
5029 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
5030    LoongArchCCAssignFn Fn) const {
5031  FunctionType *FType = MF.getFunction().getFunctionType();
5032  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5033 MVT ArgVT = Ins[i].VT;
5034 Type *ArgTy = nullptr;
5035 if (IsRet)
5036 ArgTy = FType->getReturnType();
5037 else if (Ins[i].isOrigArg())
5038      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
5039    LoongArchABI::ABI ABI =
5040        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5041    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
5042 CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
5043 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
5044 << '\n');
5045 llvm_unreachable("");
5046 }
5047 }
5048}
5049
5050void LoongArchTargetLowering::analyzeOutputArgs(
5051 MachineFunction &MF, CCState &CCInfo,
5052 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
5053 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
5054 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5055 MVT ArgVT = Outs[i].VT;
5056    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
5057    LoongArchABI::ABI ABI =
5058        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5059    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
5060 CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
5061 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
5062 << "\n");
5063 llvm_unreachable("");
5064 }
5065 }
5066}
5067
5068// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
5069// values.
5070static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
5071                                   const CCValAssign &VA, const SDLoc &DL) {
5072 switch (VA.getLocInfo()) {
5073 default:
5074 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5075 case CCValAssign::Full:
5077 break;
5078 case CCValAssign::BCvt:
5079 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5080 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
5081 else
5082 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5083 break;
5084 }
5085 return Val;
5086}
5087
5088static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
5089                                const CCValAssign &VA, const SDLoc &DL,
5090 const ISD::InputArg &In,
5091 const LoongArchTargetLowering &TLI) {
5092  MachineFunction &MF = DAG.getMachineFunction();
5093  MachineRegisterInfo &RegInfo = MF.getRegInfo();
5094  EVT LocVT = VA.getLocVT();
5095 SDValue Val;
5096 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
5097 Register VReg = RegInfo.createVirtualRegister(RC);
5098 RegInfo.addLiveIn(VA.getLocReg(), VReg);
5099 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
5100
5101 // If input is sign extended from 32 bits, note it for the OptW pass.
5102 if (In.isOrigArg()) {
5103 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
5104 if (OrigArg->getType()->isIntegerTy()) {
5105 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
5106 // An input zero extended from i31 can also be considered sign extended.
5107 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
5108 (BitWidth < 32 && In.Flags.isZExt())) {
5109      LoongArchMachineFunctionInfo *LAFI =
5110          MF.getInfo<LoongArchMachineFunctionInfo>();
5111      LAFI->addSExt32Register(VReg);
5112 }
5113 }
5114 }
5115
5116 return convertLocVTToValVT(DAG, Val, VA, DL);
5117}
5118
5119// The caller is responsible for loading the full value if the argument is
5120// passed with CCValAssign::Indirect.
5121static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
5122                                const CCValAssign &VA, const SDLoc &DL) {
5123  MachineFunction &MF = DAG.getMachineFunction();
5124  MachineFrameInfo &MFI = MF.getFrameInfo();
5125 EVT ValVT = VA.getValVT();
5126 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
5127 /*IsImmutable=*/true);
5128  SDValue FIN = DAG.getFrameIndex(
5129      FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
5130
5131 ISD::LoadExtType ExtType;
5132 switch (VA.getLocInfo()) {
5133 default:
5134 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5135  case CCValAssign::Full:
5136  case CCValAssign::Indirect:
5137  case CCValAssign::BCvt:
5138 ExtType = ISD::NON_EXTLOAD;
5139 break;
5140 }
5141 return DAG.getExtLoad(
5142      ExtType, DL, VA.getLocVT(), Chain, FIN,
5143      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
5144}
5145
5146static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
5147                                   const CCValAssign &VA, const SDLoc &DL) {
5148 EVT LocVT = VA.getLocVT();
5149
5150 switch (VA.getLocInfo()) {
5151 default:
5152 llvm_unreachable("Unexpected CCValAssign::LocInfo");
5153 case CCValAssign::Full:
5154 break;
5155 case CCValAssign::BCvt:
5156 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
5157 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
5158 else
5159 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5160 break;
5161 }
5162 return Val;
5163}
5164
5165static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5166 CCValAssign::LocInfo LocInfo,
5167 ISD::ArgFlagsTy ArgFlags, CCState &State) {
5168 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5169 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5170 // s0 s1 s2 s3 s4 s5 s6 s7 s8
5171 static const MCPhysReg GPRList[] = {
5172 LoongArch::R23, LoongArch::R24, LoongArch::R25,
5173 LoongArch::R26, LoongArch::R27, LoongArch::R28,
5174 LoongArch::R29, LoongArch::R30, LoongArch::R31};
5175 if (MCRegister Reg = State.AllocateReg(GPRList)) {
5176 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5177 return false;
5178 }
5179 }
5180
5181 if (LocVT == MVT::f32) {
5182 // Pass in STG registers: F1, F2, F3, F4
5183 // fs0,fs1,fs2,fs3
5184 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5185 LoongArch::F26, LoongArch::F27};
5186 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
5187 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5188 return false;
5189 }
5190 }
5191
5192 if (LocVT == MVT::f64) {
5193 // Pass in STG registers: D1, D2, D3, D4
5194 // fs4,fs5,fs6,fs7
5195 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5196 LoongArch::F30_64, LoongArch::F31_64};
5197 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
5198 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5199 return false;
5200 }
5201 }
5202
5203 report_fatal_error("No registers left in GHC calling convention");
5204 return true;
5205}
5206
5207// Transform physical registers into virtual registers.
5208SDValue LoongArchTargetLowering::LowerFormalArguments(
5209    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5210    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5211    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5212
5213  MachineFunction &MF = DAG.getMachineFunction();
5214
5215 switch (CallConv) {
5216 default:
5217 llvm_unreachable("Unsupported calling convention");
5218 case CallingConv::C:
5219 case CallingConv::Fast:
5220 break;
5221 case CallingConv::GHC:
5222 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5223 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5224      report_fatal_error(
5225          "GHC calling convention requires the F and D extensions");
5226 }
5227
5228 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5229 MVT GRLenVT = Subtarget.getGRLenVT();
5230 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5231  // Used with varargs to accumulate store chains.
5232 std::vector<SDValue> OutChains;
5233
5234 // Assign locations to all of the incoming arguments.
5235  SmallVector<CCValAssign> ArgLocs;
5236  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5237
5238 if (CallConv == CallingConv::GHC)
5239    CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5240  else
5241 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5242
5243 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5244 CCValAssign &VA = ArgLocs[i];
5245 SDValue ArgValue;
5246 if (VA.isRegLoc())
5247 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5248 else
5249 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5250 if (VA.getLocInfo() == CCValAssign::Indirect) {
5251 // If the original argument was split and passed by reference, we need to
5252 // load all parts of it here (using the same address).
5253      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5254                                   MachinePointerInfo()));
5255 unsigned ArgIndex = Ins[i].OrigArgIndex;
5256 unsigned ArgPartOffset = Ins[i].PartOffset;
5257 assert(ArgPartOffset == 0);
5258 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5259 CCValAssign &PartVA = ArgLocs[i + 1];
5260 unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5261 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5262 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5263        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5264                                     MachinePointerInfo()));
5265 ++i;
5266 }
5267 continue;
5268 }
5269 InVals.push_back(ArgValue);
5270 }
5271
5272 if (IsVarArg) {
5273    ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5274    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5275 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5276 MachineFrameInfo &MFI = MF.getFrameInfo();
5277 MachineRegisterInfo &RegInfo = MF.getRegInfo();
5278 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5279
5280 // Offset of the first variable argument from stack pointer, and size of
5281 // the vararg save area. For now, the varargs save area is either zero or
5282 // large enough to hold a0-a7.
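    // For example (illustrative only): if the fixed arguments of a variadic
    // function consume $a0-$a2, the remaining five registers $a3-$a7 are
    // spilled below the incoming stack pointer, giving
    // VarArgsSaveSize = 5 * GRLenInBytes and VaArgOffset = -VarArgsSaveSize.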
5283 int VaArgOffset, VarArgsSaveSize;
5284
5285 // If all registers are allocated, then all varargs must be passed on the
5286 // stack and we don't need to save any argregs.
5287 if (ArgRegs.size() == Idx) {
5288 VaArgOffset = CCInfo.getStackSize();
5289 VarArgsSaveSize = 0;
5290 } else {
5291 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5292 VaArgOffset = -VarArgsSaveSize;
5293 }
5294
5295 // Record the frame index of the first variable argument
5296 // which is a value necessary to VASTART.
5297 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5298 LoongArchFI->setVarArgsFrameIndex(FI);
5299
5300 // If saving an odd number of registers then create an extra stack slot to
5301 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5302    // offsets to even-numbered registers remain 2*GRLen-aligned.
5303 if (Idx % 2) {
5304 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5305 true);
5306 VarArgsSaveSize += GRLenInBytes;
5307 }
5308
5309 // Copy the integer registers that may have been used for passing varargs
5310 // to the vararg save area.
5311 for (unsigned I = Idx; I < ArgRegs.size();
5312 ++I, VaArgOffset += GRLenInBytes) {
5313 const Register Reg = RegInfo.createVirtualRegister(RC);
5314 RegInfo.addLiveIn(ArgRegs[I], Reg);
5315 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5316 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5317 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5318      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5319                                   MachinePointerInfo::getFixedStack(MF, FI));
5320 cast<StoreSDNode>(Store.getNode())
5321 ->getMemOperand()
5322 ->setValue((Value *)nullptr);
5323 OutChains.push_back(Store);
5324 }
5325 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5326 }
5327
5328 // All stores are grouped in one node to allow the matching between
5329 // the size of Ins and InVals. This only happens for vararg functions.
5330 if (!OutChains.empty()) {
5331 OutChains.push_back(Chain);
5332 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5333 }
5334
5335 return Chain;
5336}
5337
5338bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5339  return CI->isTailCall();
5340}
5341
5342// Check if the return value is used as only a return value, as otherwise
5343// we can't perform a tail-call.
5344bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5345                                                 SDValue &Chain) const {
5346 if (N->getNumValues() != 1)
5347 return false;
5348 if (!N->hasNUsesOfValue(1, 0))
5349 return false;
5350
5351 SDNode *Copy = *N->user_begin();
5352 if (Copy->getOpcode() != ISD::CopyToReg)
5353 return false;
5354
5355 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
5356 // isn't safe to perform a tail call.
5357 if (Copy->getGluedNode())
5358 return false;
5359
5360 // The copy must be used by a LoongArchISD::RET, and nothing else.
5361 bool HasRet = false;
5362 for (SDNode *Node : Copy->users()) {
5363 if (Node->getOpcode() != LoongArchISD::RET)
5364 return false;
5365 HasRet = true;
5366 }
5367
5368 if (!HasRet)
5369 return false;
5370
5371 Chain = Copy->getOperand(0);
5372 return true;
5373}
5374
5375// Check whether the call is eligible for tail call optimization.
5376bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5377 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5378 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5379
5380 auto CalleeCC = CLI.CallConv;
5381 auto &Outs = CLI.Outs;
5382 auto &Caller = MF.getFunction();
5383 auto CallerCC = Caller.getCallingConv();
5384
5385 // Do not tail call opt if the stack is used to pass parameters.
5386 if (CCInfo.getStackSize() != 0)
5387 return false;
5388
5389 // Do not tail call opt if any parameters need to be passed indirectly.
5390 for (auto &VA : ArgLocs)
5391 if (VA.getLocInfo() == CCValAssign::Indirect)
5392 return false;
5393
5394 // Do not tail call opt if either caller or callee uses struct return
5395 // semantics.
5396 auto IsCallerStructRet = Caller.hasStructRetAttr();
5397 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5398 if (IsCallerStructRet || IsCalleeStructRet)
5399 return false;
5400
5401 // Do not tail call opt if either the callee or caller has a byval argument.
5402 for (auto &Arg : Outs)
5403 if (Arg.Flags.isByVal())
5404 return false;
5405
5406 // The callee has to preserve all registers the caller needs to preserve.
5407 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5408 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5409 if (CalleeCC != CallerCC) {
5410 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5411 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5412 return false;
5413 }
5414 return true;
5415}
5416
5417static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5418  return DAG.getDataLayout().getPrefTypeAlign(
5419 VT.getTypeForEVT(*DAG.getContext()));
5420}
5421
5422// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5423// and output parameter nodes.
5424SDValue
5425LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5426                                   SmallVectorImpl<SDValue> &InVals) const {
5427 SelectionDAG &DAG = CLI.DAG;
5428 SDLoc &DL = CLI.DL;
5429  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5430  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5431  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5432  SDValue Chain = CLI.Chain;
5433 SDValue Callee = CLI.Callee;
5434 CallingConv::ID CallConv = CLI.CallConv;
5435 bool IsVarArg = CLI.IsVarArg;
5436 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5437 MVT GRLenVT = Subtarget.getGRLenVT();
5438 bool &IsTailCall = CLI.IsTailCall;
5439
5440  MachineFunction &MF = DAG.getMachineFunction();
5441
5442 // Analyze the operands of the call, assigning locations to each operand.
5443  SmallVector<CCValAssign> ArgLocs;
5444  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5445
5446 if (CallConv == CallingConv::GHC)
5447 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5448 else
5449 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5450
5451 // Check if it's really possible to do a tail call.
5452 if (IsTailCall)
5453 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5454
5455 if (IsTailCall)
5456 ++NumTailCalls;
5457 else if (CLI.CB && CLI.CB->isMustTailCall())
5458 report_fatal_error("failed to perform tail call elimination on a call "
5459 "site marked musttail");
5460
5461 // Get a count of how many bytes are to be pushed on the stack.
5462 unsigned NumBytes = ArgCCInfo.getStackSize();
5463
5464 // Create local copies for byval args.
5465 SmallVector<SDValue> ByValArgs;
5466 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5467 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5468 if (!Flags.isByVal())
5469 continue;
5470
5471 SDValue Arg = OutVals[i];
5472 unsigned Size = Flags.getByValSize();
5473 Align Alignment = Flags.getNonZeroByValAlign();
5474
5475 int FI =
5476 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5477 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5478 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5479
5480 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5481 /*IsVolatile=*/false,
5482                          /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5483                          MachinePointerInfo(), MachinePointerInfo());
5484 ByValArgs.push_back(FIPtr);
5485 }
5486
5487 if (!IsTailCall)
5488 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5489
5490 // Copy argument values to their designated locations.
5491  SmallVector<std::pair<Register, SDValue>> RegsToPass;
5492  SmallVector<SDValue> MemOpChains;
5493 SDValue StackPtr;
5494 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5495 CCValAssign &VA = ArgLocs[i];
5496 SDValue ArgValue = OutVals[i];
5497 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5498
5499 // Promote the value if needed.
5500 // For now, only handle fully promoted and indirect arguments.
5501 if (VA.getLocInfo() == CCValAssign::Indirect) {
5502 // Store the argument in a stack slot and pass its address.
5503 Align StackAlign =
5504 std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5505 getPrefTypeAlign(ArgValue.getValueType(), DAG));
5506 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5507 // If the original argument was split and passed by reference, we need to
5508 // store the required parts of it here (and pass just one address).
5509 unsigned ArgIndex = Outs[i].OrigArgIndex;
5510 unsigned ArgPartOffset = Outs[i].PartOffset;
5511 assert(ArgPartOffset == 0);
5512 // Calculate the total size to store. We don't have access to what we're
5513 // actually storing other than performing the loop and collecting the
5514 // info.
5515      SmallVector<std::pair<SDValue, SDValue>> Parts;
5516      while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5517 SDValue PartValue = OutVals[i + 1];
5518 unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5519 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5520 EVT PartVT = PartValue.getValueType();
5521
5522 StoredSize += PartVT.getStoreSize();
5523 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5524 Parts.push_back(std::make_pair(PartValue, Offset));
5525 ++i;
5526 }
5527 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5528 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5529 MemOpChains.push_back(
5530          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5531                       MachinePointerInfo::getFixedStack(MF, FI)));
5532 for (const auto &Part : Parts) {
5533 SDValue PartValue = Part.first;
5534 SDValue PartOffset = Part.second;
5535        SDValue Address =
5536            DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5537 MemOpChains.push_back(
5538            DAG.getStore(Chain, DL, PartValue, Address,
5539                         MachinePointerInfo::getFixedStack(MF, FI)));
5540 }
5541 ArgValue = SpillSlot;
5542 } else {
5543 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5544 }
5545
5546 // Use local copy if it is a byval arg.
5547 if (Flags.isByVal())
5548 ArgValue = ByValArgs[j++];
5549
5550 if (VA.isRegLoc()) {
5551 // Queue up the argument copies and emit them at the end.
5552 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5553 } else {
5554 assert(VA.isMemLoc() && "Argument not register or memory");
5555 assert(!IsTailCall && "Tail call not allowed if stack is used "
5556 "for passing parameters");
5557
5558 // Work out the address of the stack slot.
5559 if (!StackPtr.getNode())
5560 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5561      SDValue Address =
5562          DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5563                      DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5564
5565 // Emit the store.
5566 MemOpChains.push_back(
5567 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5568 }
5569 }
5570
5571 // Join the stores, which are independent of one another.
5572 if (!MemOpChains.empty())
5573 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5574
5575 SDValue Glue;
5576
5577 // Build a sequence of copy-to-reg nodes, chained and glued together.
5578 for (auto &Reg : RegsToPass) {
5579 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5580 Glue = Chain.getValue(1);
5581 }
5582
5583 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5584 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5585 // split it and then direct call can be matched by PseudoCALL.
5586 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5587 const GlobalValue *GV = S->getGlobal();
5588    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5589                           ? LoongArchII::MO_CALL
5590                           : LoongArchII::MO_CALL_PLT;
5591 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5592 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5593    unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5594                           ? LoongArchII::MO_CALL
5595                           : LoongArchII::MO_CALL_PLT;
5596 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5597 }
5598
5599 // The first call operand is the chain and the second is the target address.
5600  SmallVector<SDValue> Ops;
5601  Ops.push_back(Chain);
5602 Ops.push_back(Callee);
5603
5604 // Add argument registers to the end of the list so that they are
5605 // known live into the call.
5606 for (auto &Reg : RegsToPass)
5607 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5608
5609 if (!IsTailCall) {
5610 // Add a register mask operand representing the call-preserved registers.
5611 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5612 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5613 assert(Mask && "Missing call preserved mask for calling convention");
5614 Ops.push_back(DAG.getRegisterMask(Mask));
5615 }
5616
5617 // Glue the call to the argument copies, if any.
5618 if (Glue.getNode())
5619 Ops.push_back(Glue);
5620
5621 // Emit the call.
5622 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5623 unsigned Op;
5624 switch (DAG.getTarget().getCodeModel()) {
5625 default:
5626 report_fatal_error("Unsupported code model");
5627 case CodeModel::Small:
5628 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5629 break;
5630 case CodeModel::Medium:
5631 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5632    Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5633    break;
5634 case CodeModel::Large:
5635 assert(Subtarget.is64Bit() && "Large code model requires LA64");
5636    Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5637    break;
5638 }
5639
5640 if (IsTailCall) {
5641    MF.getFrameInfo().setHasTailCall();
5642    SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5643 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5644 return Ret;
5645 }
5646
5647 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5648 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5649 Glue = Chain.getValue(1);
5650
5651 // Mark the end of the call, which is glued to the call itself.
5652 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5653 Glue = Chain.getValue(1);
5654
5655 // Assign locations to each value returned by this call.
5656  SmallVector<CCValAssign> RVLocs;
5657  CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5658 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5659
5660 // Copy all of the result registers out of their specified physreg.
5661 for (auto &VA : RVLocs) {
5662 // Copy the value out.
5663 SDValue RetValue =
5664 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5665 // Glue the RetValue to the end of the call sequence.
5666 Chain = RetValue.getValue(1);
5667 Glue = RetValue.getValue(2);
5668
5669 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5670
5671 InVals.push_back(RetValue);
5672 }
5673
5674 return Chain;
5675}
5676
5677bool LoongArchTargetLowering::CanLowerReturn(
5678    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5679 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5680  SmallVector<CCValAssign> RVLocs;
5681  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5682
5683 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5684 LoongArchABI::ABI ABI =
5685 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5686 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5687 Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5688 nullptr))
5689 return false;
5690 }
5691 return true;
5692}
5693
5694SDValue LoongArchTargetLowering::LowerReturn(
5695    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5696    const SmallVectorImpl<ISD::OutputArg> &Outs,
5697    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5698 SelectionDAG &DAG) const {
5699 // Stores the assignment of the return value to a location.
5700  SmallVector<CCValAssign, 4> RVLocs;
5701
5702 // Info about the registers and stack slot.
5703 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5704 *DAG.getContext());
5705
5706 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5707 nullptr, CC_LoongArch);
5708 if (CallConv == CallingConv::GHC && !RVLocs.empty())
5709 report_fatal_error("GHC functions return void only");
5710 SDValue Glue;
5711 SmallVector<SDValue, 4> RetOps(1, Chain);
5712
5713 // Copy the result values into the output registers.
5714 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5715 CCValAssign &VA = RVLocs[i];
5716 assert(VA.isRegLoc() && "Can only return in registers!");
5717
5718 // Handle a 'normal' return.
5719 SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5720 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5721
5722 // Guarantee that all emitted copies are stuck together.
5723 Glue = Chain.getValue(1);
5724 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5725 }
5726
5727 RetOps[0] = Chain; // Update chain.
5728
5729 // Add the glue node if we have it.
5730 if (Glue.getNode())
5731 RetOps.push_back(Glue);
5732
5733 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5734}
5735
5736bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
5737                                               EVT VT) const {
5738 if (!Subtarget.hasExtLSX())
5739 return false;
5740
5741 if (VT == MVT::f32) {
5742 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
5743 return (masked == 0x3e000000 || masked == 0x40000000);
5744 }
5745
5746 if (VT == MVT::f64) {
5747 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
5748 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
5749 }
5750
5751 return false;
5752}
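// Example (illustrative only): 1.0f is 0x3f800000; masked with 0x7e07ffff it
// gives 0x3e000000, and 2.0f (0x40000000) masks to 0x40000000, so both are
// accepted by the check above as VLDI-encodable immediates.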
5753
5754bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5755 bool ForCodeSize) const {
5756 // TODO: Maybe need more checks here after vector extension is supported.
5757 if (VT == MVT::f32 && !Subtarget.hasBasicF())
5758 return false;
5759 if (VT == MVT::f64 && !Subtarget.hasBasicD())
5760 return false;
5761 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
5762}
5763
5764bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5765  return true;
5766}
5767
5768bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5769  return true;
5770}
5771
5772bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5773 const Instruction *I) const {
5774 if (!Subtarget.is64Bit())
5775 return isa<LoadInst>(I) || isa<StoreInst>(I);
5776
5777 if (isa<LoadInst>(I))
5778 return true;
5779
5780 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5781  // require fences because we can use amswap_db.[w/d].
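  // For example (illustrative only): `store atomic i32 %v, ptr %p seq_cst`
  // lowers to a single amswap_db.w on LA64 and needs no fence, whereas an i8
  // or i16 atomic store still gets surrounding fences from this hook.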
5782 Type *Ty = I->getOperand(0)->getType();
5783 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5784 unsigned Size = Ty->getIntegerBitWidth();
5785 return (Size == 8 || Size == 16);
5786 }
5787
5788 return false;
5789}
5790
5791EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5792                                                LLVMContext &Context,
5793 EVT VT) const {
5794 if (!VT.isVector())
5795 return getPointerTy(DL);
5796  return VT.changeVectorElementTypeToInteger();
5797}
5798
5799bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
5800  // TODO: Support vectors.
5801 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5802}
5803
5804bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5805                                                 const CallInst &I,
5806 MachineFunction &MF,
5807 unsigned Intrinsic) const {
5808 switch (Intrinsic) {
5809 default:
5810 return false;
5811 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5812 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5813 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5814 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5815    Info.opc = ISD::INTRINSIC_W_CHAIN;
5816    Info.memVT = MVT::i32;
5817 Info.ptrVal = I.getArgOperand(0);
5818 Info.offset = 0;
5819 Info.align = Align(4);
5820    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5821                 MachineMemOperand::MOVolatile;
5822    return true;
5823 // TODO: Add more Intrinsics later.
5824 }
5825}
5826
5827// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
5828// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
5829// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
5830// regression, we need to implement it manually.
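// Sketch of the widening performed below (illustrative only): an
// `atomicrmw or ptr %p, i8 %v` is rewritten to operate on the containing
// aligned 32-bit word: the address is masked down to 4-byte alignment, %v is
// zero-extended and shifted into its byte position, and the result is shifted
// back down and truncated to i8 after the 32-bit atomicrmw.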
5831void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
5832  AtomicRMWInst::BinOp Op = AI->getOperation();
5833
5834  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
5835 Op == AtomicRMWInst::And) &&
5836 "Unable to expand");
5837 unsigned MinWordSize = 4;
5838
5839 IRBuilder<> Builder(AI);
5840 LLVMContext &Ctx = Builder.getContext();
5841 const DataLayout &DL = AI->getDataLayout();
5842 Type *ValueType = AI->getType();
5843 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
5844
5845 Value *Addr = AI->getPointerOperand();
5846 PointerType *PtrTy = cast<PointerType>(Addr->getType());
5847 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
5848
5849 Value *AlignedAddr = Builder.CreateIntrinsic(
5850 Intrinsic::ptrmask, {PtrTy, IntTy},
5851 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
5852 "AlignedAddr");
5853
5854 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
5855 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
5856 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
5857 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
5858 Value *Mask = Builder.CreateShl(
5859 ConstantInt::get(WordType,
5860 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
5861 ShiftAmt, "Mask");
5862 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
5863 Value *ValOperand_Shifted =
5864 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
5865 ShiftAmt, "ValOperand_Shifted");
5866 Value *NewOperand;
5867 if (Op == AtomicRMWInst::And)
5868 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
5869 else
5870 NewOperand = ValOperand_Shifted;
5871
5872 AtomicRMWInst *NewAI =
5873 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
5874 AI->getOrdering(), AI->getSyncScopeID());
5875
5876 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
5877 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
5878 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
5879 AI->replaceAllUsesWith(FinalOldResult);
5880 AI->eraseFromParent();
5881}
5882
5883TargetLowering::AtomicExpansionKind
5884LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5885  // TODO: Add more AtomicRMWInst that needs to be extended.
5886
5887 // Since floating-point operation requires a non-trivial set of data
5888 // operations, use CmpXChg to expand.
5889  if (AI->isFloatingPointOperation() ||
5890      AI->getOperation() == AtomicRMWInst::UIncWrap ||
5891      AI->getOperation() == AtomicRMWInst::UDecWrap ||
5892      AI->getOperation() == AtomicRMWInst::USubCond ||
5893      AI->getOperation() == AtomicRMWInst::USubSat)
5894    return AtomicExpansionKind::CmpXChg;
5895
5896  if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
5897      (AI->getOperation() == AtomicRMWInst::Xchg ||
5898       AI->getOperation() == AtomicRMWInst::Add ||
5899       AI->getOperation() == AtomicRMWInst::Sub)) {
5900    return AtomicExpansionKind::None;
5901  }
5902
5903 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5904 if (Subtarget.hasLAMCAS()) {
5905    if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
5906                      AI->getOperation() == AtomicRMWInst::Or ||
5907                      AI->getOperation() == AtomicRMWInst::Xor))
5908      return AtomicExpansionKind::Expand;
5909    if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
5910      return AtomicExpansionKind::CmpXChg;
5911  }
5912
5913  if (Size == 8 || Size == 16)
5914    return AtomicExpansionKind::MaskedIntrinsic;
5915  return AtomicExpansionKind::None;
5916}
5917
5918static Intrinsic::ID
5919getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5920                                    AtomicRMWInst::BinOp BinOp) {
5921 if (GRLen == 64) {
5922 switch (BinOp) {
5923 default:
5924 llvm_unreachable("Unexpected AtomicRMW BinOp");
5925    case AtomicRMWInst::Xchg:
5926      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5927 case AtomicRMWInst::Add:
5928 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5929 case AtomicRMWInst::Sub:
5930 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5931    case AtomicRMWInst::Nand:
5932      return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5933    case AtomicRMWInst::UMax:
5934      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5935    case AtomicRMWInst::UMin:
5936      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5937 case AtomicRMWInst::Max:
5938 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5939 case AtomicRMWInst::Min:
5940 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5941 // TODO: support other AtomicRMWInst.
5942 }
5943 }
5944
5945 if (GRLen == 32) {
5946 switch (BinOp) {
5947 default:
5948 llvm_unreachable("Unexpected AtomicRMW BinOp");
5949    case AtomicRMWInst::Xchg:
5950      return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5951 case AtomicRMWInst::Add:
5952 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5953 case AtomicRMWInst::Sub:
5954 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5955    case AtomicRMWInst::Nand:
5956      return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5957 // TODO: support other AtomicRMWInst.
5958 }
5959 }
5960
5961 llvm_unreachable("Unexpected GRLen\n");
5962}
5963
5964TargetLowering::AtomicExpansionKind
5965LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5966    AtomicCmpXchgInst *CI) const {
5967
5968  if (Subtarget.hasLAMCAS())
5969    return AtomicExpansionKind::None;
5970
5971  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5972  if (Size == 8 || Size == 16)
5973    return AtomicExpansionKind::MaskedIntrinsic;
5974  return AtomicExpansionKind::None;
5975}
5976
5977Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5978    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5979 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5980 AtomicOrdering FailOrd = CI->getFailureOrdering();
5981 Value *FailureOrdering =
5982 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5983
5984 // TODO: Support cmpxchg on LA32.
5985 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5986 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5987 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5988 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5989 Type *Tys[] = {AlignedAddr->getType()};
5990 Value *Result = Builder.CreateIntrinsic(
5991 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5992 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5993 return Result;
5994}
5995
5996Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5997    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5998 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5999 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
6000 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
6001 // mask, as this produces better code than the LL/SC loop emitted by
6002 // int_loongarch_masked_atomicrmw_xchg.
6003 if (AI->getOperation() == AtomicRMWInst::Xchg &&
6004 isa<ConstantInt>(AI->getValOperand())) {
6005 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
6006 if (CVal->isZero())
6007 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
6008 Builder.CreateNot(Mask, "Inv_Mask"),
6009 AI->getAlign(), Ord);
6010 if (CVal->isMinusOne())
6011 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
6012 AI->getAlign(), Ord);
6013 }
6014
6015 unsigned GRLen = Subtarget.getGRLen();
6016 Value *Ordering =
6017 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
6018 Type *Tys[] = {AlignedAddr->getType()};
6019  Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
6020      AI->getModule(),
6021      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
6022
6023 if (GRLen == 64) {
6024 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
6025 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
6026 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
6027 }
6028
6029 Value *Result;
6030
6031 // Must pass the shift amount needed to sign extend the loaded value prior
6032 // to performing a signed comparison for min/max. ShiftAmt is the number of
6033 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
6034 // is the number of bits to left+right shift the value in order to
6035 // sign-extend.
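  // For example (illustrative only): an i8 atomicrmw min at byte offset 1 on
  // LA64 has ShiftAmt = 8 and ValWidth = 8, so SextShamt = 64 - 8 - 8 = 48,
  // i.e. shifting left and then arithmetically right by 48 sign-extends the
  // loaded byte before the signed comparison.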
6036 if (AI->getOperation() == AtomicRMWInst::Min ||
6037      AI->getOperation() == AtomicRMWInst::Max) {
6038    const DataLayout &DL = AI->getDataLayout();
6039 unsigned ValWidth =
6040 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
6041 Value *SextShamt =
6042 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
6043 Result = Builder.CreateCall(LlwOpScwLoop,
6044 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
6045 } else {
6046 Result =
6047 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
6048 }
6049
6050 if (GRLen == 64)
6051 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
6052 return Result;
6053}
6054
6055bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
6056    const MachineFunction &MF, EVT VT) const {
6057 VT = VT.getScalarType();
6058
6059 if (!VT.isSimple())
6060 return false;
6061
6062 switch (VT.getSimpleVT().SimpleTy) {
6063 case MVT::f32:
6064 case MVT::f64:
6065 return true;
6066 default:
6067 break;
6068 }
6069
6070 return false;
6071}
6072
6073Register LoongArchTargetLowering::getExceptionPointerRegister(
6074    const Constant *PersonalityFn) const {
6075 return LoongArch::R4;
6076}
6077
6078Register LoongArchTargetLowering::getExceptionSelectorRegister(
6079    const Constant *PersonalityFn) const {
6080 return LoongArch::R5;
6081}
6082
6083//===----------------------------------------------------------------------===//
6084// Target Optimization Hooks
6085//===----------------------------------------------------------------------===//
6086
6087static int getEstimateRefinementSteps(EVT VT,
6088                                      const LoongArchSubtarget &Subtarget) {
6089  // The FRECIPE family of instructions has a relative accuracy of 2^-14.
6090  // IEEE single precision has 23 fraction bits and double precision has 52.
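  // Reasoning sketch (not from the upstream source): one Newton-Raphson step
  // roughly squares the error, so 2^-14 becomes about 2^-28 after one step
  // (enough for f32's 24-bit significand) and about 2^-56 after two steps
  // (enough for f64's 53-bit significand), hence the 1 and 2 below.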
6091 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
6092 return RefinementSteps;
6093}
6094
6095SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
6096                                                 SelectionDAG &DAG, int Enabled,
6097 int &RefinementSteps,
6098 bool &UseOneConstNR,
6099 bool Reciprocal) const {
6100 if (Subtarget.hasFrecipe()) {
6101 SDLoc DL(Operand);
6102 EVT VT = Operand.getValueType();
6103
6104 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6105 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6106 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6107 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6108 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6109
6110 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6111 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6112
6113 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
6114 if (Reciprocal)
6115 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
6116
6117 return Estimate;
6118 }
6119 }
6120
6121 return SDValue();
6122}
6123
6124SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
6125                                                  SelectionDAG &DAG,
6126 int Enabled,
6127 int &RefinementSteps) const {
6128 if (Subtarget.hasFrecipe()) {
6129 SDLoc DL(Operand);
6130 EVT VT = Operand.getValueType();
6131
6132 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
6133 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
6134 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
6135 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
6136 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
6137
6138 if (RefinementSteps == ReciprocalEstimate::Unspecified)
6139 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
6140
6141 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
6142 }
6143 }
6144
6145 return SDValue();
6146}
6147
6148//===----------------------------------------------------------------------===//
6149// LoongArch Inline Assembly Support
6150//===----------------------------------------------------------------------===//
6151
6152LoongArchTargetLowering::ConstraintType
6153LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
6154 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
6155 //
6156 // 'f': A floating-point register (if available).
6157 // 'k': A memory operand whose address is formed by a base register and
6158 // (optionally scaled) index register.
6159 // 'l': A signed 16-bit constant.
6160 // 'm': A memory operand whose address is formed by a base register and
6161 // offset that is suitable for use in instructions with the same
6162 // addressing mode as st.w and ld.w.
6163 // 'I': A signed 12-bit constant (for arithmetic instructions).
6164 // 'J': Integer zero.
6165 // 'K': An unsigned 12-bit constant (for logic instructions).
6166 // "ZB": An address that is held in a general-purpose register. The offset is
6167 // zero.
6168 // "ZC": A memory operand whose address is formed by a base register and
6169 // offset that is suitable for use in instructions with the same
6170 // addressing mode as ll.w and sc.w.
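  // Example usage (illustrative only):
  //   int v; asm("ld.w %0, %1" : "=r"(v) : "m"(*p));      // 'm': base + si12
  //   asm volatile("ll.w %0, %1" : "=&r"(v) : "ZC"(*p));  // 'ZC': ll/sc address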
6171 if (Constraint.size() == 1) {
6172 switch (Constraint[0]) {
6173 default:
6174 break;
6175 case 'f':
6176 return C_RegisterClass;
6177 case 'l':
6178 case 'I':
6179 case 'J':
6180 case 'K':
6181 return C_Immediate;
6182 case 'k':
6183 return C_Memory;
6184 }
6185 }
6186
6187 if (Constraint == "ZC" || Constraint == "ZB")
6188 return C_Memory;
6189
6190 // 'm' is handled here.
6191 return TargetLowering::getConstraintType(Constraint);
6192}
6193
6194InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
6195 StringRef ConstraintCode) const {
6196  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
6197      .Case("k", InlineAsm::ConstraintCode::k)
6198      .Case("ZB", InlineAsm::ConstraintCode::ZB)
6199      .Case("ZC", InlineAsm::ConstraintCode::ZC)
6200      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
6201}
6202
6203std::pair<unsigned, const TargetRegisterClass *>
6204LoongArchTargetLowering::getRegForInlineAsmConstraint(
6205 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
6206 // First, see if this is a constraint that directly corresponds to a LoongArch
6207 // register class.
6208 if (Constraint.size() == 1) {
6209 switch (Constraint[0]) {
6210 case 'r':
6211 // TODO: Support fixed vectors up to GRLen?
6212 if (VT.isVector())
6213 break;
6214 return std::make_pair(0U, &LoongArch::GPRRegClass);
6215 case 'f':
6216 if (Subtarget.hasBasicF() && VT == MVT::f32)
6217 return std::make_pair(0U, &LoongArch::FPR32RegClass);
6218 if (Subtarget.hasBasicD() && VT == MVT::f64)
6219 return std::make_pair(0U, &LoongArch::FPR64RegClass);
6220 if (Subtarget.hasExtLSX() &&
6221 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
6222 return std::make_pair(0U, &LoongArch::LSX128RegClass);
6223 if (Subtarget.hasExtLASX() &&
6224 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
6225 return std::make_pair(0U, &LoongArch::LASX256RegClass);
6226 break;
6227 default:
6228 break;
6229 }
6230 }
6231
6232 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
6233 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
6234 // constraints while the official register name is prefixed with a '$'. So we
6235 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
6236 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
6237 // case insensitive, so no need to convert the constraint to upper case here.
6238 //
6239 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
6240 // decode the usage of register name aliases into their official names. And
6241 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
6242 // official register names.
6243 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
6244 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
6245 bool IsFP = Constraint[2] == 'f';
6246 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
6247 std::pair<unsigned, const TargetRegisterClass *> R;
6248    R = TargetLowering::getRegForInlineAsmConstraint(
6249        TRI, join_items("", Temp.first, Temp.second), VT);
6250 // Match those names to the widest floating point register type available.
6251 if (IsFP) {
6252 unsigned RegNo = R.first;
6253 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
6254 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
6255 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
6256 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
6257 }
6258 }
6259 }
6260 return R;
6261 }
6262
6263 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
6264}
6265
6266void LoongArchTargetLowering::LowerAsmOperandForConstraint(
6267 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
6268 SelectionDAG &DAG) const {
6269 // Currently only support length 1 constraints.
6270 if (Constraint.size() == 1) {
6271 switch (Constraint[0]) {
6272 case 'l':
6273 // Validate & create a 16-bit signed immediate operand.
6274 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6275 uint64_t CVal = C->getSExtValue();
6276 if (isInt<16>(CVal))
6277 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6278 Subtarget.getGRLenVT()));
6279 }
6280 return;
6281 case 'I':
6282 // Validate & create a 12-bit signed immediate operand.
6283 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6284 uint64_t CVal = C->getSExtValue();
6285 if (isInt<12>(CVal))
6286 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
6287 Subtarget.getGRLenVT()));
6288 }
6289 return;
6290 case 'J':
6291 // Validate & create an integer zero operand.
6292 if (auto *C = dyn_cast<ConstantSDNode>(Op))
6293 if (C->getZExtValue() == 0)
6294 Ops.push_back(
6295 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
6296 return;
6297 case 'K':
6298 // Validate & create a 12-bit unsigned immediate operand.
6299 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6300 uint64_t CVal = C->getZExtValue();
6301 if (isUInt<12>(CVal))
6302 Ops.push_back(
6303 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
6304 }
6305 return;
6306 default:
6307 break;
6308 }
6309 }
6310 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
6311}
6312
6313#define GET_REGISTER_MATCHER
6314#include "LoongArchGenAsmMatcher.inc"
6315
6316Register
6317LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
6318                                           const MachineFunction &MF) const {
6319 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
6320 std::string NewRegName = Name.second.str();
6321 Register Reg = MatchRegisterAltName(NewRegName);
6322 if (Reg == LoongArch::NoRegister)
6323 Reg = MatchRegisterName(NewRegName);
6324 if (Reg == LoongArch::NoRegister)
6325    report_fatal_error(
6326        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
6327 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
6328 if (!ReservedRegs.test(Reg))
6329 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
6330 StringRef(RegName) + "\"."));
6331 return Reg;
6332}
6333
6335 EVT VT, SDValue C) const {
6336 // TODO: Support vectors.
6337 if (!VT.isScalarInteger())
6338 return false;
6339
6340 // Omit the optimization if the data size exceeds GRLen.
6341 if (VT.getSizeInBits() > Subtarget.getGRLen())
6342 return false;
6343
6344 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
6345 const APInt &Imm = ConstNode->getAPIntValue();
6346 // Break MUL into (SLLI + ADD/SUB) or ALSL.
6347 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
6348 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
6349 return true;
6350 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
6351 if (ConstNode->hasOneUse() &&
6352 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
6353 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
6354 return true;
6355 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
6356 // in which the immediate has two set bits. Or Break (MUL x, imm)
6357 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
6358 // equals to (1 << s0) - (1 << s1).
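    // For example (illustrative only): Imm = 4160 = (1 << 12) + (1 << 6) gives
    // Shifts = 6, ImmSmall = 64 and Imm - ImmSmall = 4096 (a power of two), so
    // the multiply can be broken into (ADD (SLLI x, 12), (SLLI x, 6)).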
6359 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
6360 unsigned Shifts = Imm.countr_zero();
6361 // Reject immediates which can be composed via a single LUI.
6362 if (Shifts >= 12)
6363 return false;
6364 // Reject multiplications can be optimized to
6365 // (SLLI (ALSL x, x, 1/2/3/4), s).
6366 APInt ImmPop = Imm.ashr(Shifts);
6367 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
6368 return false;
6369 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
6370 // since it needs one more instruction than other 3 cases.
6371 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
6372 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
6373 (ImmSmall - Imm).isPowerOf2())
6374 return true;
6375 }
6376 }
6377
6378 return false;
6379}
6380
6382 const AddrMode &AM,
6383 Type *Ty, unsigned AS,
6384 Instruction *I) const {
6385 // LoongArch has four basic addressing modes:
6386 // 1. reg
6387 // 2. reg + 12-bit signed offset
6388 // 3. reg + 14-bit signed offset left-shifted by 2
6389 // 4. reg1 + reg2
6390 // TODO: Add more checks after support vector extension.
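  // For example (illustrative only): `ld.w $a0, $a1, 2044` uses form 2,
  // `ldptr.w $a0, $a1, 4` uses form 3, and `ldx.w $a0, $a1, $a2` uses form 4.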
6391
6392 // No global is ever allowed as a base.
6393 if (AM.BaseGV)
6394 return false;
6395
6396 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
6397 // with `UAL` feature.
6398 if (!isInt<12>(AM.BaseOffs) &&
6399 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
6400 return false;
6401
6402 switch (AM.Scale) {
6403 case 0:
6404 // "r+i" or just "i", depending on HasBaseReg.
6405 break;
6406 case 1:
6407 // "r+r+i" is not allowed.
6408 if (AM.HasBaseReg && AM.BaseOffs)
6409 return false;
6410 // Otherwise we have "r+r" or "r+i".
6411 break;
6412 case 2:
6413 // "2*r+r" or "2*r+i" is not allowed.
6414 if (AM.HasBaseReg || AM.BaseOffs)
6415 return false;
6416 // Allow "2*r" as "r+r".
6417 break;
6418 default:
6419 return false;
6420 }
6421
6422 return true;
6423}
6424
6425bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
6426  return isInt<12>(Imm);
6427}
6428
6429bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
6430  return isInt<12>(Imm);
6431}
6432
6433bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
6434  // Zexts are free if they can be combined with a load.
6435 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
6436 // poorly with type legalization of compares preferring sext.
6437 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
6438 EVT MemVT = LD->getMemoryVT();
6439 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
6440 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
6441 LD->getExtensionType() == ISD::ZEXTLOAD))
6442 return true;
6443 }
6444
6445 return TargetLowering::isZExtFree(Val, VT2);
6446}
6447
6448bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
6449                                                    EVT DstVT) const {
6450 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
6451}
6452
6453bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
6454  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
6455}
6456
6457bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
6458  // TODO: Support vectors.
6459 if (Y.getValueType().isVector())
6460 return false;
6461
6462 return !isa<ConstantSDNode>(Y);
6463}
6464
6465ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
6466  // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
6467 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
6468}
6469
6470bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
6471    Type *Ty, bool IsSigned) const {
6472 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
6473 return true;
6474
6475 return IsSigned;
6476}
6477
6478bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
6479  // Return false to suppress the unnecessary extensions if the LibCall
6480 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
6481 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
6482 Type.getSizeInBits() < Subtarget.getGRLen()))
6483 return false;
6484 return true;
6485}
6486
6487// memcpy, and other memory intrinsics, typically try to use wider load/store
6488// if the source/dest is aligned and the copy size is large enough. We therefore
6489// want to align such objects passed to memory intrinsics.
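// For example (illustrative only): a 32-byte memcpy whose source and
// destination are known to be 8-byte aligned can be lowered on LA64 to four
// ld.d/st.d pairs instead of thirty-two byte-sized accesses.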
6490bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
6491                                                      unsigned &MinSize,
6492 Align &PrefAlign) const {
6493 if (!isa<MemIntrinsic>(CI))
6494 return false;
6495
6496 if (Subtarget.is64Bit()) {
6497 MinSize = 8;
6498 PrefAlign = Align(8);
6499 } else {
6500 MinSize = 4;
6501 PrefAlign = Align(4);
6502 }
6503
6504 return true;
6505}
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
This file contains some functions that are useful when dealing with strings.
Class for arbitrary precision integers.
Definition: APInt.h:78
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1257
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:594
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:827
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ USubCond
Subtract only if no unsigned overflow.
Definition: Instructions.h:764
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Or
*p = old | v
Definition: Instructions.h:728
@ Sub
*p = old - v
Definition: Instructions.h:722
@ And
*p = old & v
Definition: Instructions.h:724
@ Xor
*p = old ^ v
Definition: Instructions.h:730
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
Definition: Instructions.h:768
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:756
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:760
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
Value * getPointerOperand()
Definition: Instructions.h:870
bool isFloatingPointOperation() const
Definition: Instructions.h:882
BinOp getOperation() const
Definition: Instructions.h:805
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:861
Value * getValOperand()
Definition: Instructions.h:874
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:847
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
bool test(unsigned Idx) const
Definition: BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
SmallVectorImpl< ISD::ArgFlagsTy > & getPendingArgFlags()
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
SmallVectorImpl< CCValAssign > & getPendingLocs()
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition: Constants.h:220
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:364
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:847
A debug info location.
Definition: DebugLoc.h:33
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:216
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
Argument * getArg(unsigned i) const
Definition: Function.h:886
bool isDSOLocal() const
Definition: GlobalValue.h:305
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2044
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1479
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:545
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:550
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:890
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1756
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1386
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2151
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:516
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1458
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2032
LLVMContext & getContext() const
Definition: IRBuilder.h:195
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1517
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2141
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2448
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1861
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2018
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1539
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2704
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:68
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:94
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
Class to represent integer types.
Definition: DerivedTypes.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
unsigned getMaxBytesForAlignment() const
Align getPrefFunctionAlignment() const
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y —> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:310
Class to represent pointers.
Definition: DerivedTypes.h:670
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:703
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
size_t use_size() const
Return the number of uses of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:750
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:801
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:503
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:760
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:856
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:827
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:712
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:700
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:796
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:150
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ Entry
Definition: COFF.h:844
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1197
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1193
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:491
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:257
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:744
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1226
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1102
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:498
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:841
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1299
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1304
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:492
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:964
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1490
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ WRITE_REGISTER
Definition: ISDOpcodes.h:125
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:1059
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:981
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1148
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1127
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:522
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:218
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1222
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:674
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:735
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:615
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:124
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1282
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1112
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:849
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:697
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:766
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:135
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:100
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:887
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:709
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1279
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:539
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1217
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:794
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:198
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:530
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1606
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1586
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
ABI getTargetABI(StringRef ABIName)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:285
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
@ None
Definition: CodeGenData.h:106
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition: MathExtras.h:193
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:137
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:210
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Register getFrameRegister(const MachineFunction &MF) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT, bool Value=true)