1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
18#include "LoongArchSubtarget.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/IntrinsicsLoongArch.h"
32#include "llvm/Support/Debug.h"
37
38using namespace llvm;
39
40#define DEBUG_TYPE "loongarch-isel-lowering"
41
42STATISTIC(NumTailCalls, "Number of tail calls");
43
44static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
45 cl::desc("Trap on integer division by zero."),
46 cl::init(false));
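// Usage note (illustrative, not part of the original source): this hidden
// option can be passed to llc, e.g.
//   llc -mtriple=loongarch64 -loongarch-check-zero-division foo.ll
// to request a trap check around integer divisions.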
47
49 const LoongArchSubtarget &STI)
50 : TargetLowering(TM), Subtarget(STI) {
51
52 MVT GRLenVT = Subtarget.getGRLenVT();
53
54 // Set up the register classes.
55
56 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
57 if (Subtarget.hasBasicF())
58 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
59 if (Subtarget.hasBasicD())
60 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
61
62 static const MVT::SimpleValueType LSXVTs[] = {
63 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
64 static const MVT::SimpleValueType LASXVTs[] = {
65 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
66
67 if (Subtarget.hasExtLSX())
68 for (MVT VT : LSXVTs)
69 addRegisterClass(VT, &LoongArch::LSX128RegClass);
70
71 if (Subtarget.hasExtLASX())
72 for (MVT VT : LASXVTs)
73 addRegisterClass(VT, &LoongArch::LASX256RegClass);
74
75 // Set operations for LA32 and LA64.
76
78 MVT::i1, Promote);
79
86
89 GRLenVT, Custom);
90
92
93 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
94 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
95 setOperationAction(ISD::VASTART, MVT::Other, Custom);
96 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
97
98 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
99 setOperationAction(ISD::TRAP, MVT::Other, Legal);
100
104
105 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
106
107 // BITREV/REVB requires the 32S feature.
108 if (STI.has32S()) {
109 // Expand bitreverse.i16 with native-width bitrev and shift for now, until
110 // we know whether sll or revb.2h is faster.
113
114 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
115 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
116 // and i32 could still be byte-swapped relatively cheaply.
118 } else {
126 }
127
128 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
129 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
130 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
134
137
138 // Set operations for LA64 only.
139
140 if (Subtarget.is64Bit()) {
147 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
158
162 Custom);
163 setOperationAction(ISD::LROUND, MVT::i32, Custom);
164 }
165
166 // Set operations for LA32 only.
167
168 if (!Subtarget.is64Bit()) {
174 if (Subtarget.hasBasicD())
175 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
176 }
177
178 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
179
180 static const ISD::CondCode FPCCToExpand[] = {
183
184 // Set operations for 'F' feature.
185
186 if (Subtarget.hasBasicF()) {
187 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
188 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
189 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
190 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
191 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
192
194 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
196 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
197 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
198 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
199 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
204 setOperationAction(ISD::FSIN, MVT::f32, Expand);
205 setOperationAction(ISD::FCOS, MVT::f32, Expand);
206 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
207 setOperationAction(ISD::FPOW, MVT::f32, Expand);
209 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
210 Subtarget.isSoftFPABI() ? LibCall : Custom);
211 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
212 Subtarget.isSoftFPABI() ? LibCall : Custom);
213 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
214 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
215 Subtarget.isSoftFPABI() ? LibCall : Custom);
216
217 if (Subtarget.is64Bit())
218 setOperationAction(ISD::FRINT, MVT::f32, Legal);
219
220 if (!Subtarget.hasBasicD()) {
222 if (Subtarget.is64Bit()) {
225 }
226 }
227 }
228
229 // Set operations for 'D' feature.
230
231 if (Subtarget.hasBasicD()) {
232 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
233 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
234 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
235 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
236 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
237 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
238 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
239
241 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
245 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
246 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
247 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
249 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
251 setOperationAction(ISD::FSIN, MVT::f64, Expand);
252 setOperationAction(ISD::FCOS, MVT::f64, Expand);
253 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
254 setOperationAction(ISD::FPOW, MVT::f64, Expand);
256 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
257 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
258 Subtarget.isSoftFPABI() ? LibCall : Custom);
259 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
260 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
261 Subtarget.isSoftFPABI() ? LibCall : Custom);
262
263 if (Subtarget.is64Bit())
264 setOperationAction(ISD::FRINT, MVT::f64, Legal);
265 }
266
267 // Set operations for 'LSX' feature.
268
269 if (Subtarget.hasExtLSX()) {
271 // Expand all truncating stores and extending loads.
272 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
273 setTruncStoreAction(VT, InnerVT, Expand);
276 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
277 }
278 // By default everything must be expanded. Then we will selectively turn
279 // on ones that can be effectively codegen'd.
280 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
282 }
283
284 for (MVT VT : LSXVTs) {
285 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
286 setOperationAction(ISD::BITCAST, VT, Legal);
288
292
297 }
298 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
301 Legal);
303 VT, Legal);
310 Expand);
314 }
315 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
317 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
319 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
322 }
323 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
327 setOperationAction(ISD::FSQRT, VT, Legal);
328 setOperationAction(ISD::FNEG, VT, Legal);
331 VT, Expand);
333 }
335 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
336 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
337 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
338 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
339
340 for (MVT VT :
341 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
342 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
344 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
345 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
346 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
347 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
348 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
349 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
350 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
351 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
352 }
353 }
354
355 // Set operations for 'LASX' feature.
356
357 if (Subtarget.hasExtLASX()) {
358 for (MVT VT : LASXVTs) {
359 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
360 setOperationAction(ISD::BITCAST, VT, Legal);
362
368
372 }
373 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
376 Legal);
378 VT, Legal);
385 Expand);
389 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
390 }
391 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
393 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
395 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
398 }
399 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
403 setOperationAction(ISD::FSQRT, VT, Legal);
404 setOperationAction(ISD::FNEG, VT, Legal);
407 VT, Expand);
409 }
410 }
411
412 // Set DAG combine for LA32 and LA64.
413
418
419 // Set DAG combine for 'LSX' feature.
420
421 if (Subtarget.hasExtLSX()) {
423 setTargetDAGCombine(ISD::BITCAST);
424 }
425
426 // Set DAG combine for 'LASX' feature.
427
428 if (Subtarget.hasExtLASX())
430
431 // Compute derived properties from the register classes.
432 computeRegisterProperties(Subtarget.getRegisterInfo());
433
435
438
439 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
440
442
443 // Function alignments.
445 // Set preferred alignments.
446 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
447 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
448 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
449
450 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
451 if (Subtarget.hasLAMCAS())
453
454 if (Subtarget.hasSCQ()) {
456 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
457 }
458}
459
461 const GlobalAddressSDNode *GA) const {
462 // In order to maximise the opportunity for common subexpression elimination,
463 // keep a separate ADD node for the global address offset instead of folding
464 // it in the global address node. Later peephole optimisations may choose to
465 // fold it back in when profitable.
466 return false;
467}
468
470 SelectionDAG &DAG) const {
471 switch (Op.getOpcode()) {
472 case ISD::ATOMIC_FENCE:
473 return lowerATOMIC_FENCE(Op, DAG);
475 return lowerEH_DWARF_CFA(Op, DAG);
477 return lowerGlobalAddress(Op, DAG);
479 return lowerGlobalTLSAddress(Op, DAG);
481 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
483 return lowerINTRINSIC_W_CHAIN(Op, DAG);
485 return lowerINTRINSIC_VOID(Op, DAG);
487 return lowerBlockAddress(Op, DAG);
488 case ISD::JumpTable:
489 return lowerJumpTable(Op, DAG);
490 case ISD::SHL_PARTS:
491 return lowerShiftLeftParts(Op, DAG);
492 case ISD::SRA_PARTS:
493 return lowerShiftRightParts(Op, DAG, true);
494 case ISD::SRL_PARTS:
495 return lowerShiftRightParts(Op, DAG, false);
497 return lowerConstantPool(Op, DAG);
498 case ISD::FP_TO_SINT:
499 return lowerFP_TO_SINT(Op, DAG);
500 case ISD::BITCAST:
501 return lowerBITCAST(Op, DAG);
502 case ISD::UINT_TO_FP:
503 return lowerUINT_TO_FP(Op, DAG);
504 case ISD::SINT_TO_FP:
505 return lowerSINT_TO_FP(Op, DAG);
506 case ISD::VASTART:
507 return lowerVASTART(Op, DAG);
508 case ISD::FRAMEADDR:
509 return lowerFRAMEADDR(Op, DAG);
510 case ISD::RETURNADDR:
511 return lowerRETURNADDR(Op, DAG);
513 return lowerWRITE_REGISTER(Op, DAG);
515 return lowerINSERT_VECTOR_ELT(Op, DAG);
517 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
519 return lowerBUILD_VECTOR(Op, DAG);
521 return lowerCONCAT_VECTORS(Op, DAG);
523 return lowerVECTOR_SHUFFLE(Op, DAG);
524 case ISD::BITREVERSE:
525 return lowerBITREVERSE(Op, DAG);
527 return lowerSCALAR_TO_VECTOR(Op, DAG);
528 case ISD::PREFETCH:
529 return lowerPREFETCH(Op, DAG);
530 case ISD::SELECT:
531 return lowerSELECT(Op, DAG);
532 case ISD::BRCOND:
533 return lowerBRCOND(Op, DAG);
534 case ISD::FP_TO_FP16:
535 return lowerFP_TO_FP16(Op, DAG);
536 case ISD::FP16_TO_FP:
537 return lowerFP16_TO_FP(Op, DAG);
538 case ISD::FP_TO_BF16:
539 return lowerFP_TO_BF16(Op, DAG);
540 case ISD::BF16_TO_FP:
541 return lowerBF16_TO_FP(Op, DAG);
542 case ISD::VECREDUCE_ADD:
543 return lowerVECREDUCE_ADD(Op, DAG);
544 case ISD::VECREDUCE_AND:
545 case ISD::VECREDUCE_OR:
546 case ISD::VECREDUCE_XOR:
547 case ISD::VECREDUCE_SMAX:
548 case ISD::VECREDUCE_SMIN:
549 case ISD::VECREDUCE_UMAX:
550 case ISD::VECREDUCE_UMIN:
551 return lowerVECREDUCE(Op, DAG);
552 }
553 return SDValue();
554}
555
556// Lower vecreduce_add using vhaddw instructions.
558// For example:
558// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
559// can be lowered to:
560// VHADDW_D_W vr0, vr0, vr0
561// VHADDW_Q_D vr0, vr0, vr0
562// VPICKVE2GR_D a0, vr0, 0
563// ADDI_W a0, a0, 0
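// For a 256-bit LASX input, an illustrative tail (matching the code below)
// permutes the 128-bit halves with XVPERMI (immediate 2) so the two halves can
// be added together before the final element extraction.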
564SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
565 SelectionDAG &DAG) const {
566
567 SDLoc DL(Op);
568 MVT OpVT = Op.getSimpleValueType();
569 SDValue Val = Op.getOperand(0);
570
571 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
572 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
573
574 unsigned LegalVecSize = 128;
575 bool isLASX256Vector =
576 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
577
578 // Widen the operand vector until its type is legal.
579 while (!isTypeLegal(Val.getSimpleValueType())) {
580 Val = DAG.WidenVector(Val, DL);
581 }
582
583 // NumEles determines the iteration count; v4i32 for LSX and v8i32 for LASX
584 // should use the same count.
585 if (isLASX256Vector) {
586 NumEles /= 2;
587 LegalVecSize = 256;
588 }
589
590 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
591 MVT IntTy = MVT::getIntegerVT(EleBits);
592 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
593 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
594 }
595
596 if (isLASX256Vector) {
597 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
598 DAG.getConstant(2, DL, MVT::i64));
599 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
600 }
601
602 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
603 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
604}
605
606// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
608// For example:
608// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
609// can be lowered to:
610// VBSRL_V vr1, vr0, 8
611// VMAX_W vr0, vr1, vr0
612// VBSRL_V vr1, vr0, 4
613// VMAX_W vr0, vr1, vr0
614// VPICKVE2GR_W a0, vr0, 0
615// A 256-bit vector is illegal here; by default it is split into two 128-bit
616// vectors, which are then processed by this function.
617SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
618 SelectionDAG &DAG) const {
619 SDLoc DL(Op);
620
621 MVT OpVT = Op.getSimpleValueType();
622 SDValue Val = Op.getOperand(0);
623
624 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
625 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
626
627 // Widen the operand vector until its type is legal.
628 while (!isTypeLegal(Val.getSimpleValueType())) {
629 Val = DAG.WidenVector(Val, DL);
630 }
631
632 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
633 MVT VecTy = Val.getSimpleValueType();
634
635 for (int i = NumEles; i > 1; i /= 2) {
636 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, MVT::i64);
637 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
638 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
639 }
640
641 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
642 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
643}
644
645SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
646 SelectionDAG &DAG) const {
647 unsigned IsData = Op.getConstantOperandVal(4);
648
649 // We don't support non-data prefetch.
650 // Just preserve the chain.
651 if (!IsData)
652 return Op.getOperand(0);
653
654 return Op;
655}
656
657// Return true if Val is equal to (setcc LHS, RHS, CC).
658// Return false if Val is the inverse of (setcc LHS, RHS, CC).
659// Otherwise, return std::nullopt.
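// For example (illustrative): with Val = (setcc a, b, setlt),
// matchSetCC(a, b, setlt, Val) returns true, matchSetCC(a, b, setge, Val)
// returns false, and matchSetCC(a, c, setlt, Val) returns std::nullopt.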
660static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
661 ISD::CondCode CC, SDValue Val) {
662 assert(Val->getOpcode() == ISD::SETCC);
663 SDValue LHS2 = Val.getOperand(0);
664 SDValue RHS2 = Val.getOperand(1);
665 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
666
667 if (LHS == LHS2 && RHS == RHS2) {
668 if (CC == CC2)
669 return true;
670 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
671 return false;
672 } else if (LHS == RHS2 && RHS == LHS2) {
674 if (CC == CC2)
675 return true;
676 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
677 return false;
678 }
679
680 return std::nullopt;
681}
682
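// Fold (select cond, TrueV, FalseV) into plain bitwise operations when one arm
// is the constant 0 or -1, or when the condition and both arms are setccs over
// the same operands, e.g. (select c, -1, y) -> (or (neg c), y) and
// (select c, 0, y) -> (and (add c, -1), y).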
684 const LoongArchSubtarget &Subtarget) {
685 SDValue CondV = N->getOperand(0);
686 SDValue TrueV = N->getOperand(1);
687 SDValue FalseV = N->getOperand(2);
688 MVT VT = N->getSimpleValueType(0);
689 SDLoc DL(N);
690
691 // (select c, -1, y) -> -c | y
692 if (isAllOnesConstant(TrueV)) {
693 SDValue Neg = DAG.getNegative(CondV, DL, VT);
694 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
695 }
696 // (select c, y, -1) -> (c-1) | y
697 if (isAllOnesConstant(FalseV)) {
698 SDValue Neg =
699 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
700 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
701 }
702
703 // (select c, 0, y) -> (c-1) & y
704 if (isNullConstant(TrueV)) {
705 SDValue Neg =
706 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
707 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
708 }
709 // (select c, y, 0) -> -c & y
710 if (isNullConstant(FalseV)) {
711 SDValue Neg = DAG.getNegative(CondV, DL, VT);
712 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
713 }
714
715 // select c, ~x, x --> xor -c, x
716 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
717 const APInt &TrueVal = TrueV->getAsAPIntVal();
718 const APInt &FalseVal = FalseV->getAsAPIntVal();
719 if (~TrueVal == FalseVal) {
720 SDValue Neg = DAG.getNegative(CondV, DL, VT);
721 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
722 }
723 }
724
725 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
726 // when both truev and falsev are also setcc.
727 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
728 FalseV.getOpcode() == ISD::SETCC) {
729 SDValue LHS = CondV.getOperand(0);
730 SDValue RHS = CondV.getOperand(1);
731 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
732
733 // (select x, x, y) -> x | y
734 // (select !x, x, y) -> x & y
735 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
736 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
737 DAG.getFreeze(FalseV));
738 }
739 // (select x, y, x) -> x & y
740 // (select !x, y, x) -> x | y
741 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
742 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
743 DAG.getFreeze(TrueV), FalseV);
744 }
745 }
746
747 return SDValue();
748}
749
750// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
751// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
752// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
753// being `0` or `-1`. In such cases we can replace `select` with `and`.
754// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
755// than `c0`?
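// For example (illustrative): (add (select cond, x, -1), 1)
// -> (select cond, (add x, 1), 0), since add(-1, 1) == 0; the resulting select
// can then be folded into an AND with the negated condition.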
756static SDValue
758 const LoongArchSubtarget &Subtarget) {
759 unsigned SelOpNo = 0;
760 SDValue Sel = BO->getOperand(0);
761 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
762 SelOpNo = 1;
763 Sel = BO->getOperand(1);
764 }
765
766 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
767 return SDValue();
768
769 unsigned ConstSelOpNo = 1;
770 unsigned OtherSelOpNo = 2;
771 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
772 ConstSelOpNo = 2;
773 OtherSelOpNo = 1;
774 }
775 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
776 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
777 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
778 return SDValue();
779
780 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
781 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
782 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
783 return SDValue();
784
785 SDLoc DL(Sel);
786 EVT VT = BO->getValueType(0);
787
788 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
789 if (SelOpNo == 1)
790 std::swap(NewConstOps[0], NewConstOps[1]);
791
792 SDValue NewConstOp =
793 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
794 if (!NewConstOp)
795 return SDValue();
796
797 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
798 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
799 return SDValue();
800
801 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
802 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
803 if (SelOpNo == 1)
804 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
805 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
806
807 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
808 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
809 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
810}
811
812// Changes the condition code and swaps operands if necessary, so the SetCC
813// operation matches one of the comparisons supported directly by branches
814// in the LoongArch ISA. May adjust compares to favor compare with 0 over
815// compare with 1/-1.
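// For example (illustrative): (setcc x, 1, setlt) becomes (setcc 0, x, setge),
// and a single-bit equality test such as (setcc (and x, 0x800), 0, seteq) is
// rewritten as a shift of the tested bit to the MSB followed by
// (setcc shifted, 0, setge).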
817 ISD::CondCode &CC, SelectionDAG &DAG) {
818 // If this is a single bit test that can't be handled by ANDI, shift the
819 // bit to be tested to the MSB and perform a signed compare with 0.
820 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
821 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
822 isa<ConstantSDNode>(LHS.getOperand(1))) {
823 uint64_t Mask = LHS.getConstantOperandVal(1);
824 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
825 unsigned ShAmt = 0;
826 if (isPowerOf2_64(Mask)) {
827 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
828 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
829 } else {
830 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
831 }
832
833 LHS = LHS.getOperand(0);
834 if (ShAmt != 0)
835 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
836 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
837 return;
838 }
839 }
840
841 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
842 int64_t C = RHSC->getSExtValue();
843 switch (CC) {
844 default:
845 break;
846 case ISD::SETGT:
847 // Convert X > -1 to X >= 0.
848 if (C == -1) {
849 RHS = DAG.getConstant(0, DL, RHS.getValueType());
850 CC = ISD::SETGE;
851 return;
852 }
853 break;
854 case ISD::SETLT:
855 // Convert X < 1 to 0 >= X.
856 if (C == 1) {
857 RHS = LHS;
858 LHS = DAG.getConstant(0, DL, RHS.getValueType());
859 CC = ISD::SETGE;
860 return;
861 }
862 break;
863 }
864 }
865
866 switch (CC) {
867 default:
868 break;
869 case ISD::SETGT:
870 case ISD::SETLE:
871 case ISD::SETUGT:
872 case ISD::SETULE:
874 std::swap(LHS, RHS);
875 break;
876 }
877}
878
879SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
880 SelectionDAG &DAG) const {
881 SDValue CondV = Op.getOperand(0);
882 SDValue TrueV = Op.getOperand(1);
883 SDValue FalseV = Op.getOperand(2);
884 SDLoc DL(Op);
885 MVT VT = Op.getSimpleValueType();
886 MVT GRLenVT = Subtarget.getGRLenVT();
887
888 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
889 return V;
890
891 if (Op.hasOneUse()) {
892 unsigned UseOpc = Op->user_begin()->getOpcode();
893 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
894 SDNode *BinOp = *Op->user_begin();
895 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
896 DAG, Subtarget)) {
897 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
898 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
899 // may return a constant node and cause a crash in lowerSELECT.
900 if (NewSel.getOpcode() == ISD::SELECT)
901 return lowerSELECT(NewSel, DAG);
902 return NewSel;
903 }
904 }
905 }
906
907 // If the condition is not an integer SETCC which operates on GRLenVT, we need
908 // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
909 // (select condv, truev, falsev)
910 // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
911 if (CondV.getOpcode() != ISD::SETCC ||
912 CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
913 SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
914 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
915
916 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
917
918 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
919 }
920
921 // If the CondV is the output of a SETCC node which operates on GRLenVT
922 // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
923 // to take advantage of the integer compare+branch instructions. i.e.: (select
924 // (setcc lhs, rhs, cc), truev, falsev)
925 // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
926 SDValue LHS = CondV.getOperand(0);
927 SDValue RHS = CondV.getOperand(1);
928 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
929
930 // Special case for a select of 2 constants that have a difference of 1.
931 // Normally this is done by DAGCombine, but if the select is introduced by
932 // type legalization or op legalization, we miss it. Restricting to SETLT
933 // case for now because that is what signed saturating add/sub need.
934 // FIXME: We don't need the condition to be SETLT or even a SETCC,
935 // but we would probably want to swap the true/false values if the condition
936 // is SETGE/SETLE to avoid an XORI.
937 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
938 CCVal == ISD::SETLT) {
939 const APInt &TrueVal = TrueV->getAsAPIntVal();
940 const APInt &FalseVal = FalseV->getAsAPIntVal();
941 if (TrueVal - 1 == FalseVal)
942 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
943 if (TrueVal + 1 == FalseVal)
944 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
945 }
946
947 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
948 // 1 < x ? x : 1 -> 0 < x ? x : 1
949 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
950 RHS == TrueV && LHS == FalseV) {
951 LHS = DAG.getConstant(0, DL, VT);
952 // 0 <u x is the same as x != 0.
953 if (CCVal == ISD::SETULT) {
954 std::swap(LHS, RHS);
955 CCVal = ISD::SETNE;
956 }
957 }
958
959 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
960 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
961 RHS == FalseV) {
962 RHS = DAG.getConstant(0, DL, VT);
963 }
964
965 SDValue TargetCC = DAG.getCondCode(CCVal);
966
967 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
968 // (select (setcc lhs, rhs, CC), constant, falsev)
969 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
970 std::swap(TrueV, FalseV);
971 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
972 }
973
974 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
975 return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
976}
977
978SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
979 SelectionDAG &DAG) const {
980 SDValue CondV = Op.getOperand(1);
981 SDLoc DL(Op);
982 MVT GRLenVT = Subtarget.getGRLenVT();
983
984 if (CondV.getOpcode() == ISD::SETCC) {
985 if (CondV.getOperand(0).getValueType() == GRLenVT) {
986 SDValue LHS = CondV.getOperand(0);
987 SDValue RHS = CondV.getOperand(1);
988 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
989
990 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
991
992 SDValue TargetCC = DAG.getCondCode(CCVal);
993 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
994 Op.getOperand(0), LHS, RHS, TargetCC,
995 Op.getOperand(2));
996 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
997 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
998 Op.getOperand(0), CondV, Op.getOperand(2));
999 }
1000 }
1001
1002 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1003 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1004 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1005}
1006
1007SDValue
1008LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1009 SelectionDAG &DAG) const {
1010 SDLoc DL(Op);
1011 MVT OpVT = Op.getSimpleValueType();
1012
1013 SDValue Vector = DAG.getUNDEF(OpVT);
1014 SDValue Val = Op.getOperand(0);
1015 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1016
1017 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1018}
1019
1020SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
1021 SelectionDAG &DAG) const {
1022 EVT ResTy = Op->getValueType(0);
1023 SDValue Src = Op->getOperand(0);
1024 SDLoc DL(Op);
1025
1026 EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
1027 unsigned int OrigEltNum = ResTy.getVectorNumElements();
1028 unsigned int NewEltNum = NewVT.getVectorNumElements();
1029
1030 SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);
1031
1033 for (unsigned int i = 0; i < NewEltNum; i++) {
1034 SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
1035 DAG.getConstant(i, DL, MVT::i64));
1036 unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
1037 ? (unsigned)LoongArchISD::BITREV_8B
1038 : (unsigned)ISD::BITREVERSE;
1039 Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
1040 }
1041 SDValue Res =
1042 DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));
1043
1044 switch (ResTy.getSimpleVT().SimpleTy) {
1045 default:
1046 return SDValue();
1047 case MVT::v16i8:
1048 case MVT::v32i8:
1049 return Res;
1050 case MVT::v8i16:
1051 case MVT::v16i16:
1052 case MVT::v4i32:
1053 case MVT::v8i32: {
1055 for (unsigned int i = 0; i < NewEltNum; i++)
1056 for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
1057 Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
1058 return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
1059 }
1060 }
1061}
1062
1063// Widen element type to get a new mask value (if possible).
1064// For example:
1065// shufflevector <4 x i32> %a, <4 x i32> %b,
1066// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1067// is equivalent to:
1068// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1069// can be lowered to:
1070// VPACKOD_D vr0, vr0, vr1
1072 SDValue V1, SDValue V2, SelectionDAG &DAG) {
1073 unsigned EltBits = VT.getScalarSizeInBits();
1074
1075 if (EltBits > 32 || EltBits == 1)
1076 return SDValue();
1077
1078 SmallVector<int, 8> NewMask;
1079 if (widenShuffleMaskElts(Mask, NewMask)) {
1080 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
1081 : MVT::getIntegerVT(EltBits * 2);
1082 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
1083 if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
1084 SDValue NewV1 = DAG.getBitcast(NewVT, V1);
1085 SDValue NewV2 = DAG.getBitcast(NewVT, V2);
1086 return DAG.getBitcast(
1087 VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
1088 }
1089 }
1090
1091 return SDValue();
1092}
1093
1094/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
1095/// instruction.
1096// The function matches elements from one of the input vectors shuffled to the
1097// left or right with zeroable elements 'shifted in'. It handles both the
1098// strictly bit-wise element shifts and the byte shift across an entire 128-bit
1099// lane.
1100// Mostly copied from X86.
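// For example (illustrative): on v4i32, the mask <4, 0, 1, 2> with the second
// operand all-zero matches a whole-vector byte shift left by 4 (VBSLL), while
// <4, 0, 4, 2> matches a 64-bit element shift left by 32 (VSLLI).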
1101static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
1102 unsigned ScalarSizeInBits, ArrayRef<int> Mask,
1103 int MaskOffset, const APInt &Zeroable) {
1104 int Size = Mask.size();
1105 unsigned SizeInBits = Size * ScalarSizeInBits;
1106
1107 auto CheckZeros = [&](int Shift, int Scale, bool Left) {
1108 for (int i = 0; i < Size; i += Scale)
1109 for (int j = 0; j < Shift; ++j)
1110 if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
1111 return false;
1112
1113 return true;
1114 };
1115
1116 auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
1117 int Step = 1) {
1118 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
1119 if (!(Mask[i] == -1 || Mask[i] == Low))
1120 return false;
1121 return true;
1122 };
1123
1124 auto MatchShift = [&](int Shift, int Scale, bool Left) {
1125 for (int i = 0; i != Size; i += Scale) {
1126 unsigned Pos = Left ? i + Shift : i;
1127 unsigned Low = Left ? i : i + Shift;
1128 unsigned Len = Scale - Shift;
1129 if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
1130 return -1;
1131 }
1132
1133 int ShiftEltBits = ScalarSizeInBits * Scale;
1134 bool ByteShift = ShiftEltBits > 64;
1135 Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
1136 : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
1137 int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
1138
1139 // Normalize the scale for byte shifts to still produce an i64 element
1140 // type.
1141 Scale = ByteShift ? Scale / 2 : Scale;
1142
1143 // We need to round trip through the appropriate type for the shift.
1144 MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
1145 ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
1146 : MVT::getVectorVT(ShiftSVT, Size / Scale);
1147 return (int)ShiftAmt;
1148 };
1149
1150 unsigned MaxWidth = 128;
1151 for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
1152 for (int Shift = 1; Shift != Scale; ++Shift)
1153 for (bool Left : {true, false})
1154 if (CheckZeros(Shift, Scale, Left)) {
1155 int ShiftAmt = MatchShift(Shift, Scale, Left);
1156 if (0 < ShiftAmt)
1157 return ShiftAmt;
1158 }
1159
1160 // no match
1161 return -1;
1162}
1163
1164/// Lower VECTOR_SHUFFLE as shift (if possible).
1165///
1166/// For example:
1167/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1168/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1169/// is lowered to:
1170/// (VBSLL_V $v0, $v0, 4)
1171///
1172/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1173/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1174/// is lowered to:
1175/// (VSLLI_D $v0, $v0, 32)
1177 MVT VT, SDValue V1, SDValue V2,
1178 SelectionDAG &DAG,
1179 const LoongArchSubtarget &Subtarget,
1180 const APInt &Zeroable) {
1181 int Size = Mask.size();
1182 assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
1183
1184 MVT ShiftVT;
1185 SDValue V = V1;
1186 unsigned Opcode;
1187
1188 // Try to match shuffle against V1 shift.
1189 int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1190 Mask, 0, Zeroable);
1191
1192 // If V1 failed, try to match shuffle against V2 shift.
1193 if (ShiftAmt < 0) {
1194 ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
1195 Mask, Size, Zeroable);
1196 V = V2;
1197 }
1198
1199 if (ShiftAmt < 0)
1200 return SDValue();
1201
1202 assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
1203 "Illegal integer vector type");
1204 V = DAG.getBitcast(ShiftVT, V);
1205 V = DAG.getNode(Opcode, DL, ShiftVT, V,
1206 DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
1207 return DAG.getBitcast(VT, V);
1208}
1209
1210/// Determine whether a range fits a regular pattern of values.
1211/// This function accounts for the possibility of jumping over the End iterator.
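/// For example (illustrative): with Mask = <0, 7, 2, 5>,
/// fitsRegularPattern<int>(Mask.begin(), 2, Mask.end(), 0, 2) is true, because
/// the elements visited at stride 2 (positions 0 and 2) are 0 and 2.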
1212template <typename ValType>
1213static bool
1215 unsigned CheckStride,
1217 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
1218 auto &I = Begin;
1219
1220 while (I != End) {
1221 if (*I != -1 && *I != ExpectedIndex)
1222 return false;
1223 ExpectedIndex += ExpectedIndexStride;
1224
1225 // Incrementing past End is undefined behaviour so we must increment one
1226 // step at a time and check for End at each step.
1227 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
1228 ; // Empty loop body.
1229 }
1230 return true;
1231}
1232
1233/// Compute whether each element of a shuffle is zeroable.
1234///
1235/// A "zeroable" vector shuffle element is one which can be lowered to zero.
1237 SDValue V2, APInt &KnownUndef,
1238 APInt &KnownZero) {
1239 int Size = Mask.size();
1240 KnownUndef = KnownZero = APInt::getZero(Size);
1241
1242 V1 = peekThroughBitcasts(V1);
1243 V2 = peekThroughBitcasts(V2);
1244
1245 bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
1246 bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
1247
1248 int VectorSizeInBits = V1.getValueSizeInBits();
1249 int ScalarSizeInBits = VectorSizeInBits / Size;
1250 assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
1251 (void)ScalarSizeInBits;
1252
1253 for (int i = 0; i < Size; ++i) {
1254 int M = Mask[i];
1255 if (M < 0) {
1256 KnownUndef.setBit(i);
1257 continue;
1258 }
1259 if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
1260 KnownZero.setBit(i);
1261 continue;
1262 }
1263 }
1264}
1265
1266/// Test whether a shuffle mask is equivalent within each sub-lane.
1267///
1268/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1269/// non-trivial to compute in the face of undef lanes. The representation is
1270/// suitable for use with existing 128-bit shuffles as entries from the second
1271/// vector have been remapped to [LaneSize, 2*LaneSize).
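/// For example (illustrative): on v8i32 with 128-bit lanes, the mask
/// <0, 9, 2, 3, 4, 13, 6, 7> repeats within both lanes and yields the
/// RepeatedMask <0, 5, 2, 3>, with second-vector entries remapped to [4, 8).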
1272static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1273 ArrayRef<int> Mask,
1274 SmallVectorImpl<int> &RepeatedMask) {
1275 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1276 RepeatedMask.assign(LaneSize, -1);
1277 int Size = Mask.size();
1278 for (int i = 0; i < Size; ++i) {
1279 assert(Mask[i] == -1 || Mask[i] >= 0);
1280 if (Mask[i] < 0)
1281 continue;
1282 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1283 // This entry crosses lanes, so there is no way to model this shuffle.
1284 return false;
1285
1286 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1287 // Adjust second vector indices to start at LaneSize instead of Size.
1288 int LocalM =
1289 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1290 if (RepeatedMask[i % LaneSize] < 0)
1291 // This is the first non-undef entry in this slot of a 128-bit lane.
1292 RepeatedMask[i % LaneSize] = LocalM;
1293 else if (RepeatedMask[i % LaneSize] != LocalM)
1294 // Found a mismatch with the repeated mask.
1295 return false;
1296 }
1297 return true;
1298}
1299
1300/// Attempts to match vector shuffle as byte rotation.
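/// For example (illustrative): the v8i16 mask <1, 2, 3, 4, 5, 6, 7, 8> rotates
/// the inputs by one 16-bit element and is reported as a byte rotation of 2.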
1302 ArrayRef<int> Mask) {
1303
1304 SDValue Lo, Hi;
1305 SmallVector<int, 16> RepeatedMask;
1306
1307 if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
1308 return -1;
1309
1310 int NumElts = RepeatedMask.size();
1311 int Rotation = 0;
1312 int Scale = 16 / NumElts;
1313
1314 for (int i = 0; i < NumElts; ++i) {
1315 int M = RepeatedMask[i];
1316 assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
1317 "Unexpected mask index.");
1318 if (M < 0)
1319 continue;
1320
1321 // Determine where a rotated vector would have started.
1322 int StartIdx = i - (M % NumElts);
1323 if (StartIdx == 0)
1324 return -1;
1325
1326 // If we found the tail of a vector the rotation must be the missing
1327 // front. If we found the head of a vector, it must be how much of the
1328 // head.
1329 int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
1330
1331 if (Rotation == 0)
1332 Rotation = CandidateRotation;
1333 else if (Rotation != CandidateRotation)
1334 return -1;
1335
1336 // Compute which value this mask is pointing at.
1337 SDValue MaskV = M < NumElts ? V1 : V2;
1338
1339 // Compute which of the two target values this index should be assigned
1340 // to. This reflects whether the high elements are remaining or the low
1341 // elements are remaining.
1342 SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
1343
1344 // Either set up this value if we've not encountered it before, or check
1345 // that it remains consistent.
1346 if (!TargetV)
1347 TargetV = MaskV;
1348 else if (TargetV != MaskV)
1349 return -1;
1350 }
1351
1352 // Check that we successfully analyzed the mask, and normalize the results.
1353 assert(Rotation != 0 && "Failed to locate a viable rotation!");
1354 assert((Lo || Hi) && "Failed to find a rotated input vector!");
1355 if (!Lo)
1356 Lo = Hi;
1357 else if (!Hi)
1358 Hi = Lo;
1359
1360 V1 = Lo;
1361 V2 = Hi;
1362
1363 return Rotation * Scale;
1364}
1365
1366/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1367///
1368/// For example:
1369/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1370/// <2 x i32> <i32 3, i32 0>
1371/// is lowered to:
1372/// (VBSRL_V $v1, $v1, 8)
1373/// (VBSLL_V $v0, $v0, 8)
1374/// (VOR_V $v0, $V0, $v1)
1375static SDValue
1377 SDValue V1, SDValue V2, SelectionDAG &DAG,
1378 const LoongArchSubtarget &Subtarget) {
1379
1380 SDValue Lo = V1, Hi = V2;
1381 int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
1382 if (ByteRotation <= 0)
1383 return SDValue();
1384
1385 MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
1386 Lo = DAG.getBitcast(ByteVT, Lo);
1387 Hi = DAG.getBitcast(ByteVT, Hi);
1388
1389 int LoByteShift = 16 - ByteRotation;
1390 int HiByteShift = ByteRotation;
1391 MVT GRLenVT = Subtarget.getGRLenVT();
1392
1393 SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
1394 DAG.getConstant(LoByteShift, DL, GRLenVT));
1395 SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
1396 DAG.getConstant(HiByteShift, DL, GRLenVT));
1397 return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
1398}
1399
1400/// Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
1401///
1402/// For example:
1403/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1404/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1405/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1406/// is lowered to:
1407/// (VREPLI $v1, 0)
1408/// (VILVL $v0, $v1, $v0)
1410 ArrayRef<int> Mask, MVT VT,
1411 SDValue V1, SDValue V2,
1412 SelectionDAG &DAG,
1413 const APInt &Zeroable) {
1414 int Bits = VT.getSizeInBits();
1415 int EltBits = VT.getScalarSizeInBits();
1416 int NumElements = VT.getVectorNumElements();
1417
1418 if (Zeroable.isAllOnes())
1419 return DAG.getConstant(0, DL, VT);
1420
1421 // Define a helper function to check a particular ext-scale and lower to it if
1422 // valid.
1423 auto Lower = [&](int Scale) -> SDValue {
1424 SDValue InputV;
1425 bool AnyExt = true;
1426 int Offset = 0;
1427 for (int i = 0; i < NumElements; i++) {
1428 int M = Mask[i];
1429 if (M < 0)
1430 continue;
1431 if (i % Scale != 0) {
1432 // Each of the extended elements needs to be zeroable.
1433 if (!Zeroable[i])
1434 return SDValue();
1435
1436 AnyExt = false;
1437 continue;
1438 }
1439
1440 // Each of the base elements needs to be consecutive indices into the
1441 // same input vector.
1442 SDValue V = M < NumElements ? V1 : V2;
1443 M = M % NumElements;
1444 if (!InputV) {
1445 InputV = V;
1446 Offset = M - (i / Scale);
1447
1448 // These offsets can't be handled.
1449 if (Offset % (NumElements / Scale))
1450 return SDValue();
1451 } else if (InputV != V)
1452 return SDValue();
1453
1454 if (M != (Offset + (i / Scale)))
1455 return SDValue(); // Non-consecutive strided elements.
1456 }
1457
1458 // If we fail to find an input, we have a zero-shuffle which should always
1459 // have already been handled.
1460 if (!InputV)
1461 return SDValue();
1462
1463 do {
1464 unsigned VilVLoHi = LoongArchISD::VILVL;
1465 if (Offset >= (NumElements / 2)) {
1466 VilVLoHi = LoongArchISD::VILVH;
1467 Offset -= (NumElements / 2);
1468 }
1469
1470 MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
1471 SDValue Ext =
1472 AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
1473 InputV = DAG.getBitcast(InputVT, InputV);
1474 InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
1475 Scale /= 2;
1476 EltBits *= 2;
1477 NumElements /= 2;
1478 } while (Scale > 1);
1479 return DAG.getBitcast(VT, InputV);
1480 };
1481
1482 // Each iteration, try extending the elements half as much, but into twice as
1483 // many elements.
1484 for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
1485 NumExtElements *= 2) {
1486 if (SDValue V = Lower(NumElements / NumExtElements))
1487 return V;
1488 }
1489 return SDValue();
1490}
1491
1492/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1493///
1494/// VREPLVEI performs vector broadcast based on an element specified by an
1495/// integer immediate, with its mask being similar to:
1496/// <x, x, x, ...>
1497/// where x is any valid index.
1498///
1499/// When undef's appear in the mask they are treated as if they were whatever
1500/// value is necessary in order to fit the above form.
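/// For example (illustrative):
/// %2 = shufflevector <4 x i32> %0, <4 x i32> undef,
/// <4 x i32> <i32 1, i32 1, i32 1, i32 1>
/// is lowered to:
/// (VREPLVEI_W $v0, $v0, 1)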
1501static SDValue
1503 SDValue V1, SDValue V2, SelectionDAG &DAG,
1504 const LoongArchSubtarget &Subtarget) {
1505 int SplatIndex = -1;
1506 for (const auto &M : Mask) {
1507 if (M != -1) {
1508 SplatIndex = M;
1509 break;
1510 }
1511 }
1512
1513 if (SplatIndex == -1)
1514 return DAG.getUNDEF(VT);
1515
1516 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1517 if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
1518 APInt Imm(64, SplatIndex);
1519 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1520 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1521 }
1522
1523 return SDValue();
1524}
1525
1526/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1527///
1528/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1529/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1530///
1531/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1532/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1533/// When undef's appear they are treated as if they were whatever value is
1534/// necessary in order to fit the above forms.
1535///
1536/// For example:
1537/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1538/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1539/// i32 7, i32 6, i32 5, i32 4>
1540/// is lowered to:
1541/// (VSHUF4I_H $v0, $v1, 27)
1542/// where the 27 comes from:
1543/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
1544static SDValue
1546 SDValue V1, SDValue V2, SelectionDAG &DAG,
1547 const LoongArchSubtarget &Subtarget) {
1548
1549 unsigned SubVecSize = 4;
1550 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1551 SubVecSize = 2;
1552
1553 int SubMask[4] = {-1, -1, -1, -1};
1554 for (unsigned i = 0; i < SubVecSize; ++i) {
1555 for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
1556 int M = Mask[j];
1557
1558 // Convert from vector index to 4-element subvector index
1559 // If an index refers to an element outside of the subvector then give up
1560 if (M != -1) {
1561 M -= 4 * (j / SubVecSize);
1562 if (M < 0 || M >= 4)
1563 return SDValue();
1564 }
1565
1566 // If the mask has an undef, replace it with the current index.
1567 // Note that it might still be undef if the current index is also undef
1568 if (SubMask[i] == -1)
1569 SubMask[i] = M;
1570 // Check that non-undef values are the same as in the mask. If they
1571 // aren't then give up
1572 else if (M != -1 && M != SubMask[i])
1573 return SDValue();
1574 }
1575 }
1576
1577 // Calculate the immediate. Replace any remaining undefs with zero
1578 APInt Imm(64, 0);
1579 for (int i = SubVecSize - 1; i >= 0; --i) {
1580 int M = SubMask[i];
1581
1582 if (M == -1)
1583 M = 0;
1584
1585 Imm <<= 2;
1586 Imm |= M & 0x3;
1587 }
1588
1589 MVT GRLenVT = Subtarget.getGRLenVT();
1590
1591 // Return vshuf4i.d
1592 if (VT == MVT::v2f64 || VT == MVT::v2i64)
1593 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
1594 DAG.getConstant(Imm, DL, GRLenVT));
1595
1596 return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
1597 DAG.getConstant(Imm, DL, GRLenVT));
1598}
1599
1600/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1601///
1602/// VPACKEV interleaves the even elements from each vector.
1603///
1604/// It is possible to lower into VPACKEV when the mask consists of two of the
1605/// following forms interleaved:
1606/// <0, 2, 4, ...>
1607/// <n, n+2, n+4, ...>
1608/// where n is the number of elements in the vector.
1609/// For example:
1610/// <0, 0, 2, 2, 4, 4, ...>
1611/// <0, n, 2, n+2, 4, n+4, ...>
1612///
1613/// When undef's appear in the mask they are treated as if they were whatever
1614/// value is necessary in order to fit the above forms.
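/// For example (illustrative): on <4 x i32> inputs %a and %b, the mask
/// <0, 4, 2, 6> selects <a0, b0, a2, b2> and can therefore be lowered to
/// VPACKEV.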
1616 MVT VT, SDValue V1, SDValue V2,
1617 SelectionDAG &DAG) {
1618
1619 const auto &Begin = Mask.begin();
1620 const auto &End = Mask.end();
1621 SDValue OriV1 = V1, OriV2 = V2;
1622
1623 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
1624 V1 = OriV1;
1625 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
1626 V1 = OriV2;
1627 else
1628 return SDValue();
1629
1630 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
1631 V2 = OriV1;
1632 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
1633 V2 = OriV2;
1634 else
1635 return SDValue();
1636
1637 return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
1638}
1639
1640/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1641///
1642/// VPACKOD interleaves the odd elements from each vector.
1643///
1644/// It is possible to lower into VPACKOD when the mask consists of two of the
1645/// following forms interleaved:
1646/// <1, 3, 5, ...>
1647/// <n+1, n+3, n+5, ...>
1648/// where n is the number of elements in the vector.
1649/// For example:
1650/// <1, 1, 3, 3, 5, 5, ...>
1651/// <1, n+1, 3, n+3, 5, n+5, ...>
1652///
1653/// When undef's appear in the mask they are treated as if they were whatever
1654/// value is necessary in order to fit the above forms.
1656 MVT VT, SDValue V1, SDValue V2,
1657 SelectionDAG &DAG) {
1658
1659 const auto &Begin = Mask.begin();
1660 const auto &End = Mask.end();
1661 SDValue OriV1 = V1, OriV2 = V2;
1662
1663 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
1664 V1 = OriV1;
1665 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
1666 V1 = OriV2;
1667 else
1668 return SDValue();
1669
1670 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
1671 V2 = OriV1;
1672 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
1673 V2 = OriV2;
1674 else
1675 return SDValue();
1676
1677 return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
1678}
1679
1680/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1681///
1682/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1683/// of each vector.
1684///
1685/// It is possible to lower into VILVH when the mask consists of two of the
1686/// following forms interleaved:
1687/// <x, x+1, x+2, ...>
1688/// <n+x, n+x+1, n+x+2, ...>
1689/// where n is the number of elements in the vector and x is half n.
1690/// For example:
1691/// <x, x, x+1, x+1, x+2, x+2, ...>
1692/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1693///
1694/// When undef's appear in the mask they are treated as if they were whatever
1695/// value is necessary in order to fit the above forms.
1697 MVT VT, SDValue V1, SDValue V2,
1698 SelectionDAG &DAG) {
1699
1700 const auto &Begin = Mask.begin();
1701 const auto &End = Mask.end();
1702 unsigned HalfSize = Mask.size() / 2;
1703 SDValue OriV1 = V1, OriV2 = V2;
1704
1705 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
1706 V1 = OriV1;
1707 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
1708 V1 = OriV2;
1709 else
1710 return SDValue();
1711
1712 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
1713 V2 = OriV1;
1714 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
1715 1))
1716 V2 = OriV2;
1717 else
1718 return SDValue();
1719
1720 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
1721}
1722
1723/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1724///
1725/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1726/// of each vector.
1727///
1728/// It is possible to lower into VILVL when the mask consists of two of the
1729/// following forms interleaved:
1730/// <0, 1, 2, ...>
1731/// <n, n+1, n+2, ...>
1732/// where n is the number of elements in the vector.
1733/// For example:
1734/// <0, 0, 1, 1, 2, 2, ...>
1735/// <0, n, 1, n+1, 2, n+2, ...>
1736///
1737/// When undef's appear in the mask they are treated as if they were whatever
1738/// value is necessary in order to fit the above forms.
1740 MVT VT, SDValue V1, SDValue V2,
1741 SelectionDAG &DAG) {
1742
1743 const auto &Begin = Mask.begin();
1744 const auto &End = Mask.end();
1745 SDValue OriV1 = V1, OriV2 = V2;
1746
1747 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
1748 V1 = OriV1;
1749 else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
1750 V1 = OriV2;
1751 else
1752 return SDValue();
1753
1754 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
1755 V2 = OriV1;
1756 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
1757 V2 = OriV2;
1758 else
1759 return SDValue();
1760
1761 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1762}
1763
1764/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
1765///
1766/// VPICKEV copies the even elements of each vector into the result vector.
1767///
1768/// It is possible to lower into VPICKEV when the mask consists of two of the
1769/// following forms concatenated:
1770/// <0, 2, 4, ...>
1771/// <n, n+2, n+4, ...>
1772/// where n is the number of elements in the vector.
1773/// For example:
1774/// <0, 2, 4, ..., 0, 2, 4, ...>
1775/// <0, 2, 4, ..., n, n+2, n+4, ...>
1776///
1777/// When undef's appear in the mask they are treated as if they were whatever
1778/// value is necessary in order to fit the above forms.
1780 MVT VT, SDValue V1, SDValue V2,
1781 SelectionDAG &DAG) {
1782
1783 const auto &Begin = Mask.begin();
1784 const auto &Mid = Mask.begin() + Mask.size() / 2;
1785 const auto &End = Mask.end();
1786 SDValue OriV1 = V1, OriV2 = V2;
1787
1788 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
1789 V1 = OriV1;
1790 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
1791 V1 = OriV2;
1792 else
1793 return SDValue();
1794
1795 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
1796 V2 = OriV1;
1797 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
1798 V2 = OriV2;
1799
1800 else
1801 return SDValue();
1802
1803 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1804}
1805
1806/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
1807///
1808/// VPICKOD copies the odd elements of each vector into the result vector.
1809///
1810/// It is possible to lower into VPICKOD when the mask consists of two of the
1811/// following forms concatenated:
1812/// <1, 3, 5, ...>
1813/// <n+1, n+3, n+5, ...>
1814/// where n is the number of elements in the vector.
1815/// For example:
1816/// <1, 3, 5, ..., 1, 3, 5, ...>
1817/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
1818///
1819/// When undef's appear in the mask they are treated as if they were whatever
1820/// value is necessary in order to fit the above forms.
1822 MVT VT, SDValue V1, SDValue V2,
1823 SelectionDAG &DAG) {
1824
1825 const auto &Begin = Mask.begin();
1826 const auto &Mid = Mask.begin() + Mask.size() / 2;
1827 const auto &End = Mask.end();
1828 SDValue OriV1 = V1, OriV2 = V2;
1829
1830 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
1831 V1 = OriV1;
1832 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
1833 V1 = OriV2;
1834 else
1835 return SDValue();
1836
1837 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
1838 V2 = OriV1;
1839 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
1840 V2 = OriV2;
1841 else
1842 return SDValue();
1843
1844 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1845}
1846
1847/// Lower VECTOR_SHUFFLE into VSHUF.
1848///
1849/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
1850/// adding it as an operand to the resulting VSHUF.
1852 MVT VT, SDValue V1, SDValue V2,
1853 SelectionDAG &DAG) {
1854
1856 for (auto M : Mask)
1857 Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
1858
1859 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1860 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
1861
1862 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
1863 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
1864 // VSHF concatenates the vectors in a bitwise fashion:
1865 // <0b00, 0b01> + <0b10, 0b11> ->
1866 // 0b0100 + 0b1110 -> 0b01001110
1867 // <0b10, 0b11, 0b00, 0b01>
1868 // We must therefore swap the operands to get the correct result.
1869 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1870}
1871
1872/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
1873///
1874/// This routine breaks down the specific type of 128-bit shuffle and
1875/// dispatches to the lowering routines accordingly.
1876 static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1877 SDValue V1, SDValue V2, SelectionDAG &DAG,
1878 const LoongArchSubtarget &Subtarget) {
1879 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
1880 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
1881 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
1882 "Vector type is unsupported for lsx!");
1884 "Two operands have different types!");
1885 assert(VT.getVectorNumElements() == Mask.size() &&
1886 "Unexpected mask size for shuffle!");
1887 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1888
1889 APInt KnownUndef, KnownZero;
1890 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
1891 APInt Zeroable = KnownUndef | KnownZero;
1892
1893 SDValue Result;
1894 // TODO: Add more comparison patterns.
1895 if (V2.isUndef()) {
1896 if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
1897 Subtarget)))
1898 return Result;
1899 if ((Result =
1900 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1901 return Result;
1902
1903 // TODO: The commented-out assignment below may be enabled in the future to
1904 // better match the pattern for instruction selection.
1905 /* V2 = V1; */
1906 }
1907
1908 // It is recommended not to change the pattern comparison order for better
1909 // performance.
1910 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
1911 return Result;
1912 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
1913 return Result;
1914 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
1915 return Result;
1916 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
1917 return Result;
1918 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
1919 return Result;
1920 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
1921 return Result;
1922 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
1923 (Result =
1924 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
1925 return Result;
1926 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
1927 Zeroable)))
1928 return Result;
1929 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
1930 Zeroable)))
1931 return Result;
1932 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
1933 Subtarget)))
1934 return Result;
1935 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
1936 return NewShuffle;
1937 if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
1938 return Result;
1939 return SDValue();
1940}
1941
1942/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
1943///
1944 /// It is an XVREPLVEI when the mask is:
1945 /// <x, x, x, ..., x+n, x+n, x+n, ...>
1946 /// where x appears n times and n is half the length of the vector.
1947///
1948/// When undef's appear in the mask they are treated as if they were whatever
1949/// value is necessary in order to fit the above form.
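/// For example, for v8i32 (n = 4): <1, 1, 1, 1, 5, 5, 5, 5>.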
1950static SDValue
1951 lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1952 SDValue V1, SDValue V2, SelectionDAG &DAG,
1953 const LoongArchSubtarget &Subtarget) {
1954 int SplatIndex = -1;
1955 for (const auto &M : Mask) {
1956 if (M != -1) {
1957 SplatIndex = M;
1958 break;
1959 }
1960 }
1961
1962 if (SplatIndex == -1)
1963 return DAG.getUNDEF(VT);
1964
1965 const auto &Begin = Mask.begin();
1966 const auto &End = Mask.end();
1967 unsigned HalfSize = Mask.size() / 2;
1968
1969 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
1970 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
1971 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
1972 0)) {
1973 APInt Imm(64, SplatIndex);
1974 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
1975 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()));
1976 }
1977
1978 return SDValue();
1979}
1980
1981/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
1982static SDValue
1983 lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1984 SDValue V1, SDValue V2, SelectionDAG &DAG,
1985 const LoongArchSubtarget &Subtarget) {
1986 // When the size is less than or equal to 4, lower cost instructions may be
1987 // used.
1988 if (Mask.size() <= 4)
1989 return SDValue();
1990 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
1991}
1992
1993/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
1994 static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
1995 MVT VT, SDValue V1, SDValue V2,
1996 SelectionDAG &DAG) {
1997 // LoongArch LASX only has XVPERM_W.
1998 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
1999 return SDValue();
2000
2001 unsigned NumElts = VT.getVectorNumElements();
2002 unsigned HalfSize = NumElts / 2;
2003 bool FrontLo = true, FrontHi = true;
2004 bool BackLo = true, BackHi = true;
2005
2006 auto inRange = [](int val, int low, int high) {
2007 return (val == -1) || (val >= low && val < high);
2008 };
2009
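  // For each half of the mask, record whether all of its defined indices fall
  // in the low half or the high half of the (single) source vector.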
2010 for (unsigned i = 0; i < HalfSize; ++i) {
2011 int Fronti = Mask[i];
2012 int Backi = Mask[i + HalfSize];
2013
2014 FrontLo &= inRange(Fronti, 0, HalfSize);
2015 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2016 BackLo &= inRange(Backi, 0, HalfSize);
2017 BackHi &= inRange(Backi, HalfSize, NumElts);
2018 }
2019
2020 // If both the lower and upper 128-bit parts access only one half of the
2021 // vector (either lower or upper), avoid using xvperm.w. The latency of
2022 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2023 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2024 return SDValue();
2025
2026 SmallVector<SDValue, 8> Masks;
2027 for (unsigned i = 0; i < NumElts; ++i)
2028 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(MVT::i64)
2029 : DAG.getConstant(Mask[i], DL, MVT::i64));
2030 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2031
2032 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2033}
2034
2035/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
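///
/// Expressed on whole-vector element indices, the even/odd packing pattern is
/// the same with or without the 128-bit lane split, so the 256-bit form can
/// reuse the 128-bit VPACKEV lowering unchanged.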
2036 static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
2037 MVT VT, SDValue V1, SDValue V2,
2038 SelectionDAG &DAG) {
2039 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2040}
2041
2042/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2043 static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
2044 MVT VT, SDValue V1, SDValue V2,
2045 SelectionDAG &DAG) {
2046 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2047}
2048
2049/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
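///
/// XVILVH interleaves the elements from the high half of each 128-bit lane
/// of the two input vectors, independently per lane.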
2050 static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
2051 MVT VT, SDValue V1, SDValue V2,
2052 SelectionDAG &DAG) {
2053
2054 const auto &Begin = Mask.begin();
2055 const auto &End = Mask.end();
2056 unsigned HalfSize = Mask.size() / 2;
2057 unsigned LeftSize = HalfSize / 2;
2058 SDValue OriV1 = V1, OriV2 = V2;
2059
2060 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2061 1) &&
2062 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2063 V1 = OriV1;
2064 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2065 Mask.size() + HalfSize - LeftSize, 1) &&
2066 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2067 Mask.size() + HalfSize + LeftSize, 1))
2068 V1 = OriV2;
2069 else
2070 return SDValue();
2071
2072 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2073 1) &&
2074 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2075 1))
2076 V2 = OriV1;
2077 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2078 Mask.size() + HalfSize - LeftSize, 1) &&
2079 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2080 Mask.size() + HalfSize + LeftSize, 1))
2081 V2 = OriV2;
2082 else
2083 return SDValue();
2084
2085 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2086}
2087
2088/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
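///
/// XVILVL interleaves the elements from the low half of each 128-bit lane
/// of the two input vectors, independently per lane.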
2089 static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
2090 MVT VT, SDValue V1, SDValue V2,
2091 SelectionDAG &DAG) {
2092
2093 const auto &Begin = Mask.begin();
2094 const auto &End = Mask.end();
2095 unsigned HalfSize = Mask.size() / 2;
2096 SDValue OriV1 = V1, OriV2 = V2;
2097
2098 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2099 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2100 V1 = OriV1;
2101 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2102 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2103 Mask.size() + HalfSize, 1))
2104 V1 = OriV2;
2105 else
2106 return SDValue();
2107
2108 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2109 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2110 V2 = OriV1;
2111 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2112 1) &&
2113 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2114 Mask.size() + HalfSize, 1))
2115 V2 = OriV2;
2116 else
2117 return SDValue();
2118
2119 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2120}
2121
2122/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
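///
/// Like VPICKEV, but the even-indexed elements are gathered independently
/// within each 128-bit lane of the two input vectors.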
2123 static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
2124 MVT VT, SDValue V1, SDValue V2,
2125 SelectionDAG &DAG) {
2126
2127 const auto &Begin = Mask.begin();
2128 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2129 const auto &Mid = Mask.begin() + Mask.size() / 2;
2130 const auto &RightMid = Mask.end() - Mask.size() / 4;
2131 const auto &End = Mask.end();
2132 unsigned HalfSize = Mask.size() / 2;
2133 SDValue OriV1 = V1, OriV2 = V2;
2134
2135 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2136 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2137 V1 = OriV1;
2138 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2139 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2140 V1 = OriV2;
2141 else
2142 return SDValue();
2143
2144 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2145 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2146 V2 = OriV1;
2147 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2148 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2149 V2 = OriV2;
2150
2151 else
2152 return SDValue();
2153
2154 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2155}
2156
2157/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
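///
/// Like VPICKOD, but the odd-indexed elements are gathered independently
/// within each 128-bit lane of the two input vectors.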
2158 static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
2159 MVT VT, SDValue V1, SDValue V2,
2160 SelectionDAG &DAG) {
2161
2162 const auto &Begin = Mask.begin();
2163 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2164 const auto &Mid = Mask.begin() + Mask.size() / 2;
2165 const auto &RightMid = Mask.end() - Mask.size() / 4;
2166 const auto &End = Mask.end();
2167 unsigned HalfSize = Mask.size() / 2;
2168 SDValue OriV1 = V1, OriV2 = V2;
2169
2170 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2171 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2172 V1 = OriV1;
2173 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2174 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2175 2))
2176 V1 = OriV2;
2177 else
2178 return SDValue();
2179
2180 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2181 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2182 V2 = OriV1;
2183 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2184 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2185 2))
2186 V2 = OriV2;
2187 else
2188 return SDValue();
2189
2190 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2191}
2192
2193/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2194 static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
2195 MVT VT, SDValue V1, SDValue V2,
2196 SelectionDAG &DAG) {
2197
2198 int MaskSize = Mask.size();
2199 int HalfSize = Mask.size() / 2;
2200 const auto &Begin = Mask.begin();
2201 const auto &Mid = Mask.begin() + HalfSize;
2202 const auto &End = Mask.end();
2203
2204 // VECTOR_SHUFFLE concatenates the vectors:
2205 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2206 // shuffling ->
2207 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2208 //
2209 // XVSHUF concatenates the vectors:
2210 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2211 // shuffling ->
2212 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2213 SmallVector<SDValue, 8> MaskAlloc;
2214 for (auto it = Begin; it < Mid; it++) {
2215 if (*it < 0) // UNDEF
2216 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2217 else if ((*it >= 0 && *it < HalfSize) ||
2218 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2219 int M = *it < HalfSize ? *it : *it - HalfSize;
2220 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2221 } else
2222 return SDValue();
2223 }
2224 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2225
2226 for (auto it = Mid; it < End; it++) {
2227 if (*it < 0) // UNDEF
2228 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2229 else if ((*it >= HalfSize && *it < MaskSize) ||
2230 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2231 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2232 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2233 } else
2234 return SDValue();
2235 }
2236 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2237
2238 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2239 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
2240 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2241}
2242
2243/// Shuffle vectors by lane to generate more optimized instructions.
2244/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2245///
2246/// Therefore, except for the following four cases, other cases are regarded
2247/// as cross-lane shuffles, where optimization is relatively limited.
2248///
2249 /// - Shuffle high, low lanes of the two input vectors
2250 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2251 /// - Shuffle low, high lanes of the two input vectors
2252 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2253 /// - Shuffle low, low lanes of the two input vectors
2254 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2255 /// - Shuffle high, high lanes of the two input vectors
2256 /// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2257///
2258/// The first case is the closest to LoongArch instructions and the other
2259/// cases need to be converted to it for processing.
2260///
2261/// This function may modify V1, V2 and Mask
2262 static void canonicalizeShuffleVectorByLane(
2263 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2264 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2265
2266 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2267
2268 int MaskSize = Mask.size();
2269 int HalfSize = Mask.size() / 2;
2270 MVT GRLenVT = Subtarget.getGRLenVT();
2271
2272 HalfMaskType preMask = None, postMask = None;
2273
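  // Classify each half of the mask: HighLaneTy if all of its indices select
  // elements from the first half of an input vector, LowLaneTy if they all
  // select from the second half, None otherwise.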
2274 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2275 return M < 0 || (M >= 0 && M < HalfSize) ||
2276 (M >= MaskSize && M < MaskSize + HalfSize);
2277 }))
2278 preMask = HighLaneTy;
2279 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2280 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2281 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2282 }))
2283 preMask = LowLaneTy;
2284
2285 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2286 return M < 0 || (M >= 0 && M < HalfSize) ||
2287 (M >= MaskSize && M < MaskSize + HalfSize);
2288 }))
2289 postMask = HighLaneTy;
2290 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2291 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2292 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2293 }))
2294 postMask = LowLaneTy;
2295
2296 // The pre-half of mask is high lane type, and the post-half of mask
2297 // is low lane type, which is closest to the LoongArch instructions.
2298 //
2299 // Note: In the LoongArch architecture, the high lane of the mask corresponds
2300 // to the lower 128 bits of the vector register, and the low lane of the mask
2301 // corresponds to the higher 128 bits of the vector register.
2302 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2303 return;
2304 }
2305 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2306 V1 = DAG.getBitcast(MVT::v4i64, V1);
2307 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2308 DAG.getConstant(0b01001110, DL, GRLenVT));
2309 V1 = DAG.getBitcast(VT, V1);
2310
2311 if (!V2.isUndef()) {
2312 V2 = DAG.getBitcast(MVT::v4i64, V2);
2313 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2314 DAG.getConstant(0b01001110, DL, GRLenVT));
2315 V2 = DAG.getBitcast(VT, V2);
2316 }
2317
2318 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2319 *it = *it < 0 ? *it : *it - HalfSize;
2320 }
2321 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2322 *it = *it < 0 ? *it : *it + HalfSize;
2323 }
2324 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2325 V1 = DAG.getBitcast(MVT::v4i64, V1);
2326 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2327 DAG.getConstant(0b11101110, DL, GRLenVT));
2328 V1 = DAG.getBitcast(VT, V1);
2329
2330 if (!V2.isUndef()) {
2331 V2 = DAG.getBitcast(MVT::v4i64, V2);
2332 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2333 DAG.getConstant(0b11101110, DL, GRLenVT));
2334 V2 = DAG.getBitcast(VT, V2);
2335 }
2336
2337 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2338 *it = *it < 0 ? *it : *it - HalfSize;
2339 }
2340 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2341 V1 = DAG.getBitcast(MVT::v4i64, V1);
2342 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2343 DAG.getConstant(0b01000100, DL, GRLenVT));
2344 V1 = DAG.getBitcast(VT, V1);
2345
2346 if (!V2.isUndef()) {
2347 V2 = DAG.getBitcast(MVT::v4i64, V2);
2348 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2349 DAG.getConstant(0b01000100, DL, GRLenVT));
2350 V2 = DAG.getBitcast(VT, V2);
2351 }
2352
2353 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2354 *it = *it < 0 ? *it : *it + HalfSize;
2355 }
2356 } else { // cross-lane
2357 return;
2358 }
2359}
2360
2361/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2362 /// Only for 256-bit vectors.
2363 ///
2364 /// For example:
2365 /// %2 = shufflevector <4 x i64> %0, <4 x i64> poison,
2366 /// <4 x i32> <i32 0, i32 3, i32 2, i32 0>
2367 /// is lowered to:
2368/// (XVPERMI $xr2, $xr0, 78)
2369/// (XVSHUF $xr1, $xr2, $xr0)
2370/// (XVORI $xr0, $xr1, 0)
2371 static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL,
2372 ArrayRef<int> Mask,
2373 MVT VT, SDValue V1,
2374 SDValue V2,
2375 SelectionDAG &DAG) {
2376 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2377 int Size = Mask.size();
2378 int LaneSize = Size / 2;
2379
2380 bool LaneCrossing[2] = {false, false};
2381 for (int i = 0; i < Size; ++i)
2382 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2383 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2384
2385 // Bail out if no element crosses a lane; this lowering is only needed for
2386 // cross-lane shuffles.
2386 if (!LaneCrossing[0] && !LaneCrossing[1])
2387 return SDValue();
2388
2389 SmallVector<int> InLaneMask;
2390 InLaneMask.assign(Mask.begin(), Mask.end());
2391 for (int i = 0; i < Size; ++i) {
2392 int &M = InLaneMask[i];
2393 if (M < 0)
2394 continue;
2395 if (((M % Size) / LaneSize) != (i / LaneSize))
2396 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2397 }
2398
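  // Build a copy of V1 with its two 128-bit lanes swapped; the rewritten
  // in-lane mask then takes the cross-lane elements from this flipped copy.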
2399 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2400 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2401 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2402 Flipped = DAG.getBitcast(VT, Flipped);
2403 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2404}
2405
2406/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2407///
2408/// This routine breaks down the specific type of 256-bit shuffle and
2409/// dispatches to the lowering routines accordingly.
2410 static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2411 SDValue V1, SDValue V2, SelectionDAG &DAG,
2412 const LoongArchSubtarget &Subtarget) {
2413 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2414 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2415 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2416 "Vector type is unsupported for lasx!");
2418 "Two operands have different types!");
2419 assert(VT.getVectorNumElements() == Mask.size() &&
2420 "Unexpected mask size for shuffle!");
2421 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2422 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2423
2424 // Canonicalize non-cross-lane shuffle vectors.
2425 SmallVector<int> NewMask(Mask);
2426 canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2427
2428 APInt KnownUndef, KnownZero;
2429 computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2430 APInt Zeroable = KnownUndef | KnownZero;
2431
2432 SDValue Result;
2433 // TODO: Add more comparison patterns.
2434 if (V2.isUndef()) {
2435 if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2436 Subtarget)))
2437 return Result;
2438 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2439 Subtarget)))
2440 return Result;
2441 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2442 return Result;
2443 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2444 V1, V2, DAG)))
2445 return Result;
2446
2447 // TODO: The commented-out assignment below may be enabled in the future to
2448 // better match the pattern for instruction selection.
2449 /* V2 = V1; */
2450 }
2451
2452 // It is recommended not to change the pattern comparison order for better
2453 // performance.
2454 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2455 return Result;
2456 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2457 return Result;
2458 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2459 return Result;
2460 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2461 return Result;
2462 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2463 return Result;
2464 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2465 return Result;
2466 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2467 Subtarget, Zeroable)))
2468 return Result;
2469 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2470 Subtarget)))
2471 return Result;
2472 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2473 return NewShuffle;
2474 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2475 return Result;
2476
2477 return SDValue();
2478}
2479
2480SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2481 SelectionDAG &DAG) const {
2482 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2483 ArrayRef<int> OrigMask = SVOp->getMask();
2484 SDValue V1 = Op.getOperand(0);
2485 SDValue V2 = Op.getOperand(1);
2486 MVT VT = Op.getSimpleValueType();
2487 int NumElements = VT.getVectorNumElements();
2488 SDLoc DL(Op);
2489
2490 bool V1IsUndef = V1.isUndef();
2491 bool V2IsUndef = V2.isUndef();
2492 if (V1IsUndef && V2IsUndef)
2493 return DAG.getUNDEF(VT);
2494
2495 // When we create a shuffle node we put the UNDEF node to second operand,
2496 // but in some cases the first operand may be transformed to UNDEF.
2497 // In this case we should just commute the node.
2498 if (V1IsUndef)
2499 return DAG.getCommutedVectorShuffle(*SVOp);
2500
2501 // Check for non-undef masks pointing at an undef vector and make the masks
2502 // undef as well. This makes it easier to match the shuffle based solely on
2503 // the mask.
2504 if (V2IsUndef &&
2505 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2506 SmallVector<int, 8> NewMask(OrigMask);
2507 for (int &M : NewMask)
2508 if (M >= NumElements)
2509 M = -1;
2510 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2511 }
2512
2513 // Check for illegal shuffle mask element index values.
2514 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2515 (void)MaskUpperLimit;
2516 assert(llvm::all_of(OrigMask,
2517 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2518 "Out of bounds shuffle index");
2519
2520 // For each vector width, delegate to a specialized lowering routine.
2521 if (VT.is128BitVector())
2522 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2523
2524 if (VT.is256BitVector())
2525 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2526
2527 return SDValue();
2528}
2529
2530SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2531 SelectionDAG &DAG) const {
2532 // Custom lower to ensure the libcall return is passed in an FPR on hard
2533 // float ABIs.
2534 SDLoc DL(Op);
2535 MakeLibCallOptions CallOptions;
2536 SDValue Op0 = Op.getOperand(0);
2537 SDValue Chain = SDValue();
2538 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2539 SDValue Res;
2540 std::tie(Res, Chain) =
2541 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2542 if (Subtarget.is64Bit())
2543 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2544 return DAG.getBitcast(MVT::i32, Res);
2545}
2546
2547SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2548 SelectionDAG &DAG) const {
2549 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2550 // float ABIs.
2551 SDLoc DL(Op);
2552 MakeLibCallOptions CallOptions;
2553 SDValue Op0 = Op.getOperand(0);
2554 SDValue Chain = SDValue();
2555 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2556 DL, MVT::f32, Op0)
2557 : DAG.getBitcast(MVT::f32, Op0);
2558 SDValue Res;
2559 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2560 CallOptions, DL, Chain);
2561 return Res;
2562}
2563
2564SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2565 SelectionDAG &DAG) const {
2566 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2567 SDLoc DL(Op);
2568 MakeLibCallOptions CallOptions;
2569 RTLIB::Libcall LC =
2570 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2571 SDValue Res =
2572 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2573 if (Subtarget.is64Bit())
2574 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2575 return DAG.getBitcast(MVT::i32, Res);
2576}
2577
2578SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2579 SelectionDAG &DAG) const {
2580 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2581 MVT VT = Op.getSimpleValueType();
2582 SDLoc DL(Op);
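  // A bf16 value is the high 16 bits of the corresponding f32, so shifting the
  // 16-bit payload left by 16 and reinterpreting the bits as f32 performs the
  // extension.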
2583 Op = DAG.getNode(
2584 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2585 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2586 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2587 DL, MVT::f32, Op)
2588 : DAG.getBitcast(MVT::f32, Op);
2589 if (VT != MVT::f32)
2590 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2591 return Res;
2592}
2593
2594// Lower BUILD_VECTOR as broadcast load (if possible).
2595// For example:
2596// %a = load i8, ptr %ptr
2597// %b = build_vector %a, %a, %a, %a
2598 // is lowered to:
2599// (VLDREPL_B $a0, 0)
2600 static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp,
2601 const SDLoc &DL,
2602 SelectionDAG &DAG) {
2603 MVT VT = BVOp->getSimpleValueType(0);
2604 int NumOps = BVOp->getNumOperands();
2605
2606 assert((VT.is128BitVector() || VT.is256BitVector()) &&
2607 "Unsupported vector type for broadcast.");
2608
2609 SDValue IdentitySrc;
2610 bool IsIdeneity = true;
2611
2612 for (int i = 0; i != NumOps; i++) {
2613 SDValue Op = BVOp->getOperand(i);
2614 if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
2615 IsIdeneity = false;
2616 break;
2617 }
2618 IdentitySrc = BVOp->getOperand(0);
2619 }
2620
2621 // make sure that this load is valid and only has one user.
2622 if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
2623 return SDValue();
2624
2625 auto *LN = cast<LoadSDNode>(IdentitySrc);
2626 auto ExtType = LN->getExtensionType();
2627
2628 if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
2629 VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
2630 SDVTList Tys =
2631 LN->isIndexed()
2632 ? DAG.getVTList(VT, LN->getBasePtr().getValueType(), MVT::Other)
2633 : DAG.getVTList(VT, MVT::Other);
2634 SDValue Ops[] = {LN->getChain(), LN->getBasePtr(), LN->getOffset()};
2635 SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
2636 DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
2637 return BCast;
2638 }
2639 return SDValue();
2640}
2641
2642SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
2643 SelectionDAG &DAG) const {
2644 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2645 EVT ResTy = Op->getValueType(0);
2646 unsigned NumElts = ResTy.getVectorNumElements();
2647 SDLoc DL(Op);
2648 APInt SplatValue, SplatUndef;
2649 unsigned SplatBitSize;
2650 bool HasAnyUndefs;
2651 bool IsConstant = false;
2652 bool UseSameConstant = true;
2653 SDValue ConstantValue;
2654 bool Is128Vec = ResTy.is128BitVector();
2655 bool Is256Vec = ResTy.is256BitVector();
2656
2657 if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
2658 (!Subtarget.hasExtLASX() || !Is256Vec))
2659 return SDValue();
2660
2661 if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
2662 return Result;
2663
2664 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
2665 /*MinSplatBits=*/8) &&
2666 SplatBitSize <= 64) {
2667 // We can only cope with 8, 16, 32, or 64-bit elements.
2668 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2669 SplatBitSize != 64)
2670 return SDValue();
2671
2672 if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
2673 // We can only handle 64-bit elements that are within
2674 // the signed 32-bit range on 32-bit targets.
2675 if (!SplatValue.isSignedIntN(32))
2676 return SDValue();
2677 if ((Is128Vec && ResTy == MVT::v4i32) ||
2678 (Is256Vec && ResTy == MVT::v8i32))
2679 return Op;
2680 }
2681
2682 EVT ViaVecTy;
2683
2684 switch (SplatBitSize) {
2685 default:
2686 return SDValue();
2687 case 8:
2688 ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
2689 break;
2690 case 16:
2691 ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
2692 break;
2693 case 32:
2694 ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
2695 break;
2696 case 64:
2697 ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
2698 break;
2699 }
2700
2701 // SelectionDAG::getConstant will promote SplatValue appropriately.
2702 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2703
2704 // Bitcast to the type we originally wanted.
2705 if (ViaVecTy != ResTy)
2706 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2707
2708 return Result;
2709 }
2710
2711 if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
2712 return Op;
2713
2714 for (unsigned i = 0; i < NumElts; ++i) {
2715 SDValue Opi = Node->getOperand(i);
2716 if (isIntOrFPConstant(Opi)) {
2717 IsConstant = true;
2718 if (!ConstantValue.getNode())
2719 ConstantValue = Opi;
2720 else if (ConstantValue != Opi)
2721 UseSameConstant = false;
2722 }
2723 }
2724
2725 // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
2726 if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
2727 SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
2728 for (unsigned i = 0; i < NumElts; ++i) {
2729 SDValue Opi = Node->getOperand(i);
2730 if (!isIntOrFPConstant(Opi))
2731 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
2732 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2733 }
2734 return Result;
2735 }
2736
2737 if (!IsConstant) {
2738 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2739 // The resulting code is the same length as the expansion, but it doesn't
2740 // use memory operations.
2741 assert(ResTy.isVector());
2742
2743 SDValue Op0 = Node->getOperand(0);
2744 SDValue Vector = DAG.getUNDEF(ResTy);
2745
2746 if (!Op0.isUndef())
2747 Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
2748 for (unsigned i = 1; i < NumElts; ++i) {
2749 SDValue Opi = Node->getOperand(i);
2750 if (Opi.isUndef())
2751 continue;
2752 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
2753 DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2754 }
2755 return Vector;
2756 }
2757
2758 return SDValue();
2759}
2760
2761SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
2762 SelectionDAG &DAG) const {
2763 SDLoc DL(Op);
2764 MVT ResVT = Op.getSimpleValueType();
2765 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
2766
2767 unsigned NumOperands = Op.getNumOperands();
2768 unsigned NumFreezeUndef = 0;
2769 unsigned NumZero = 0;
2770 unsigned NumNonZero = 0;
2771 unsigned NonZeros = 0;
2772 SmallSet<SDValue, 4> Undefs;
2773 for (unsigned i = 0; i != NumOperands; ++i) {
2774 SDValue SubVec = Op.getOperand(i);
2775 if (SubVec.isUndef())
2776 continue;
2777 if (ISD::isFreezeUndef(SubVec.getNode())) {
2778 // If the freeze(undef) has multiple uses then we must fold to zero.
2779 if (SubVec.hasOneUse()) {
2780 ++NumFreezeUndef;
2781 } else {
2782 ++NumZero;
2783 Undefs.insert(SubVec);
2784 }
2785 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
2786 ++NumZero;
2787 else {
2788 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
2789 NonZeros |= 1 << i;
2790 ++NumNonZero;
2791 }
2792 }
2793
2794 // If we have more than 2 non-zeros, build each half separately.
2795 if (NumNonZero > 2) {
2796 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
2797 ArrayRef<SDUse> Ops = Op->ops();
2798 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2799 Ops.slice(0, NumOperands / 2));
2800 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
2801 Ops.slice(NumOperands / 2));
2802 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
2803 }
2804
2805 // Otherwise, build it up through insert_subvectors.
2806 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
2807 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
2808 : DAG.getUNDEF(ResVT));
2809
2810 // Replace Undef operands with ZeroVector.
2811 for (SDValue U : Undefs)
2812 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
2813
2814 MVT SubVT = Op.getOperand(0).getSimpleValueType();
2815 unsigned NumSubElems = SubVT.getVectorNumElements();
2816 for (unsigned i = 0; i != NumOperands; ++i) {
2817 if ((NonZeros & (1 << i)) == 0)
2818 continue;
2819
2820 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
2821 DAG.getVectorIdxConstant(i * NumSubElems, DL));
2822 }
2823
2824 return Vec;
2825}
2826
2827SDValue
2828LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2829 SelectionDAG &DAG) const {
2830 MVT EltVT = Op.getSimpleValueType();
2831 SDValue Vec = Op->getOperand(0);
2832 EVT VecTy = Vec->getValueType(0);
2833 SDValue Idx = Op->getOperand(1);
2834 SDLoc DL(Op);
2835 MVT GRLenVT = Subtarget.getGRLenVT();
2836
2837 assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");
2838
2839 if (isa<ConstantSDNode>(Idx))
2840 return Op;
2841
2842 switch (VecTy.getSimpleVT().SimpleTy) {
2843 default:
2844 llvm_unreachable("Unexpected type");
2845 case MVT::v32i8:
2846 case MVT::v16i16:
2847 case MVT::v4i64:
2848 case MVT::v4f64: {
2849 // Extract the high half subvector and place it in the low half of a new
2850 // vector. It doesn't matter what the high half of the new vector is.
2851 EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
2852 SDValue VecHi =
2853 DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
2854 SDValue TmpVec =
2855 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
2856 VecHi, DAG.getConstant(0, DL, GRLenVT));
2857
2858 // Shuffle the original Vec and TmpVec using MaskVec; the lowest element
2859 // of MaskVec is Idx and the rest do not matter. ResVec[0] will hold the
2860 // desired element.
2861 SDValue IdxCp =
2862 DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx);
2863 SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
2864 SDValue MaskVec =
2865 DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
2866 SDValue ResVec =
2867 DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);
2868
2869 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
2870 DAG.getConstant(0, DL, GRLenVT));
2871 }
2872 case MVT::v8i32:
2873 case MVT::v8f32: {
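    // Splat the variable index and use XVPERM to move the selected element
    // into element 0, which can then be extracted with a constant index.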
2874 SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
2875 SDValue SplatValue =
2876 DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);
2877
2878 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
2879 DAG.getConstant(0, DL, GRLenVT));
2880 }
2881 }
2882}
2883
2884SDValue
2885LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2886 SelectionDAG &DAG) const {
2887 MVT VT = Op.getSimpleValueType();
2888 MVT EltVT = VT.getVectorElementType();
2889 unsigned NumElts = VT.getVectorNumElements();
2890 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
2891 SDLoc DL(Op);
2892 SDValue Op0 = Op.getOperand(0);
2893 SDValue Op1 = Op.getOperand(1);
2894 SDValue Op2 = Op.getOperand(2);
2895
2896 if (isa<ConstantSDNode>(Op2))
2897 return Op;
2898
2899 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
2900 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
2901
2902 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
2903 return SDValue();
2904
2905 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
2906 SDValue SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
2907
2908 SmallVector<SDValue, 32> RawIndices;
2909 for (unsigned i = 0; i < NumElts; ++i)
2910 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
2911 SDValue Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
2912
2913 // insert vec, elt, idx
2914 // =>
2915 // select (splatidx == {0,1,2...}) ? splatelt : vec
2916 SDValue SelectCC =
2917 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
2918 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
2919}
2920
2921SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
2922 SelectionDAG &DAG) const {
2923 SDLoc DL(Op);
2924 SyncScope::ID FenceSSID =
2925 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
2926
2927 // singlethread fences only synchronize with signal handlers on the same
2928 // thread and thus only need to preserve instruction order, not actually
2929 // enforce memory ordering.
2930 if (FenceSSID == SyncScope::SingleThread)
2931 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
2932 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
2933
2934 return Op;
2935}
2936
2937SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
2938 SelectionDAG &DAG) const {
2939
2940 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
2941 DAG.getContext()->emitError(
2942 "On LA64, only 64-bit registers can be written.");
2943 return Op.getOperand(0);
2944 }
2945
2946 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
2947 DAG.getContext()->emitError(
2948 "On LA32, only 32-bit registers can be written.");
2949 return Op.getOperand(0);
2950 }
2951
2952 return Op;
2953}
2954
2955SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
2956 SelectionDAG &DAG) const {
2957 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
2958 DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
2959 "be a constant integer");
2960 return SDValue();
2961 }
2962
2963 MachineFunction &MF = DAG.getMachineFunction();
2964 MF.getFrameInfo().setFrameAddressIsTaken(true);
2965 Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
2966 EVT VT = Op.getValueType();
2967 SDLoc DL(Op);
2968 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
2969 unsigned Depth = Op.getConstantOperandVal(0);
2970 int GRLenInBytes = Subtarget.getGRLen() / 8;
2971
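  // The code below assumes that the caller's frame pointer was saved at an
  // offset of -2 * GRLen/8 bytes from the current frame pointer; walk that
  // chain Depth times.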
2972 while (Depth--) {
2973 int Offset = -(GRLenInBytes * 2);
2974 SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
2975 DAG.getSignedConstant(Offset, DL, VT));
2976 FrameAddr =
2977 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
2978 }
2979 return FrameAddr;
2980}
2981
2982SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
2983 SelectionDAG &DAG) const {
2984 // Currently only support lowering return address for current frame.
2985 if (Op.getConstantOperandVal(0) != 0) {
2986 DAG.getContext()->emitError(
2987 "return address can only be determined for the current frame");
2988 return SDValue();
2989 }
2990
2991 MachineFunction &MF = DAG.getMachineFunction();
2992 MF.getFrameInfo().setReturnAddressIsTaken(true);
2993 MVT GRLenVT = Subtarget.getGRLenVT();
2994
2995 // Return the value of the return address register, marking it an implicit
2996 // live-in.
2997 Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
2998 getRegClassFor(GRLenVT));
2999 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
3000}
3001
3002SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3003 SelectionDAG &DAG) const {
3004 MachineFunction &MF = DAG.getMachineFunction();
3005 auto Size = Subtarget.getGRLen() / 8;
3006 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3007 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3008}
3009
3010SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
3011 SelectionDAG &DAG) const {
3012 MachineFunction &MF = DAG.getMachineFunction();
3013 auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
3014
3015 SDLoc DL(Op);
3016 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3017 getPointerTy(MF.getDataLayout()));
3018
3019 // vastart just stores the address of the VarArgsFrameIndex slot into the
3020 // memory location argument.
3021 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3022 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3023 MachinePointerInfo(SV));
3024}
3025
3026SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
3027 SelectionDAG &DAG) const {
3028 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3029 !Subtarget.hasBasicD() && "unexpected target features");
3030
3031 SDLoc DL(Op);
3032 SDValue Op0 = Op.getOperand(0);
3033 if (Op0->getOpcode() == ISD::AND) {
3034 auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
3035 if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
3036 return Op;
3037 }
3038
3039 if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
3040 Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
3041 Op0.getConstantOperandVal(2) == UINT64_C(0))
3042 return Op;
3043
3044 if (Op0.getOpcode() == ISD::AssertZext &&
3045 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
3046 return Op;
3047
3048 EVT OpVT = Op0.getValueType();
3049 EVT RetVT = Op.getValueType();
3050 RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
3051 MakeLibCallOptions CallOptions;
3052 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3053 SDValue Chain = SDValue();
3054 SDValue Result;
3055 std::tie(Result, Chain) =
3056 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3057 return Result;
3058}
3059
3060SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
3061 SelectionDAG &DAG) const {
3062 assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
3063 !Subtarget.hasBasicD() && "unexpected target features");
3064
3065 SDLoc DL(Op);
3066 SDValue Op0 = Op.getOperand(0);
3067
3068 if ((Op0.getOpcode() == ISD::AssertSext ||
3069 Op0.getOpcode() == ISD::AssertZext) &&
3070 dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
3071 return Op;
3072
3073 EVT OpVT = Op0.getValueType();
3074 EVT RetVT = Op.getValueType();
3075 RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
3076 MakeLibCallOptions CallOptions;
3077 CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
3078 SDValue Chain = SDValue();
3079 SDValue Result;
3080 std::tie(Result, Chain) =
3081 makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
3082 return Result;
3083}
3084
3085SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3086 SelectionDAG &DAG) const {
3087
3088 SDLoc DL(Op);
3089 EVT VT = Op.getValueType();
3090 SDValue Op0 = Op.getOperand(0);
3091 EVT Op0VT = Op0.getValueType();
3092
3093 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3094 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3095 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3096 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3097 }
3098 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3099 SDValue Lo, Hi;
3100 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3101 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3102 }
3103 return Op;
3104}
3105
3106SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3107 SelectionDAG &DAG) const {
3108
3109 SDLoc DL(Op);
3110 SDValue Op0 = Op.getOperand(0);
3111
3112 if (Op0.getValueType() == MVT::f16)
3113 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3114
3115 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3116 !Subtarget.hasBasicD()) {
3117 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3118 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3119 }
3120
3121 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3122 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3123 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3124}
3125
3126 static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
3127 SelectionDAG &DAG, unsigned Flags) {
3128 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3129}
3130
3131 static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
3132 SelectionDAG &DAG, unsigned Flags) {
3133 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3134 Flags);
3135}
3136
3137 static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
3138 SelectionDAG &DAG, unsigned Flags) {
3139 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3140 N->getOffset(), Flags);
3141}
3142
3143 static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
3144 SelectionDAG &DAG, unsigned Flags) {
3145 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3146}
3147
3148template <class NodeTy>
3149SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3150 CodeModel::Model M,
3151 bool IsLocal) const {
3152 SDLoc DL(N);
3153 EVT Ty = getPointerTy(DAG.getDataLayout());
3154 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3155 SDValue Load;
3156
3157 switch (M) {
3158 default:
3159 report_fatal_error("Unsupported code model");
3160
3161 case CodeModel::Large: {
3162 assert(Subtarget.is64Bit() && "Large code model requires LA64");
3163
3164 // This is not actually used, but is necessary for successfully matching
3165 // the PseudoLA_*_LARGE nodes.
3166 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3167 if (IsLocal) {
3168 // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
3169 // eventually becomes the desired 5-insn code sequence.
3170 Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
3171 Tmp, Addr),
3172 0);
3173 } else {
3174 // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
3175 // eventually becomes the desired 5-insn code sequence.
3176 Load = SDValue(
3177 DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
3178 0);
3179 }
3180 break;
3181 }
3182
3183 case CodeModel::Small:
3184 case CodeModel::Medium:
3185 if (IsLocal) {
3186 // This generates the pattern (PseudoLA_PCREL sym), which expands to
3187 // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
3188 Load = SDValue(
3189 DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
3190 } else {
3191 // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
3192 // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
3193 Load =
3194 SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
3195 }
3196 }
3197
3198 if (!IsLocal) {
3199 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3200 MachineFunction &MF = DAG.getMachineFunction();
3201 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3202 MachinePointerInfo::getGOT(MF),
3203 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3204 MachineMemOperand::MOInvariant,
3205 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3206 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
3207 }
3208
3209 return Load;
3210}
3211
3212SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3213 SelectionDAG &DAG) const {
3214 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3215 DAG.getTarget().getCodeModel());
3216}
3217
3218SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3219 SelectionDAG &DAG) const {
3220 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3221 DAG.getTarget().getCodeModel());
3222}
3223
3224SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3225 SelectionDAG &DAG) const {
3226 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3227 DAG.getTarget().getCodeModel());
3228}
3229
3230SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3231 SelectionDAG &DAG) const {
3232 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3233 assert(N->getOffset() == 0 && "unexpected offset in global node");
3234 auto CM = DAG.getTarget().getCodeModel();
3235 const GlobalValue *GV = N->getGlobal();
3236
3237 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3238 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3239 CM = *GCM;
3240 }
3241
3242 return getAddr(N, DAG, CM, GV->isDSOLocal());
3243}
3244
3245SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3246 SelectionDAG &DAG,
3247 unsigned Opc, bool UseGOT,
3248 bool Large) const {
3249 SDLoc DL(N);
3250 EVT Ty = getPointerTy(DAG.getDataLayout());
3251 MVT GRLenVT = Subtarget.getGRLenVT();
3252
3253 // This is not actually used, but is necessary for successfully matching the
3254 // PseudoLA_*_LARGE nodes.
3255 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3256 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3257
3258 // Only IE needs an extra argument for large code model.
3259 SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
3260 ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3261 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3262
3263 // If it is LE for normal/medium code model, the add tp operation will occur
3264 // during the pseudo-instruction expansion.
3265 if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
3266 return Offset;
3267
3268 if (UseGOT) {
3269 // Mark the load instruction as invariant to enable hoisting in MachineLICM.
3270 MachineFunction &MF = DAG.getMachineFunction();
3271 MachineMemOperand *MemOp = MF.getMachineMemOperand(
3272 MachinePointerInfo::getGOT(MF),
3273 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3274 MachineMemOperand::MOInvariant,
3275 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3276 DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
3277 }
3278
3279 // Add the thread pointer.
3280 return DAG.getNode(ISD::ADD, DL, Ty, Offset,
3281 DAG.getRegister(LoongArch::R2, GRLenVT));
3282}
3283
3284SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3285 SelectionDAG &DAG,
3286 unsigned Opc,
3287 bool Large) const {
3288 SDLoc DL(N);
3289 EVT Ty = getPointerTy(DAG.getDataLayout());
3290 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3291
3292 // This is not actually used, but is necessary for successfully matching the
3293 // PseudoLA_*_LARGE nodes.
3294 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3295
3296 // Use a PC-relative addressing mode to access the dynamic GOT address.
3297 SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
3298 SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3299 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3300
3301 // Prepare argument list to generate call.
3302 TargetLowering::ArgListTy Args;
3303 Args.emplace_back(Load, CallTy);
3304
3305 // Setup call to __tls_get_addr.
3306 TargetLowering::CallLoweringInfo CLI(DAG);
3307 CLI.setDebugLoc(DL)
3308 .setChain(DAG.getEntryNode())
3309 .setLibCallee(CallingConv::C, CallTy,
3310 DAG.getExternalSymbol("__tls_get_addr", Ty),
3311 std::move(Args));
3312
3313 return LowerCallTo(CLI).first;
3314}
3315
3316SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3317 SelectionDAG &DAG, unsigned Opc,
3318 bool Large) const {
3319 SDLoc DL(N);
3320 EVT Ty = getPointerTy(DAG.getDataLayout());
3321 const GlobalValue *GV = N->getGlobal();
3322
3323 // This is not actually used, but is necessary for successfully matching the
3324 // PseudoLA_*_LARGE nodes.
3325 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3326
3327 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3328 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3329 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3330 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3331 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3332}
3333
3334SDValue
3335LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3336 SelectionDAG &DAG) const {
3337 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3338 CallingConv::GHC)
3339 report_fatal_error("In GHC calling convention TLS is not supported");
3340
3341 bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
3342 assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
3343
3344 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3345 assert(N->getOffset() == 0 && "unexpected offset in global node");
3346
3347 if (DAG.getTarget().useEmulatedTLS())
3348 reportFatalUsageError("the emulated TLS is prohibited");
3349
3350 bool IsDesc = DAG.getTarget().useTLSDESC();
3351
3352 switch (getTargetMachine().getTLSModel(N->getGlobal())) {
3353 case TLSModel::GeneralDynamic:
3354 // In this model, application code calls the dynamic linker function
3355 // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
3356 // runtime.
3357 if (!IsDesc)
3358 return getDynamicTLSAddr(N, DAG,
3359 Large ? LoongArch::PseudoLA_TLS_GD_LARGE
3360 : LoongArch::PseudoLA_TLS_GD,
3361 Large);
3362 break;
3363 case TLSModel::LocalDynamic:
3364 // Same as GeneralDynamic, except for assembly modifiers and relocation
3365 // records.
3366 if (!IsDesc)
3367 return getDynamicTLSAddr(N, DAG,
3368 Large ? LoongArch::PseudoLA_TLS_LD_LARGE
3369 : LoongArch::PseudoLA_TLS_LD,
3370 Large);
3371 break;
3372 case TLSModel::InitialExec:
3373 // This model uses the GOT to resolve TLS offsets.
3374 return getStaticTLSAddr(N, DAG,
3375 Large ? LoongArch::PseudoLA_TLS_IE_LARGE
3376 : LoongArch::PseudoLA_TLS_IE,
3377 /*UseGOT=*/true, Large);
3378 case TLSModel::LocalExec:
3379 // This model is used when static linking as the TLS offsets are resolved
3380 // during program linking.
3381 //
3382 // This node doesn't need an extra argument for the large code model.
3383 return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
3384 /*UseGOT=*/false, Large);
3385 }
3386
3387 return getTLSDescAddr(N, DAG,
3388 Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
3389 : LoongArch::PseudoLA_TLS_DESC,
3390 Large);
3391}
3392
3393template <unsigned N>
3394 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
3395 SelectionDAG &DAG, bool IsSigned = false) {
3396 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3397 // Check the ImmArg.
3398 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3399 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3400 DAG.getContext()->emitError(Op->getOperationName(0) +
3401 ": argument out of range.");
3402 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3403 }
3404 return SDValue();
3405}
3406
3407SDValue
3408LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
3409 SelectionDAG &DAG) const {
3410 switch (Op.getConstantOperandVal(0)) {
3411 default:
3412 return SDValue(); // Don't custom lower most intrinsics.
3413 case Intrinsic::thread_pointer: {
3414 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3415 return DAG.getRegister(LoongArch::R2, PtrVT);
3416 }
3417 case Intrinsic::loongarch_lsx_vpickve2gr_d:
3418 case Intrinsic::loongarch_lsx_vpickve2gr_du:
3419 case Intrinsic::loongarch_lsx_vreplvei_d:
3420 case Intrinsic::loongarch_lasx_xvrepl128vei_d:
3421 return checkIntrinsicImmArg<1>(Op, 2, DAG);
3422 case Intrinsic::loongarch_lsx_vreplvei_w:
3423 case Intrinsic::loongarch_lasx_xvrepl128vei_w:
3424 case Intrinsic::loongarch_lasx_xvpickve2gr_d:
3425 case Intrinsic::loongarch_lasx_xvpickve2gr_du:
3426 case Intrinsic::loongarch_lasx_xvpickve_d:
3427 case Intrinsic::loongarch_lasx_xvpickve_d_f:
3428 return checkIntrinsicImmArg<2>(Op, 2, DAG);
3429 case Intrinsic::loongarch_lasx_xvinsve0_d:
3430 return checkIntrinsicImmArg<2>(Op, 3, DAG);
3431 case Intrinsic::loongarch_lsx_vsat_b:
3432 case Intrinsic::loongarch_lsx_vsat_bu:
3433 case Intrinsic::loongarch_lsx_vrotri_b:
3434 case Intrinsic::loongarch_lsx_vsllwil_h_b:
3435 case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
3436 case Intrinsic::loongarch_lsx_vsrlri_b:
3437 case Intrinsic::loongarch_lsx_vsrari_b:
3438 case Intrinsic::loongarch_lsx_vreplvei_h:
3439 case Intrinsic::loongarch_lasx_xvsat_b:
3440 case Intrinsic::loongarch_lasx_xvsat_bu:
3441 case Intrinsic::loongarch_lasx_xvrotri_b:
3442 case Intrinsic::loongarch_lasx_xvsllwil_h_b:
3443 case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
3444 case Intrinsic::loongarch_lasx_xvsrlri_b:
3445 case Intrinsic::loongarch_lasx_xvsrari_b:
3446 case Intrinsic::loongarch_lasx_xvrepl128vei_h:
3447 case Intrinsic::loongarch_lasx_xvpickve_w:
3448 case Intrinsic::loongarch_lasx_xvpickve_w_f:
3449 return checkIntrinsicImmArg<3>(Op, 2, DAG);
3450 case Intrinsic::loongarch_lasx_xvinsve0_w:
3451 return checkIntrinsicImmArg<3>(Op, 3, DAG);
3452 case Intrinsic::loongarch_lsx_vsat_h:
3453 case Intrinsic::loongarch_lsx_vsat_hu:
3454 case Intrinsic::loongarch_lsx_vrotri_h:
3455 case Intrinsic::loongarch_lsx_vsllwil_w_h:
3456 case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
3457 case Intrinsic::loongarch_lsx_vsrlri_h:
3458 case Intrinsic::loongarch_lsx_vsrari_h:
3459 case Intrinsic::loongarch_lsx_vreplvei_b:
3460 case Intrinsic::loongarch_lasx_xvsat_h:
3461 case Intrinsic::loongarch_lasx_xvsat_hu:
3462 case Intrinsic::loongarch_lasx_xvrotri_h:
3463 case Intrinsic::loongarch_lasx_xvsllwil_w_h:
3464 case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
3465 case Intrinsic::loongarch_lasx_xvsrlri_h:
3466 case Intrinsic::loongarch_lasx_xvsrari_h:
3467 case Intrinsic::loongarch_lasx_xvrepl128vei_b:
3468 return checkIntrinsicImmArg<4>(Op, 2, DAG);
3469 case Intrinsic::loongarch_lsx_vsrlni_b_h:
3470 case Intrinsic::loongarch_lsx_vsrani_b_h:
3471 case Intrinsic::loongarch_lsx_vsrlrni_b_h:
3472 case Intrinsic::loongarch_lsx_vsrarni_b_h:
3473 case Intrinsic::loongarch_lsx_vssrlni_b_h:
3474 case Intrinsic::loongarch_lsx_vssrani_b_h:
3475 case Intrinsic::loongarch_lsx_vssrlni_bu_h:
3476 case Intrinsic::loongarch_lsx_vssrani_bu_h:
3477 case Intrinsic::loongarch_lsx_vssrlrni_b_h:
3478 case Intrinsic::loongarch_lsx_vssrarni_b_h:
3479 case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
3480 case Intrinsic::loongarch_lsx_vssrarni_bu_h:
3481 case Intrinsic::loongarch_lasx_xvsrlni_b_h:
3482 case Intrinsic::loongarch_lasx_xvsrani_b_h:
3483 case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
3484 case Intrinsic::loongarch_lasx_xvsrarni_b_h:
3485 case Intrinsic::loongarch_lasx_xvssrlni_b_h:
3486 case Intrinsic::loongarch_lasx_xvssrani_b_h:
3487 case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
3488 case Intrinsic::loongarch_lasx_xvssrani_bu_h:
3489 case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
3490 case Intrinsic::loongarch_lasx_xvssrarni_b_h:
3491 case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
3492 case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
3493 return checkIntrinsicImmArg<4>(Op, 3, DAG);
3494 case Intrinsic::loongarch_lsx_vsat_w:
3495 case Intrinsic::loongarch_lsx_vsat_wu:
3496 case Intrinsic::loongarch_lsx_vrotri_w:
3497 case Intrinsic::loongarch_lsx_vsllwil_d_w:
3498 case Intrinsic::loongarch_lsx_vsllwil_du_wu:
3499 case Intrinsic::loongarch_lsx_vsrlri_w:
3500 case Intrinsic::loongarch_lsx_vsrari_w:
3501 case Intrinsic::loongarch_lsx_vslei_bu:
3502 case Intrinsic::loongarch_lsx_vslei_hu:
3503 case Intrinsic::loongarch_lsx_vslei_wu:
3504 case Intrinsic::loongarch_lsx_vslei_du:
3505 case Intrinsic::loongarch_lsx_vslti_bu:
3506 case Intrinsic::loongarch_lsx_vslti_hu:
3507 case Intrinsic::loongarch_lsx_vslti_wu:
3508 case Intrinsic::loongarch_lsx_vslti_du:
3509 case Intrinsic::loongarch_lsx_vbsll_v:
3510 case Intrinsic::loongarch_lsx_vbsrl_v:
3511 case Intrinsic::loongarch_lasx_xvsat_w:
3512 case Intrinsic::loongarch_lasx_xvsat_wu:
3513 case Intrinsic::loongarch_lasx_xvrotri_w:
3514 case Intrinsic::loongarch_lasx_xvsllwil_d_w:
3515 case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
3516 case Intrinsic::loongarch_lasx_xvsrlri_w:
3517 case Intrinsic::loongarch_lasx_xvsrari_w:
3518 case Intrinsic::loongarch_lasx_xvslei_bu:
3519 case Intrinsic::loongarch_lasx_xvslei_hu:
3520 case Intrinsic::loongarch_lasx_xvslei_wu:
3521 case Intrinsic::loongarch_lasx_xvslei_du:
3522 case Intrinsic::loongarch_lasx_xvslti_bu:
3523 case Intrinsic::loongarch_lasx_xvslti_hu:
3524 case Intrinsic::loongarch_lasx_xvslti_wu:
3525 case Intrinsic::loongarch_lasx_xvslti_du:
3526 case Intrinsic::loongarch_lasx_xvbsll_v:
3527 case Intrinsic::loongarch_lasx_xvbsrl_v:
3528 return checkIntrinsicImmArg<5>(Op, 2, DAG);
3529 case Intrinsic::loongarch_lsx_vseqi_b:
3530 case Intrinsic::loongarch_lsx_vseqi_h:
3531 case Intrinsic::loongarch_lsx_vseqi_w:
3532 case Intrinsic::loongarch_lsx_vseqi_d:
3533 case Intrinsic::loongarch_lsx_vslei_b:
3534 case Intrinsic::loongarch_lsx_vslei_h:
3535 case Intrinsic::loongarch_lsx_vslei_w:
3536 case Intrinsic::loongarch_lsx_vslei_d:
3537 case Intrinsic::loongarch_lsx_vslti_b:
3538 case Intrinsic::loongarch_lsx_vslti_h:
3539 case Intrinsic::loongarch_lsx_vslti_w:
3540 case Intrinsic::loongarch_lsx_vslti_d:
3541 case Intrinsic::loongarch_lasx_xvseqi_b:
3542 case Intrinsic::loongarch_lasx_xvseqi_h:
3543 case Intrinsic::loongarch_lasx_xvseqi_w:
3544 case Intrinsic::loongarch_lasx_xvseqi_d:
3545 case Intrinsic::loongarch_lasx_xvslei_b:
3546 case Intrinsic::loongarch_lasx_xvslei_h:
3547 case Intrinsic::loongarch_lasx_xvslei_w:
3548 case Intrinsic::loongarch_lasx_xvslei_d:
3549 case Intrinsic::loongarch_lasx_xvslti_b:
3550 case Intrinsic::loongarch_lasx_xvslti_h:
3551 case Intrinsic::loongarch_lasx_xvslti_w:
3552 case Intrinsic::loongarch_lasx_xvslti_d:
3553 return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
3554 case Intrinsic::loongarch_lsx_vsrlni_h_w:
3555 case Intrinsic::loongarch_lsx_vsrani_h_w:
3556 case Intrinsic::loongarch_lsx_vsrlrni_h_w:
3557 case Intrinsic::loongarch_lsx_vsrarni_h_w:
3558 case Intrinsic::loongarch_lsx_vssrlni_h_w:
3559 case Intrinsic::loongarch_lsx_vssrani_h_w:
3560 case Intrinsic::loongarch_lsx_vssrlni_hu_w:
3561 case Intrinsic::loongarch_lsx_vssrani_hu_w:
3562 case Intrinsic::loongarch_lsx_vssrlrni_h_w:
3563 case Intrinsic::loongarch_lsx_vssrarni_h_w:
3564 case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
3565 case Intrinsic::loongarch_lsx_vssrarni_hu_w:
3566 case Intrinsic::loongarch_lsx_vfrstpi_b:
3567 case Intrinsic::loongarch_lsx_vfrstpi_h:
3568 case Intrinsic::loongarch_lasx_xvsrlni_h_w:
3569 case Intrinsic::loongarch_lasx_xvsrani_h_w:
3570 case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
3571 case Intrinsic::loongarch_lasx_xvsrarni_h_w:
3572 case Intrinsic::loongarch_lasx_xvssrlni_h_w:
3573 case Intrinsic::loongarch_lasx_xvssrani_h_w:
3574 case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
3575 case Intrinsic::loongarch_lasx_xvssrani_hu_w:
3576 case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
3577 case Intrinsic::loongarch_lasx_xvssrarni_h_w:
3578 case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
3579 case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
3580 case Intrinsic::loongarch_lasx_xvfrstpi_b:
3581 case Intrinsic::loongarch_lasx_xvfrstpi_h:
3582 return checkIntrinsicImmArg<5>(Op, 3, DAG);
3583 case Intrinsic::loongarch_lsx_vsat_d:
3584 case Intrinsic::loongarch_lsx_vsat_du:
3585 case Intrinsic::loongarch_lsx_vrotri_d:
3586 case Intrinsic::loongarch_lsx_vsrlri_d:
3587 case Intrinsic::loongarch_lsx_vsrari_d:
3588 case Intrinsic::loongarch_lasx_xvsat_d:
3589 case Intrinsic::loongarch_lasx_xvsat_du:
3590 case Intrinsic::loongarch_lasx_xvrotri_d:
3591 case Intrinsic::loongarch_lasx_xvsrlri_d:
3592 case Intrinsic::loongarch_lasx_xvsrari_d:
3593 return checkIntrinsicImmArg<6>(Op, 2, DAG);
3594 case Intrinsic::loongarch_lsx_vsrlni_w_d:
3595 case Intrinsic::loongarch_lsx_vsrani_w_d:
3596 case Intrinsic::loongarch_lsx_vsrlrni_w_d:
3597 case Intrinsic::loongarch_lsx_vsrarni_w_d:
3598 case Intrinsic::loongarch_lsx_vssrlni_w_d:
3599 case Intrinsic::loongarch_lsx_vssrani_w_d:
3600 case Intrinsic::loongarch_lsx_vssrlni_wu_d:
3601 case Intrinsic::loongarch_lsx_vssrani_wu_d:
3602 case Intrinsic::loongarch_lsx_vssrlrni_w_d:
3603 case Intrinsic::loongarch_lsx_vssrarni_w_d:
3604 case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
3605 case Intrinsic::loongarch_lsx_vssrarni_wu_d:
3606 case Intrinsic::loongarch_lasx_xvsrlni_w_d:
3607 case Intrinsic::loongarch_lasx_xvsrani_w_d:
3608 case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
3609 case Intrinsic::loongarch_lasx_xvsrarni_w_d:
3610 case Intrinsic::loongarch_lasx_xvssrlni_w_d:
3611 case Intrinsic::loongarch_lasx_xvssrani_w_d:
3612 case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
3613 case Intrinsic::loongarch_lasx_xvssrani_wu_d:
3614 case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
3615 case Intrinsic::loongarch_lasx_xvssrarni_w_d:
3616 case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
3617 case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
3618 return checkIntrinsicImmArg<6>(Op, 3, DAG);
3619 case Intrinsic::loongarch_lsx_vsrlni_d_q:
3620 case Intrinsic::loongarch_lsx_vsrani_d_q:
3621 case Intrinsic::loongarch_lsx_vsrlrni_d_q:
3622 case Intrinsic::loongarch_lsx_vsrarni_d_q:
3623 case Intrinsic::loongarch_lsx_vssrlni_d_q:
3624 case Intrinsic::loongarch_lsx_vssrani_d_q:
3625 case Intrinsic::loongarch_lsx_vssrlni_du_q:
3626 case Intrinsic::loongarch_lsx_vssrani_du_q:
3627 case Intrinsic::loongarch_lsx_vssrlrni_d_q:
3628 case Intrinsic::loongarch_lsx_vssrarni_d_q:
3629 case Intrinsic::loongarch_lsx_vssrlrni_du_q:
3630 case Intrinsic::loongarch_lsx_vssrarni_du_q:
3631 case Intrinsic::loongarch_lasx_xvsrlni_d_q:
3632 case Intrinsic::loongarch_lasx_xvsrani_d_q:
3633 case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
3634 case Intrinsic::loongarch_lasx_xvsrarni_d_q:
3635 case Intrinsic::loongarch_lasx_xvssrlni_d_q:
3636 case Intrinsic::loongarch_lasx_xvssrani_d_q:
3637 case Intrinsic::loongarch_lasx_xvssrlni_du_q:
3638 case Intrinsic::loongarch_lasx_xvssrani_du_q:
3639 case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
3640 case Intrinsic::loongarch_lasx_xvssrarni_d_q:
3641 case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
3642 case Intrinsic::loongarch_lasx_xvssrarni_du_q:
3643 return checkIntrinsicImmArg<7>(Op, 3, DAG);
3644 case Intrinsic::loongarch_lsx_vnori_b:
3645 case Intrinsic::loongarch_lsx_vshuf4i_b:
3646 case Intrinsic::loongarch_lsx_vshuf4i_h:
3647 case Intrinsic::loongarch_lsx_vshuf4i_w:
3648 case Intrinsic::loongarch_lasx_xvnori_b:
3649 case Intrinsic::loongarch_lasx_xvshuf4i_b:
3650 case Intrinsic::loongarch_lasx_xvshuf4i_h:
3651 case Intrinsic::loongarch_lasx_xvshuf4i_w:
3652 case Intrinsic::loongarch_lasx_xvpermi_d:
3653 return checkIntrinsicImmArg<8>(Op, 2, DAG);
3654 case Intrinsic::loongarch_lsx_vshuf4i_d:
3655 case Intrinsic::loongarch_lsx_vpermi_w:
3656 case Intrinsic::loongarch_lsx_vbitseli_b:
3657 case Intrinsic::loongarch_lsx_vextrins_b:
3658 case Intrinsic::loongarch_lsx_vextrins_h:
3659 case Intrinsic::loongarch_lsx_vextrins_w:
3660 case Intrinsic::loongarch_lsx_vextrins_d:
3661 case Intrinsic::loongarch_lasx_xvshuf4i_d:
3662 case Intrinsic::loongarch_lasx_xvpermi_w:
3663 case Intrinsic::loongarch_lasx_xvpermi_q:
3664 case Intrinsic::loongarch_lasx_xvbitseli_b:
3665 case Intrinsic::loongarch_lasx_xvextrins_b:
3666 case Intrinsic::loongarch_lasx_xvextrins_h:
3667 case Intrinsic::loongarch_lasx_xvextrins_w:
3668 case Intrinsic::loongarch_lasx_xvextrins_d:
3669 return checkIntrinsicImmArg<8>(Op, 3, DAG);
3670 case Intrinsic::loongarch_lsx_vrepli_b:
3671 case Intrinsic::loongarch_lsx_vrepli_h:
3672 case Intrinsic::loongarch_lsx_vrepli_w:
3673 case Intrinsic::loongarch_lsx_vrepli_d:
3674 case Intrinsic::loongarch_lasx_xvrepli_b:
3675 case Intrinsic::loongarch_lasx_xvrepli_h:
3676 case Intrinsic::loongarch_lasx_xvrepli_w:
3677 case Intrinsic::loongarch_lasx_xvrepli_d:
3678 return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
3679 case Intrinsic::loongarch_lsx_vldi:
3680 case Intrinsic::loongarch_lasx_xvldi:
3681 return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
3682 }
3683}
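// Note (illustrative, editorial): the only non-vector intrinsic custom-lowered
// here is @llvm.thread.pointer, which simply materializes the thread-pointer
// register $tp (LoongArch::R2); every other case merely validates that the
// ImmArg operand of an LSX/LASX intrinsic fits its encoded immediate width.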
3684
3685 // Helper function that emits an error message for intrinsics with chain and
3686 // returns the merge values of a UNDEF and the chain.
3687 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
3688 StringRef ErrorMsg,
3689 SelectionDAG &DAG) {
3690 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3691 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
3692 SDLoc(Op));
3693}
3694
3695SDValue
3696LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
3697 SelectionDAG &DAG) const {
3698 SDLoc DL(Op);
3699 MVT GRLenVT = Subtarget.getGRLenVT();
3700 EVT VT = Op.getValueType();
3701 SDValue Chain = Op.getOperand(0);
3702 const StringRef ErrorMsgOOR = "argument out of range";
3703 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3704 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3705
3706 switch (Op.getConstantOperandVal(1)) {
3707 default:
3708 return Op;
3709 case Intrinsic::loongarch_crc_w_b_w:
3710 case Intrinsic::loongarch_crc_w_h_w:
3711 case Intrinsic::loongarch_crc_w_w_w:
3712 case Intrinsic::loongarch_crc_w_d_w:
3713 case Intrinsic::loongarch_crcc_w_b_w:
3714 case Intrinsic::loongarch_crcc_w_h_w:
3715 case Intrinsic::loongarch_crcc_w_w_w:
3716 case Intrinsic::loongarch_crcc_w_d_w:
3717 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
3718 case Intrinsic::loongarch_csrrd_w:
3719 case Intrinsic::loongarch_csrrd_d: {
3720 unsigned Imm = Op.getConstantOperandVal(2);
3721 return !isUInt<14>(Imm)
3722 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3723 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3724 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3725 }
3726 case Intrinsic::loongarch_csrwr_w:
3727 case Intrinsic::loongarch_csrwr_d: {
3728 unsigned Imm = Op.getConstantOperandVal(3);
3729 return !isUInt<14>(Imm)
3730 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3731 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3732 {Chain, Op.getOperand(2),
3733 DAG.getConstant(Imm, DL, GRLenVT)});
3734 }
3735 case Intrinsic::loongarch_csrxchg_w:
3736 case Intrinsic::loongarch_csrxchg_d: {
3737 unsigned Imm = Op.getConstantOperandVal(4);
3738 return !isUInt<14>(Imm)
3739 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3740 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3741 {Chain, Op.getOperand(2), Op.getOperand(3),
3742 DAG.getConstant(Imm, DL, GRLenVT)});
3743 }
3744 case Intrinsic::loongarch_iocsrrd_d: {
3745 return DAG.getNode(
3746 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
3747 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
3748 }
3749#define IOCSRRD_CASE(NAME, NODE) \
3750 case Intrinsic::loongarch_##NAME: { \
3751 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
3752 {Chain, Op.getOperand(2)}); \
3753 }
3754 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3755 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3756 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3757#undef IOCSRRD_CASE
3758 case Intrinsic::loongarch_cpucfg: {
3759 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3760 {Chain, Op.getOperand(2)});
3761 }
3762 case Intrinsic::loongarch_lddir_d: {
3763 unsigned Imm = Op.getConstantOperandVal(3);
3764 return !isUInt<8>(Imm)
3765 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3766 : Op;
3767 }
3768 case Intrinsic::loongarch_movfcsr2gr: {
3769 if (!Subtarget.hasBasicF())
3770 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
3771 unsigned Imm = Op.getConstantOperandVal(2);
3772 return !isUInt<2>(Imm)
3773 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3774 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
3775 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3776 }
3777 case Intrinsic::loongarch_lsx_vld:
3778 case Intrinsic::loongarch_lsx_vldrepl_b:
3779 case Intrinsic::loongarch_lasx_xvld:
3780 case Intrinsic::loongarch_lasx_xvldrepl_b:
3781 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3782 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
3783 : SDValue();
3784 case Intrinsic::loongarch_lsx_vldrepl_h:
3785 case Intrinsic::loongarch_lasx_xvldrepl_h:
3786 return !isShiftedInt<11, 1>(
3787 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3788 ? emitIntrinsicWithChainErrorMessage(
3789 Op, "argument out of range or not a multiple of 2", DAG)
3790 : SDValue();
3791 case Intrinsic::loongarch_lsx_vldrepl_w:
3792 case Intrinsic::loongarch_lasx_xvldrepl_w:
3793 return !isShiftedInt<10, 2>(
3794 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3795 ? emitIntrinsicWithChainErrorMessage(
3796 Op, "argument out of range or not a multiple of 4", DAG)
3797 : SDValue();
3798 case Intrinsic::loongarch_lsx_vldrepl_d:
3799 case Intrinsic::loongarch_lasx_xvldrepl_d:
3800 return !isShiftedInt<9, 3>(
3801 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
3802 ? emitIntrinsicWithChainErrorMessage(
3803 Op, "argument out of range or not a multiple of 8", DAG)
3804 : SDValue();
3805 }
3806}
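// Worked example (illustrative, not from the original source): the CSR
// intrinsics require a 14-bit unsigned CSR index, so
//   call i64 @llvm.loongarch.csrrd.d(i32 2)      ; lowers to LoongArchISD::CSRRD
//   call i64 @llvm.loongarch.csrrd.d(i32 16384)  ; diagnosed: argument out of range
// The vldrepl.{h,w,d} cases additionally require the offset to be a multiple
// of the element size, hence the isShiftedInt<11,1>/<10,2>/<9,3> checks.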
3807
3808 // Helper function that emits an error message for intrinsics with a void
3809 // return value and returns the chain.
3810 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
3811 SelectionDAG &DAG) {
3812
3813 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
3814 return Op.getOperand(0);
3815}
3816
3817SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
3818 SelectionDAG &DAG) const {
3819 SDLoc DL(Op);
3820 MVT GRLenVT = Subtarget.getGRLenVT();
3821 SDValue Chain = Op.getOperand(0);
3822 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
3823 SDValue Op2 = Op.getOperand(2);
3824 const StringRef ErrorMsgOOR = "argument out of range";
3825 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
3826 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
3827 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
3828
3829 switch (IntrinsicEnum) {
3830 default:
3831 // TODO: Add more Intrinsics.
3832 return SDValue();
3833 case Intrinsic::loongarch_cacop_d:
3834 case Intrinsic::loongarch_cacop_w: {
3835 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
3836 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
3837 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
3838 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
3839 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
3840 unsigned Imm1 = Op2->getAsZExtVal();
3841 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
3842 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
3843 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
3844 return Op;
3845 }
3846 case Intrinsic::loongarch_dbar: {
3847 unsigned Imm = Op2->getAsZExtVal();
3848 return !isUInt<15>(Imm)
3849 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3850 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
3851 DAG.getConstant(Imm, DL, GRLenVT));
3852 }
3853 case Intrinsic::loongarch_ibar: {
3854 unsigned Imm = Op2->getAsZExtVal();
3855 return !isUInt<15>(Imm)
3856 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3857 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
3858 DAG.getConstant(Imm, DL, GRLenVT));
3859 }
3860 case Intrinsic::loongarch_break: {
3861 unsigned Imm = Op2->getAsZExtVal();
3862 return !isUInt<15>(Imm)
3863 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3864 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
3865 DAG.getConstant(Imm, DL, GRLenVT));
3866 }
3867 case Intrinsic::loongarch_movgr2fcsr: {
3868 if (!Subtarget.hasBasicF())
3869 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
3870 unsigned Imm = Op2->getAsZExtVal();
3871 return !isUInt<2>(Imm)
3872 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3873 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
3874 DAG.getConstant(Imm, DL, GRLenVT),
3875 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
3876 Op.getOperand(3)));
3877 }
3878 case Intrinsic::loongarch_syscall: {
3879 unsigned Imm = Op2->getAsZExtVal();
3880 return !isUInt<15>(Imm)
3881 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3882 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
3883 DAG.getConstant(Imm, DL, GRLenVT));
3884 }
3885#define IOCSRWR_CASE(NAME, NODE) \
3886 case Intrinsic::loongarch_##NAME: { \
3887 SDValue Op3 = Op.getOperand(3); \
3888 return Subtarget.is64Bit() \
3889 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
3890 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
3891 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
3892 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
3893 Op3); \
3894 }
3895 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
3896 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
3897 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
3898#undef IOCSRWR_CASE
3899 case Intrinsic::loongarch_iocsrwr_d: {
3900 return !Subtarget.is64Bit()
3901 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3902 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
3903 Op2,
3904 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
3905 Op.getOperand(3)));
3906 }
3907#define ASRT_LE_GT_CASE(NAME) \
3908 case Intrinsic::loongarch_##NAME: { \
3909 return !Subtarget.is64Bit() \
3910 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
3911 : Op; \
3912 }
3913 ASRT_LE_GT_CASE(asrtle_d)
3914 ASRT_LE_GT_CASE(asrtgt_d)
3915#undef ASRT_LE_GT_CASE
3916 case Intrinsic::loongarch_ldpte_d: {
3917 unsigned Imm = Op.getConstantOperandVal(3);
3918 return !Subtarget.is64Bit()
3919 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
3920 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3921 : Op;
3922 }
3923 case Intrinsic::loongarch_lsx_vst:
3924 case Intrinsic::loongarch_lasx_xvst:
3925 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
3926 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3927 : SDValue();
3928 case Intrinsic::loongarch_lasx_xvstelm_b:
3929 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3930 !isUInt<5>(Op.getConstantOperandVal(5)))
3931 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3932 : SDValue();
3933 case Intrinsic::loongarch_lsx_vstelm_b:
3934 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3935 !isUInt<4>(Op.getConstantOperandVal(5)))
3936 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
3937 : SDValue();
3938 case Intrinsic::loongarch_lasx_xvstelm_h:
3939 return (!isShiftedInt<8, 1>(
3940 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3941 !isUInt<4>(Op.getConstantOperandVal(5)))
3942 ? emitIntrinsicErrorMessage(
3943 Op, "argument out of range or not a multiple of 2", DAG)
3944 : SDValue();
3945 case Intrinsic::loongarch_lsx_vstelm_h:
3946 return (!isShiftedInt<8, 1>(
3947 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3948 !isUInt<3>(Op.getConstantOperandVal(5)))
3949 ? emitIntrinsicErrorMessage(
3950 Op, "argument out of range or not a multiple of 2", DAG)
3951 : SDValue();
3952 case Intrinsic::loongarch_lasx_xvstelm_w:
3953 return (!isShiftedInt<8, 2>(
3954 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3955 !isUInt<3>(Op.getConstantOperandVal(5)))
3956 ? emitIntrinsicErrorMessage(
3957 Op, "argument out of range or not a multiple of 4", DAG)
3958 : SDValue();
3959 case Intrinsic::loongarch_lsx_vstelm_w:
3960 return (!isShiftedInt<8, 2>(
3961 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3962 !isUInt<2>(Op.getConstantOperandVal(5)))
3963 ? emitIntrinsicErrorMessage(
3964 Op, "argument out of range or not a multiple of 4", DAG)
3965 : SDValue();
3966 case Intrinsic::loongarch_lasx_xvstelm_d:
3967 return (!isShiftedInt<8, 3>(
3968 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3969 !isUInt<2>(Op.getConstantOperandVal(5)))
3970 ? emitIntrinsicErrorMessage(
3971 Op, "argument out of range or not a multiple of 8", DAG)
3972 : SDValue();
3973 case Intrinsic::loongarch_lsx_vstelm_d:
3974 return (!isShiftedInt<8, 3>(
3975 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
3976 !isUInt<1>(Op.getConstantOperandVal(5)))
3977 ? emitIntrinsicErrorMessage(
3978 Op, "argument out of range or not a multiple of 8", DAG)
3979 : SDValue();
3980 }
3981}
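// Worked example (illustrative): for @llvm.loongarch.lsx.vstelm.d the byte
// offset (operand 4) must satisfy isShiftedInt<8, 3>, i.e. be a multiple of 8
// in [-1024, 1016], and the lane index (operand 5) must fit in one bit, since
// an LSX register holds two i64 elements; violations are reported as
// "argument out of range or not a multiple of 8".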
3982
3983SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
3984 SelectionDAG &DAG) const {
3985 SDLoc DL(Op);
3986 SDValue Lo = Op.getOperand(0);
3987 SDValue Hi = Op.getOperand(1);
3988 SDValue Shamt = Op.getOperand(2);
3989 EVT VT = Lo.getValueType();
3990
3991 // if Shamt-GRLen < 0: // Shamt < GRLen
3992 // Lo = Lo << Shamt
3993 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
3994 // else:
3995 // Lo = 0
3996 // Hi = Lo << (Shamt-GRLen)
3997
3998 SDValue Zero = DAG.getConstant(0, DL, VT);
3999 SDValue One = DAG.getConstant(1, DL, VT);
4000 SDValue MinusGRLen =
4001 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4002 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4003 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4004 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4005
4006 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4007 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4008 SDValue ShiftRightLo =
4009 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4010 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4011 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4012 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4013
4014 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4015
4016 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4017 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4018
4019 SDValue Parts[2] = {Lo, Hi};
4020 return DAG.getMergeValues(Parts, DL);
4021}
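// Worked example (illustrative): with GRLen = 32 and Shamt = 8 the first
// branch of the expansion applies:
//   Lo = Lo_in << 8
//   Hi = (Hi_in << 8) | ((Lo_in >>u 1) >>u (31 ^ 8))   ; 31 ^ 8 == 23
// so the top 8 bits of the low word move into the high word. With Shamt = 40
// the other branch applies: Lo = 0 and Hi = Lo_in << (40 - 32).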
4022
4023SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4024 SelectionDAG &DAG,
4025 bool IsSRA) const {
4026 SDLoc DL(Op);
4027 SDValue Lo = Op.getOperand(0);
4028 SDValue Hi = Op.getOperand(1);
4029 SDValue Shamt = Op.getOperand(2);
4030 EVT VT = Lo.getValueType();
4031
4032 // SRA expansion:
4033 // if Shamt-GRLen < 0: // Shamt < GRLen
4034 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4035 // Hi = Hi >>s Shamt
4036 // else:
4037 // Lo = Hi >>s (Shamt-GRLen);
4038 // Hi = Hi >>s (GRLen-1)
4039 //
4040 // SRL expansion:
4041 // if Shamt-GRLen < 0: // Shamt < GRLen
4042 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4043 // Hi = Hi >>u Shamt
4044 // else:
4045 // Lo = Hi >>u (Shamt-GRLen);
4046 // Hi = 0;
4047
4048 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4049
4050 SDValue Zero = DAG.getConstant(0, DL, VT);
4051 SDValue One = DAG.getConstant(1, DL, VT);
4052 SDValue MinusGRLen =
4053 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4054 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4055 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4056 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4057
4058 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4059 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4060 SDValue ShiftLeftHi =
4061 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4062 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4063 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4064 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4065 SDValue HiFalse =
4066 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4067
4068 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4069
4070 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4071 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4072
4073 SDValue Parts[2] = {Lo, Hi};
4074 return DAG.getMergeValues(Parts, DL);
4075}
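// Worked example (illustrative): an arithmetic shift right of an i64 by 40 on
// LA32 (GRLen = 32) takes the "else" branch of the SRA expansion:
//   Lo = Hi_in >>s (40 - 32) = Hi_in >>s 8
//   Hi = Hi_in >>s 31        ; replicated sign bit
// whereas the SRL expansion would instead produce Hi = 0.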
4076
4077// Returns the opcode of the target-specific SDNode that implements the 32-bit
4078 // form of the given Opcode.
4079 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
4080 switch (Opcode) {
4081 default:
4082 llvm_unreachable("Unexpected opcode");
4083 case ISD::SDIV:
4084 return LoongArchISD::DIV_W;
4085 case ISD::UDIV:
4086 return LoongArchISD::DIV_WU;
4087 case ISD::SREM:
4088 return LoongArchISD::MOD_W;
4089 case ISD::UREM:
4090 return LoongArchISD::MOD_WU;
4091 case ISD::SHL:
4092 return LoongArchISD::SLL_W;
4093 case ISD::SRA:
4094 return LoongArchISD::SRA_W;
4095 case ISD::SRL:
4096 return LoongArchISD::SRL_W;
4097 case ISD::ROTL:
4098 case ISD::ROTR:
4099 return LoongArchISD::ROTR_W;
4100 case ISD::CTTZ:
4101 return LoongArchISD::CTZ_W;
4102 case ISD::CTLZ:
4103 return LoongArchISD::CLZ_W;
4104 }
4105}
4106
4107// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4108// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4109// otherwise be promoted to i64, making it difficult to select the
4110 // SLL_W/.../*W later on, because the fact that the operation was originally of
4111 // type i8/i16/i32 is lost.
4112 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
4113 unsigned ExtOpc = ISD::ANY_EXTEND) {
4114 SDLoc DL(N);
4115 LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
4116 SDValue NewOp0, NewRes;
4117
4118 switch (NumOp) {
4119 default:
4120 llvm_unreachable("Unexpected NumOp");
4121 case 1: {
4122 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4123 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4124 break;
4125 }
4126 case 2: {
4127 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4128 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4129 if (N->getOpcode() == ISD::ROTL) {
4130 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4131 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4132 }
4133 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4134 break;
4135 }
4136 // TODO: Handle more NumOp.
4137 }
4138
4139 // ReplaceNodeResults requires we maintain the same type for the return
4140 // value.
4141 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4142}
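// Illustrative example (editorial): legalizing an i32 sdiv on LA64 via
// customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND) yields
//   (trunc i32 (DIV_W (sext i64 a), (sext i64 b)))
// which selects to div.w; with the div32 feature ISD::ANY_EXTEND suffices,
// since the instruction then only consumes the low 32 bits of its operands.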
4143
4144 // Converts the given 32-bit operation to an i64 operation with signed extension
4145 // semantics to reduce the number of sign-extension instructions.
4146 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
4147 SDLoc DL(N);
4148 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4149 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4150 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4151 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4152 DAG.getValueType(MVT::i32));
4153 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4154}
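// Illustrative example (editorial): an i32 add on LA64 becomes
//   (trunc (sext_inreg (add (anyext a), (anyext b)), i32))
// and the add/sext_inreg pair is then matched as a single add.w, avoiding an
// explicit sign-extension instruction for the result.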
4155
4156 // Helper function that emits an error message for intrinsics with/without chain
4157 // and replaces the results with a UNDEF and, optionally, the chain.
4158 static void emitErrorAndReplaceIntrinsicResults(
4159 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
4160 StringRef ErrorMsg, bool WithChain = true) {
4161 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4162 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4163 if (!WithChain)
4164 return;
4165 Results.push_back(N->getOperand(0));
4166}
4167
4168template <unsigned N>
4169 static void
4170 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
4171 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4172 unsigned ResOp) {
4173 const StringRef ErrorMsgOOR = "argument out of range";
4174 unsigned Imm = Node->getConstantOperandVal(2);
4175 if (!isUInt<N>(Imm)) {
4176 emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
4177 /*WithChain=*/false);
4178 return;
4179 }
4180 SDLoc DL(Node);
4181 SDValue Vec = Node->getOperand(1);
4182
4183 SDValue PickElt =
4184 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4185 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4186 DAG.getValueType(Vec.getValueType().getVectorElementType()));
4187 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4188 PickElt.getValue(0)));
4189}
4190
4191 static void replaceVecCondBranchResults(SDNode *N,
4192 SmallVectorImpl<SDValue> &Results,
4193 SelectionDAG &DAG,
4194 const LoongArchSubtarget &Subtarget,
4195 unsigned ResOp) {
4196 SDLoc DL(N);
4197 SDValue Vec = N->getOperand(1);
4198
4199 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4200 Results.push_back(
4201 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4202}
4203
4204 static void
4205 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
4206 SelectionDAG &DAG,
4207 const LoongArchSubtarget &Subtarget) {
4208 switch (N->getConstantOperandVal(0)) {
4209 default:
4210 llvm_unreachable("Unexpected Intrinsic.");
4211 case Intrinsic::loongarch_lsx_vpickve2gr_b:
4212 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4213 LoongArchISD::VPICK_SEXT_ELT);
4214 break;
4215 case Intrinsic::loongarch_lsx_vpickve2gr_h:
4216 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
4217 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4218 LoongArchISD::VPICK_SEXT_ELT);
4219 break;
4220 case Intrinsic::loongarch_lsx_vpickve2gr_w:
4221 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4222 LoongArchISD::VPICK_SEXT_ELT);
4223 break;
4224 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
4225 replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
4226 LoongArchISD::VPICK_ZEXT_ELT);
4227 break;
4228 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
4229 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
4230 replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
4231 LoongArchISD::VPICK_ZEXT_ELT);
4232 break;
4233 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
4234 replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
4235 LoongArchISD::VPICK_ZEXT_ELT);
4236 break;
4237 case Intrinsic::loongarch_lsx_bz_b:
4238 case Intrinsic::loongarch_lsx_bz_h:
4239 case Intrinsic::loongarch_lsx_bz_w:
4240 case Intrinsic::loongarch_lsx_bz_d:
4241 case Intrinsic::loongarch_lasx_xbz_b:
4242 case Intrinsic::loongarch_lasx_xbz_h:
4243 case Intrinsic::loongarch_lasx_xbz_w:
4244 case Intrinsic::loongarch_lasx_xbz_d:
4245 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4246 LoongArchISD::VANY_ZERO);
4247 break;
4248 case Intrinsic::loongarch_lsx_bz_v:
4249 case Intrinsic::loongarch_lasx_xbz_v:
4250 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4251 LoongArchISD::VALL_ZERO);
4252 break;
4253 case Intrinsic::loongarch_lsx_bnz_b:
4254 case Intrinsic::loongarch_lsx_bnz_h:
4255 case Intrinsic::loongarch_lsx_bnz_w:
4256 case Intrinsic::loongarch_lsx_bnz_d:
4257 case Intrinsic::loongarch_lasx_xbnz_b:
4258 case Intrinsic::loongarch_lasx_xbnz_h:
4259 case Intrinsic::loongarch_lasx_xbnz_w:
4260 case Intrinsic::loongarch_lasx_xbnz_d:
4261 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4262 LoongArchISD::VALL_NONZERO);
4263 break;
4264 case Intrinsic::loongarch_lsx_bnz_v:
4265 case Intrinsic::loongarch_lasx_xbnz_v:
4266 replaceVecCondBranchResults(N, Results, DAG, Subtarget,
4267 LoongArchISD::VANY_NONZERO);
4268 break;
4269 }
4270}
4271
4272 static void replaceCMP_SWAP_128Results(SDNode *N,
4273 SmallVectorImpl<SDValue> &Results,
4274 SelectionDAG &DAG) {
4275 assert(N->getValueType(0) == MVT::i128 &&
4276 "AtomicCmpSwap on types less than 128 should be legal");
4277 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4278
4279 unsigned Opcode;
4280 switch (MemOp->getMergedOrdering()) {
4281 case AtomicOrdering::Acquire:
4282 case AtomicOrdering::AcquireRelease:
4283 case AtomicOrdering::SequentiallyConsistent:
4284 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4285 break;
4286 case AtomicOrdering::Monotonic:
4287 case AtomicOrdering::Release:
4288 Opcode = LoongArch::PseudoCmpXchg128;
4289 break;
4290 default:
4291 llvm_unreachable("Unexpected ordering!");
4292 }
4293
4294 SDLoc DL(N);
4295 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4296 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4297 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4298 NewVal.first, NewVal.second, N->getOperand(0)};
4299
4300 SDNode *CmpSwap = DAG.getMachineNode(
4301 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4302 Ops);
4303 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4304 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4305 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4306 Results.push_back(SDValue(CmpSwap, 3));
4307}
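// Illustrative note (editorial): a 128-bit
//   cmpxchg ptr %p, i128 %old, i128 %new acquire acquire
// is split above into i64 halves and selected to PseudoCmpXchg128Acquire; a
// later expansion pass turns the pseudo into the target's 128-bit
// compare-and-swap loop, and the two i64 results are reassembled here with
// ISD::BUILD_PAIR.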
4308
4309 void LoongArchTargetLowering::ReplaceNodeResults(
4310 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
4311 SDLoc DL(N);
4312 EVT VT = N->getValueType(0);
4313 switch (N->getOpcode()) {
4314 default:
4315 llvm_unreachable("Don't know how to legalize this operation");
4316 case ISD::ADD:
4317 case ISD::SUB:
4318 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
4319 "Unexpected custom legalisation");
4320 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
4321 break;
4322 case ISD::SDIV:
4323 case ISD::UDIV:
4324 case ISD::SREM:
4325 case ISD::UREM:
4326 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4327 "Unexpected custom legalisation");
4328 Results.push_back(customLegalizeToWOp(N, DAG, 2,
4329 Subtarget.hasDiv32() && VT == MVT::i32
4330 ? ISD::ANY_EXTEND
4331 : ISD::SIGN_EXTEND));
4332 break;
4333 case ISD::SHL:
4334 case ISD::SRA:
4335 case ISD::SRL:
4336 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4337 "Unexpected custom legalisation");
4338 if (N->getOperand(1).getOpcode() != ISD::Constant) {
4339 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4340 break;
4341 }
4342 break;
4343 case ISD::ROTL:
4344 case ISD::ROTR:
4345 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4346 "Unexpected custom legalisation");
4347 Results.push_back(customLegalizeToWOp(N, DAG, 2));
4348 break;
4349 case ISD::FP_TO_SINT: {
4350 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4351 "Unexpected custom legalisation");
4352 SDValue Src = N->getOperand(0);
4353 EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
4354 if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
4355 TargetLowering::TypeSoftenFloat) {
4356 if (!isTypeLegal(Src.getValueType()))
4357 return;
4358 if (Src.getValueType() == MVT::f16)
4359 Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
4360 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
4361 Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
4362 return;
4363 }
4364 // If the FP type needs to be softened, emit a library call using the 'si'
4365 // version. If we left it to default legalization we'd end up with 'di'.
4366 RTLIB::Libcall LC;
4367 LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
4368 MakeLibCallOptions CallOptions;
4369 EVT OpVT = Src.getValueType();
4370 CallOptions.setTypeListBeforeSoften(OpVT, VT);
4371 SDValue Chain = SDValue();
4372 SDValue Result;
4373 std::tie(Result, Chain) =
4374 makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
4375 Results.push_back(Result);
4376 break;
4377 }
4378 case ISD::BITCAST: {
4379 SDValue Src = N->getOperand(0);
4380 EVT SrcVT = Src.getValueType();
4381 if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
4382 Subtarget.hasBasicF()) {
4383 SDValue Dst =
4384 DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
4385 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
4386 } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
4388 DAG.getVTList(MVT::i32, MVT::i32), Src);
4389 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
4390 NewReg.getValue(0), NewReg.getValue(1));
4391 Results.push_back(RetReg);
4392 }
4393 break;
4394 }
4395 case ISD::FP_TO_UINT: {
4396 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4397 "Unexpected custom legalisation");
4398 auto &TLI = DAG.getTargetLoweringInfo();
4399 SDValue Tmp1, Tmp2;
4400 TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
4401 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
4402 break;
4403 }
4404 case ISD::BSWAP: {
4405 SDValue Src = N->getOperand(0);
4406 assert((VT == MVT::i16 || VT == MVT::i32) &&
4407 "Unexpected custom legalization");
4408 MVT GRLenVT = Subtarget.getGRLenVT();
4409 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4410 SDValue Tmp;
4411 switch (VT.getSizeInBits()) {
4412 default:
4413 llvm_unreachable("Unexpected operand width");
4414 case 16:
4415 Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
4416 break;
4417 case 32:
4418 // Only LA64 will get to here due to the size mismatch between VT and
4419 // GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
4420 Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
4421 break;
4422 }
4423 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4424 break;
4425 }
4426 case ISD::BITREVERSE: {
4427 SDValue Src = N->getOperand(0);
4428 assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
4429 "Unexpected custom legalization");
4430 MVT GRLenVT = Subtarget.getGRLenVT();
4431 SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
4432 SDValue Tmp;
4433 switch (VT.getSizeInBits()) {
4434 default:
4435 llvm_unreachable("Unexpected operand width");
4436 case 8:
4437 Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
4438 break;
4439 case 32:
4440 Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
4441 break;
4442 }
4443 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
4444 break;
4445 }
4446 case ISD::CTLZ:
4447 case ISD::CTTZ: {
4448 assert(VT == MVT::i32 && Subtarget.is64Bit() &&
4449 "Unexpected custom legalisation");
4450 Results.push_back(customLegalizeToWOp(N, DAG, 1));
4451 break;
4452 }
4453 case ISD::INTRINSIC_W_CHAIN: {
4454 SDValue Chain = N->getOperand(0);
4455 SDValue Op2 = N->getOperand(2);
4456 MVT GRLenVT = Subtarget.getGRLenVT();
4457 const StringRef ErrorMsgOOR = "argument out of range";
4458 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4459 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4460
4461 switch (N->getConstantOperandVal(1)) {
4462 default:
4463 llvm_unreachable("Unexpected Intrinsic.");
4464 case Intrinsic::loongarch_movfcsr2gr: {
4465 if (!Subtarget.hasBasicF()) {
4466 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
4467 return;
4468 }
4469 unsigned Imm = Op2->getAsZExtVal();
4470 if (!isUInt<2>(Imm)) {
4471 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4472 return;
4473 }
4474 SDValue MOVFCSR2GRResults = DAG.getNode(
4475 LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
4476 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4477 Results.push_back(
4478 DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
4479 Results.push_back(MOVFCSR2GRResults.getValue(1));
4480 break;
4481 }
4482#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
4483 case Intrinsic::loongarch_##NAME: { \
4484 SDValue NODE = DAG.getNode( \
4485 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4486 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4487 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4488 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4489 Results.push_back(NODE.getValue(1)); \
4490 break; \
4491 }
4492 CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
4493 CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
4494 CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
4495 CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
4496 CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
4497 CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
4498#undef CRC_CASE_EXT_BINARYOP
4499
4500#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
4501 case Intrinsic::loongarch_##NAME: { \
4502 SDValue NODE = DAG.getNode( \
4503 LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4504 {Chain, Op2, \
4505 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
4506 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
4507 Results.push_back(NODE.getValue(1)); \
4508 break; \
4509 }
4510 CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
4511 CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
4512#undef CRC_CASE_EXT_UNARYOP
4513#define CSR_CASE(ID) \
4514 case Intrinsic::loongarch_##ID: { \
4515 if (!Subtarget.is64Bit()) \
4516 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
4517 break; \
4518 }
4519 CSR_CASE(csrrd_d);
4520 CSR_CASE(csrwr_d);
4521 CSR_CASE(csrxchg_d);
4522 CSR_CASE(iocsrrd_d);
4523#undef CSR_CASE
4524 case Intrinsic::loongarch_csrrd_w: {
4525 unsigned Imm = Op2->getAsZExtVal();
4526 if (!isUInt<14>(Imm)) {
4527 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4528 return;
4529 }
4530 SDValue CSRRDResults =
4531 DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4532 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4533 Results.push_back(
4534 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
4535 Results.push_back(CSRRDResults.getValue(1));
4536 break;
4537 }
4538 case Intrinsic::loongarch_csrwr_w: {
4539 unsigned Imm = N->getConstantOperandVal(3);
4540 if (!isUInt<14>(Imm)) {
4541 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4542 return;
4543 }
4544 SDValue CSRWRResults =
4545 DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4546 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4547 DAG.getConstant(Imm, DL, GRLenVT)});
4548 Results.push_back(
4549 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
4550 Results.push_back(CSRWRResults.getValue(1));
4551 break;
4552 }
4553 case Intrinsic::loongarch_csrxchg_w: {
4554 unsigned Imm = N->getConstantOperandVal(4);
4555 if (!isUInt<14>(Imm)) {
4556 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
4557 return;
4558 }
4559 SDValue CSRXCHGResults = DAG.getNode(
4560 LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4561 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
4562 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
4563 DAG.getConstant(Imm, DL, GRLenVT)});
4564 Results.push_back(
4565 DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
4566 Results.push_back(CSRXCHGResults.getValue(1));
4567 break;
4568 }
4569#define IOCSRRD_CASE(NAME, NODE) \
4570 case Intrinsic::loongarch_##NAME: { \
4571 SDValue IOCSRRDResults = \
4572 DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
4573 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
4574 Results.push_back( \
4575 DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
4576 Results.push_back(IOCSRRDResults.getValue(1)); \
4577 break; \
4578 }
4579 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4580 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4581 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4582#undef IOCSRRD_CASE
4583 case Intrinsic::loongarch_cpucfg: {
4584 SDValue CPUCFGResults =
4585 DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4586 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
4587 Results.push_back(
4588 DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
4589 Results.push_back(CPUCFGResults.getValue(1));
4590 break;
4591 }
4592 case Intrinsic::loongarch_lddir_d: {
4593 if (!Subtarget.is64Bit()) {
4594 emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
4595 return;
4596 }
4597 break;
4598 }
4599 }
4600 break;
4601 }
4602 case ISD::READ_REGISTER: {
4603 if (Subtarget.is64Bit())
4604 DAG.getContext()->emitError(
4605 "On LA64, only 64-bit registers can be read.");
4606 else
4607 DAG.getContext()->emitError(
4608 "On LA32, only 32-bit registers can be read.");
4609 Results.push_back(DAG.getUNDEF(VT));
4610 Results.push_back(N->getOperand(0));
4611 break;
4612 }
4613 case ISD::INTRINSIC_WO_CHAIN: {
4614 replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
4615 break;
4616 }
4617 case ISD::LROUND: {
4618 SDValue Op0 = N->getOperand(0);
4619 EVT OpVT = Op0.getValueType();
4620 RTLIB::Libcall LC =
4621 OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
4622 MakeLibCallOptions CallOptions;
4623 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
4624 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
4625 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
4626 Results.push_back(Result);
4627 break;
4628 }
4629 case ISD::ATOMIC_CMP_SWAP: {
4630 replaceCMP_SWAP_128Results(N, Results, DAG);
4631 break;
4632 }
4633 case ISD::TRUNCATE: {
4634 MVT VT = N->getSimpleValueType(0);
4635 if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
4636 return;
4637
4638 MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
4639 SDValue In = N->getOperand(0);
4640 EVT InVT = In.getValueType();
4641 EVT InEltVT = InVT.getVectorElementType();
4642 EVT EltVT = VT.getVectorElementType();
4643 unsigned MinElts = VT.getVectorNumElements();
4644 unsigned WidenNumElts = WidenVT.getVectorNumElements();
4645 unsigned InBits = InVT.getSizeInBits();
4646
4647 if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
4648 if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
4649 int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
4650 SmallVector<int, 16> TruncMask(WidenNumElts, -1);
4651 for (unsigned I = 0; I < MinElts; ++I)
4652 TruncMask[I] = Scale * I;
4653
4654 unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
4655 MVT SVT = In.getSimpleValueType().getScalarType();
4656 MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
4657 SDValue WidenIn =
4658 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
4659 DAG.getVectorIdxConstant(0, DL));
4660 assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
4661 "Illegal vector type in truncation");
4662 WidenIn = DAG.getBitcast(WidenVT, WidenIn);
4663 Results.push_back(
4664 DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
4665 return;
4666 }
4667 }
4668
4669 break;
4670 }
4671 }
4672}
4673
4674 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
4675 TargetLowering::DAGCombinerInfo &DCI,
4676 const LoongArchSubtarget &Subtarget) {
4677 if (DCI.isBeforeLegalizeOps())
4678 return SDValue();
4679
4680 SDValue FirstOperand = N->getOperand(0);
4681 SDValue SecondOperand = N->getOperand(1);
4682 unsigned FirstOperandOpc = FirstOperand.getOpcode();
4683 EVT ValTy = N->getValueType(0);
4684 SDLoc DL(N);
4685 uint64_t lsb, msb;
4686 unsigned SMIdx, SMLen;
4687 ConstantSDNode *CN;
4688 SDValue NewOperand;
4689 MVT GRLenVT = Subtarget.getGRLenVT();
4690
4691 // BSTRPICK requires the 32S feature.
4692 if (!Subtarget.has32S())
4693 return SDValue();
4694
4695 // Op's second operand must be a shifted mask.
4696 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
4697 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
4698 return SDValue();
4699
4700 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
4701 // Pattern match BSTRPICK.
4702 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
4703 // => BSTRPICK $dst, $src, msb, lsb
4704 // where msb = lsb + len - 1
4705
4706 // The second operand of the shift must be an immediate.
4707 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
4708 return SDValue();
4709
4710 lsb = CN->getZExtValue();
4711
4712 // Return if the shifted mask does not start at bit 0 or the sum of its
4713 // length and lsb exceeds the word's size.
4714 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
4715 return SDValue();
4716
4717 NewOperand = FirstOperand.getOperand(0);
4718 } else {
4719 // Pattern match BSTRPICK.
4720 // $dst = and $src, (2**len- 1) , if len > 12
4721 // => BSTRPICK $dst, $src, msb, lsb
4722 // where lsb = 0 and msb = len - 1
4723
4724 // If the mask is <= 0xfff, andi can be used instead.
4725 if (CN->getZExtValue() <= 0xfff)
4726 return SDValue();
4727
4728 // Return if the mask's MSB exceeds the width of the value.
4729 if (SMIdx + SMLen > ValTy.getSizeInBits())
4730 return SDValue();
4731
4732 if (SMIdx > 0) {
4733 // Omit if the constant has more than 2 uses. This is a conservative
4734 // decision. Whether it is a win depends on the HW microarchitecture.
4735 // However it should always be better for 1 and 2 uses.
4736 if (CN->use_size() > 2)
4737 return SDValue();
4738 // Return if the constant can be composed by a single LU12I.W.
4739 if ((CN->getZExtValue() & 0xfff) == 0)
4740 return SDValue();
4741 // Return if the constant can be composed by a single ADDI with
4742 // the zero register.
4743 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
4744 return SDValue();
4745 }
4746
4747 lsb = SMIdx;
4748 NewOperand = FirstOperand;
4749 }
4750
4751 msb = lsb + SMLen - 1;
4752 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
4753 DAG.getConstant(msb, DL, GRLenVT),
4754 DAG.getConstant(lsb, DL, GRLenVT));
4755 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
4756 return NR0;
4757 // Try to optimize to
4758 // bstrpick $Rd, $Rs, msb, lsb
4759 // slli $Rd, $Rd, lsb
4760 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
4761 DAG.getConstant(lsb, DL, GRLenVT));
4762}
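// Worked example (illustrative):
//   $dst = and (srl $src, 4), 0xff        ; SMIdx = 0, SMLen = 8, lsb = 4
// becomes BSTRPICK $dst, $src, 11, 4 (msb = lsb + len - 1), while
//   $dst = and $src, 0xff000              ; SMIdx = 12, SMLen = 8
// becomes BSTRPICK $dst, $src, 19, 12 followed by slli $dst, $dst, 12.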
4763
4764 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
4765 TargetLowering::DAGCombinerInfo &DCI,
4766 const LoongArchSubtarget &Subtarget) {
4767 // BSTRPICK requires the 32S feature.
4768 if (!Subtarget.has32S())
4769 return SDValue();
4770
4771 if (DCI.isBeforeLegalizeOps())
4772 return SDValue();
4773
4774 // $dst = srl (and $src, Mask), Shamt
4775 // =>
4776 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
4777 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
4778 //
4779
4780 SDValue FirstOperand = N->getOperand(0);
4781 ConstantSDNode *CN;
4782 EVT ValTy = N->getValueType(0);
4783 SDLoc DL(N);
4784 MVT GRLenVT = Subtarget.getGRLenVT();
4785 unsigned MaskIdx, MaskLen;
4786 uint64_t Shamt;
4787
4788 // The first operand must be an AND and the second operand of the AND must be
4789 // a shifted mask.
4790 if (FirstOperand.getOpcode() != ISD::AND ||
4791 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
4792 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
4793 return SDValue();
4794
4795 // The second operand (shift amount) must be an immediate.
4796 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
4797 return SDValue();
4798
4799 Shamt = CN->getZExtValue();
4800 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
4801 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
4802 FirstOperand->getOperand(0),
4803 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
4804 DAG.getConstant(Shamt, DL, GRLenVT));
4805
4806 return SDValue();
4807}
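// Worked example (illustrative):
//   $dst = srl (and $src, 0xff0), 4       ; MaskIdx = 4, MaskLen = 8, Shamt = 4
// satisfies MaskIdx <= Shamt <= MaskIdx + MaskLen - 1 and is rewritten as
//   BSTRPICK $dst, $src, 11, 4.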
4808
4809// Helper to peek through bitops/trunc/setcc to determine size of source vector.
4810// Allows BITCASTCombine to determine what size vector generated a <X x i1>.
4811static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
4812 unsigned Depth) {
4813 // Limit recursion.
4814 if (Depth >= SelectionDAG::MaxRecursionDepth)
4815 return false;
4816 switch (Src.getOpcode()) {
4817 case ISD::SETCC:
4818 case ISD::TRUNCATE:
4819 return Src.getOperand(0).getValueSizeInBits() == Size;
4820 case ISD::FREEZE:
4821 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
4822 case ISD::AND:
4823 case ISD::XOR:
4824 case ISD::OR:
4825 return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
4826 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
4827 case ISD::SELECT:
4828 case ISD::VSELECT:
4829 return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
4830 checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
4831 checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
4832 case ISD::BUILD_VECTOR:
4833 return ISD::isBuildVectorAllZeros(Src.getNode()) ||
4834 ISD::isBuildVectorAllOnes(Src.getNode());
4835 }
4836 return false;
4837}
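// Example (illustrative): for Src = (and (setcc v8i32 ...), (freeze (setcc
// v8i32 ...))) the helper returns true for Size == 256, because both legs
// bottom out in setcc nodes whose compared operands are 256-bit vectors.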
4838
4839// Helper to push sign extension of vXi1 SETCC result through bitops.
4840 static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
4841 SDValue Src, const SDLoc &DL) {
4842 switch (Src.getOpcode()) {
4843 case ISD::SETCC:
4844 case ISD::FREEZE:
4845 case ISD::TRUNCATE:
4846 case ISD::BUILD_VECTOR:
4847 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
4848 case ISD::AND:
4849 case ISD::XOR:
4850 case ISD::OR:
4851 return DAG.getNode(
4852 Src.getOpcode(), DL, SExtVT,
4853 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
4854 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
4855 case ISD::SELECT:
4856 case ISD::VSELECT:
4857 return DAG.getSelect(
4858 DL, SExtVT, Src.getOperand(0),
4859 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
4860 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
4861 }
4862 llvm_unreachable("Unexpected node type for vXi1 sign extension");
4863}
4864
4865static SDValue
4866 performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
4867 TargetLowering::DAGCombinerInfo &DCI,
4868 const LoongArchSubtarget &Subtarget) {
4869 SDLoc DL(N);
4870 EVT VT = N->getValueType(0);
4871 SDValue Src = N->getOperand(0);
4872 EVT SrcVT = Src.getValueType();
4873
4874 if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
4875 return SDValue();
4876
4877 bool UseLASX;
4878 unsigned Opc = ISD::DELETED_NODE;
4879 EVT CmpVT = Src.getOperand(0).getValueType();
4880 EVT EltVT = CmpVT.getVectorElementType();
4881
4882 if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
4883 UseLASX = false;
4884 else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
4885 CmpVT.getSizeInBits() == 256)
4886 UseLASX = true;
4887 else
4888 return SDValue();
4889
4890 SDValue SrcN1 = Src.getOperand(1);
4891 switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
4892 default:
4893 break;
4894 case ISD::SETEQ:
4895 // x == 0 => not (vmsknez.b x)
4896 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4897 Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
4898 break;
4899 case ISD::SETGT:
4900 // x > -1 => vmskgez.b x
4901 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
4902 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4903 break;
4904 case ISD::SETGE:
4905 // x >= 0 => vmskgez.b x
4906 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4907 Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
4908 break;
4909 case ISD::SETLT:
4910 // x < 0 => vmskltz.{b,h,w,d} x
4911 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
4912 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4913 EltVT == MVT::i64))
4914 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4915 break;
4916 case ISD::SETLE:
4917 // x <= -1 => vmskltz.{b,h,w,d} x
4918 if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
4919 (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
4920 EltVT == MVT::i64))
4921 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
4922 break;
4923 case ISD::SETNE:
4924 // x != 0 => vmsknez.b x
4925 if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
4926 Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
4927 break;
4928 }
4929
4930 if (Opc == ISD::DELETED_NODE)
4931 return SDValue();
4932
4933 SDValue V = DAG.getNode(Opc, DL, MVT::i64, Src.getOperand(0));
4934 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
4935 V = DAG.getZExtOrTrunc(V, DL, T);
4936 return DAG.getBitcast(VT, V);
4937}
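// Worked example (illustrative): on LSX the IR sequence
//   %c = icmp slt <16 x i8> %v, zeroinitializer
//   %m = bitcast <16 x i1> %c to i16
// matches the SETLT-against-zero case above and is rewritten as a VMSKLTZ
// node (a vmskltz.b based sequence) truncated to i16, instead of a generic
// compare-then-pack expansion.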
4938
4939 static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
4940 TargetLowering::DAGCombinerInfo &DCI,
4941 const LoongArchSubtarget &Subtarget) {
4942 SDLoc DL(N);
4943 EVT VT = N->getValueType(0);
4944 SDValue Src = N->getOperand(0);
4945 EVT SrcVT = Src.getValueType();
4946
4947 if (!DCI.isBeforeLegalizeOps())
4948 return SDValue();
4949
4950 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
4951 return SDValue();
4952
4953 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
4954 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
4955 if (Res)
4956 return Res;
4957
4958 // Generate vXi1 using [X]VMSKLTZ
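// For example, (i16 (bitcast (v16i1 Src))) becomes
//   (i16 (trunc (i64 (VMSKLTZ (v16i8 (sign_extend Src))))))
// since [X]VMSKLTZ packs the sign bit of each element into the low bits of
// an i64.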
4959 MVT SExtVT;
4960 unsigned Opc;
4961 bool UseLASX = false;
4962 bool PropagateSExt = false;
4963
4964 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
4965 EVT CmpVT = Src.getOperand(0).getValueType();
4966 if (CmpVT.getSizeInBits() > 256)
4967 return SDValue();
4968 }
4969
4970 switch (SrcVT.getSimpleVT().SimpleTy) {
4971 default:
4972 return SDValue();
4973 case MVT::v2i1:
4974 SExtVT = MVT::v2i64;
4975 break;
4976 case MVT::v4i1:
4977 SExtVT = MVT::v4i32;
4978 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4979 SExtVT = MVT::v4i64;
4980 UseLASX = true;
4981 PropagateSExt = true;
4982 }
4983 break;
4984 case MVT::v8i1:
4985 SExtVT = MVT::v8i16;
4986 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4987 SExtVT = MVT::v8i32;
4988 UseLASX = true;
4989 PropagateSExt = true;
4990 }
4991 break;
4992 case MVT::v16i1:
4993 SExtVT = MVT::v16i8;
4994 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
4995 SExtVT = MVT::v16i16;
4996 UseLASX = true;
4997 PropagateSExt = true;
4998 }
4999 break;
5000 case MVT::v32i1:
5001 SExtVT = MVT::v32i8;
5002 UseLASX = true;
5003 break;
5004 };
5005 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5006 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5007
5008 SDValue V;
5009 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5010 if (Src.getSimpleValueType() == MVT::v32i8) {
5011 SDValue Lo, Hi;
5012 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5013 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Lo);
5014 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, MVT::i64, Hi);
5015 Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
5016 DAG.getConstant(16, DL, MVT::i8));
5017 V = DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
5018 } else if (UseLASX) {
5019 return SDValue();
5020 }
5021 }
5022
5023 if (!V) {
5024 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5025 V = DAG.getNode(Opc, DL, MVT::i64, Src);
5026 }
5027
5028 EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
5029 V = DAG.getZExtOrTrunc(V, DL, T);
5030 return DAG.getBitcast(VT, V);
5031}
5032
5033 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
5034 TargetLowering::DAGCombinerInfo &DCI,
5035 const LoongArchSubtarget &Subtarget) {
5036 MVT GRLenVT = Subtarget.getGRLenVT();
5037 EVT ValTy = N->getValueType(0);
5038 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5039 ConstantSDNode *CN0, *CN1;
5040 SDLoc DL(N);
5041 unsigned ValBits = ValTy.getSizeInBits();
5042 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5043 unsigned Shamt;
5044 bool SwapAndRetried = false;
5045
5046 // BSTRPICK requires the 32S feature.
5047 if (!Subtarget.has32S())
5048 return SDValue();
5049
5050 if (DCI.isBeforeLegalizeOps())
5051 return SDValue();
5052
5053 if (ValBits != 32 && ValBits != 64)
5054 return SDValue();
5055
5056Retry:
5057 // 1st pattern to match BSTRINS:
5058 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5059 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5060 // =>
5061 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
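// For example, on i32 with size = 8 and lsb = 4 (mask1 = 0x00000ff0,
// mask0 = 0xfffff00f):
//   R = or (and X, 0xfffff00f), (and (shl Y, 4), 0x00000ff0)
//   => R = BSTRINS X, Y, 11, 4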
5062 if (N0.getOpcode() == ISD::AND &&
5063 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5064 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5065 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5066 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5067 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5068 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5069 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5070 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5071 (MaskIdx0 + MaskLen0 <= ValBits)) {
5072 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5073 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5074 N1.getOperand(0).getOperand(0),
5075 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5076 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5077 }
5078
5079 // 2nd pattern to match BSTRINS:
5080 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5081 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5082 // =>
5083 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
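// For example, on i32 with size = 8 and lsb = 4 (mask1 = 0x000000ff,
// mask0 = 0xfffff00f):
//   R = or (and X, 0xfffff00f), (shl (and Y, 0x000000ff), 4)
//   => R = BSTRINS X, Y, 11, 4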
5084 if (N0.getOpcode() == ISD::AND &&
5085 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5086 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5087 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5088 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5089 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5090 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5091 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5092 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5093 (MaskIdx0 + MaskLen0 <= ValBits)) {
5094 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5095 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5096 N1.getOperand(0).getOperand(0),
5097 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5098 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5099 }
5100
5101 // 3rd pattern to match BSTRINS:
5102 // R = or (and X, mask0), (and Y, mask1)
5103 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5104 // =>
5105 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5106 // where msb = lsb + size - 1
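// For example, on i32 with ~mask0 = 0x00000ff0 (size = 8, lsb = 4) and
// mask1 = 0x00000ff0:
//   R = or (and X, 0xfffff00f), (and Y, 0x00000ff0)
//   => R = BSTRINS X, (shr (and Y, 0x00000ff0), 4), 11, 4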
5107 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5108 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5109 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5110 (MaskIdx0 + MaskLen0 <= 64) &&
5111 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5112 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5113 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
5114 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5115 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5116 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5117 DAG.getConstant(ValBits == 32
5118 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5119 : (MaskIdx0 + MaskLen0 - 1),
5120 DL, GRLenVT),
5121 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5122 }
5123
5124 // 4th pattern to match BSTRINS:
5125 // R = or (and X, mask), (shl Y, shamt)
5126 // where mask = (2**shamt - 1)
5127 // =>
5128 // R = BSTRINS X, Y, ValBits - 1, shamt
5129 // where ValBits = 32 or 64
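// For example, on i32 with shamt = 12 (mask = 0x00000fff):
//   R = or (and X, 0x00000fff), (shl Y, 12)
//   => R = BSTRINS X, Y, 31, 12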
5130 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5131 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5132 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5133 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5134 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5135 (MaskIdx0 + MaskLen0 <= ValBits)) {
5136 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5137 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5138 N1.getOperand(0),
5139 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5140 DAG.getConstant(Shamt, DL, GRLenVT));
5141 }
5142
5143 // 5th pattern to match BSTRINS:
5144 // R = or (and X, mask), const
5145 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5146 // =>
5147 // R = BSTRINS X, (const >> lsb), msb, lsb
5148 // where msb = lsb + size - 1
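// For example, on i32 with ~mask = 0x0000ff00 (size = 8, lsb = 8) and
// const = 0x00002a00:
//   R = or (and X, 0xffff00ff), 0x00002a00
//   => R = BSTRINS X, 0x2a, 15, 8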
5149 if (N0.getOpcode() == ISD::AND &&
5150 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5151 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5152 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5153 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5154 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5155 return DAG.getNode(
5156 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5157 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5158 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5159 : (MaskIdx0 + MaskLen0 - 1),
5160 DL, GRLenVT),
5161 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5162 }
5163
5164 // 6th pattern.
5165 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5166 // by the incoming bits are known to be zero.
5167 // =>
5168 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5169 //
5170 // Note that the 1st pattern is a special case of the 6th, i.e. the 6th
5171 // pattern is more common than the 1st. So we put the 1st before the 6th in
5172 // order to match as many nodes as possible.
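// For example, with mask = 0x000000ff and shamt = 8, provided bits 8..15 of
// b are known to be zero:
//   a = or b, (shl (and c, 0x000000ff), 8)
//   => a = BSTRINS b, c, 15, 8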
5173 ConstantSDNode *CNMask, *CNShamt;
5174 unsigned MaskIdx, MaskLen;
5175 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5176 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5177 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5178 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5179 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5180 Shamt = CNShamt->getZExtValue();
5181 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
5182 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5183 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5184 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5185 N1.getOperand(0).getOperand(0),
5186 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5187 DAG.getConstant(Shamt, DL, GRLenVT));
5188 }
5189 }
5190
5191 // 7th pattern.
5192 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5193 // overwritten by the incoming bits are known to be zero.
5194 // =>
5195 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5196 //
5197 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5198 // before the 7th in order to match as many nodes as possible.
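// For example, with shifted_mask = 0x000ff000 (MaskIdx = 12, MaskLen = 8)
// and shamt = 12, provided bits 12..19 of b are known to be zero:
//   a = or b, (and (shl c, 12), 0x000ff000)
//   => a = BSTRINS b, c, 19, 12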
5199 if (N1.getOpcode() == ISD::AND &&
5200 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5201 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5202 N1.getOperand(0).getOpcode() == ISD::SHL &&
5203 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5204 CNShamt->getZExtValue() == MaskIdx) {
5205 APInt ShMask(ValBits, CNMask->getZExtValue());
5206 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5207 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5208 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5209 N1.getOperand(0).getOperand(0),
5210 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5211 DAG.getConstant(MaskIdx, DL, GRLenVT));
5212 }
5213 }
5214
5215 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5216 if (!SwapAndRetried) {
5217 std::swap(N0, N1);
5218 SwapAndRetried = true;
5219 goto Retry;
5220 }
5221
5222 SwapAndRetried = false;
5223Retry2:
5224 // 8th pattern.
5225 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5226 // the incoming bits are known to be zero.
5227 // =>
5228 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5229 //
5230 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5231 // we put it here in order to match as many nodes as possible or generate fewer
5232 // instructions.
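// For example, with shifted_mask = 0x000ff000 (MaskIdx = 12, MaskLen = 8),
// provided bits 12..19 of b are known to be zero:
//   a = or b, (and c, 0x000ff000)
//   => a = BSTRINS b, (c >> 12), 19, 12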
5233 if (N1.getOpcode() == ISD::AND &&
5234 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5235 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5236 APInt ShMask(ValBits, CNMask->getZExtValue());
5237 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5238 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5239 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5240 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5241 N1->getOperand(0),
5242 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5243 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5244 DAG.getConstant(MaskIdx, DL, GRLenVT));
5245 }
5246 }
5247 // Swap N0/N1 and retry.
5248 if (!SwapAndRetried) {
5249 std::swap(N0, N1);
5250 SwapAndRetried = true;
5251 goto Retry2;
5252 }
5253
5254 return SDValue();
5255}
5256
5257static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5258 ExtType = ISD::NON_EXTLOAD;
5259
5260 switch (V.getNode()->getOpcode()) {
5261 case ISD::LOAD: {
5262 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5263 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5264 (LoadNode->getMemoryVT() == MVT::i16)) {
5265 ExtType = LoadNode->getExtensionType();
5266 return true;
5267 }
5268 return false;
5269 }
5270 case ISD::AssertSext: {
5271 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5272 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5273 ExtType = ISD::SEXTLOAD;
5274 return true;
5275 }
5276 return false;
5277 }
5278 case ISD::AssertZext: {
5279 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5280 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5281 ExtType = ISD::ZEXTLOAD;
5282 return true;
5283 }
5284 return false;
5285 }
5286 default:
5287 return false;
5288 }
5289
5290 return false;
5291}
5292
5293// Eliminate redundant truncation and zero-extension nodes.
5294// * Case 1:
5295// +------------+ +------------+ +------------+
5296// | Input1 | | Input2 | | CC |
5297// +------------+ +------------+ +------------+
5298// | | |
5299// V V +----+
5300// +------------+ +------------+ |
5301// | TRUNCATE | | TRUNCATE | |
5302// +------------+ +------------+ |
5303// | | |
5304// V V |
5305// +------------+ +------------+ |
5306// | ZERO_EXT | | ZERO_EXT | |
5307// +------------+ +------------+ |
5308// | | |
5309// | +-------------+ |
5310// V V | |
5311// +----------------+ | |
5312// | AND | | |
5313// +----------------+ | |
5314// | | |
5315// +---------------+ | |
5316// | | |
5317// V V V
5318// +-------------+
5319// | CMP |
5320// +-------------+
5321// * Case 2:
5322// +------------+ +------------+ +-------------+ +------------+ +------------+
5323// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5324// +------------+ +------------+ +-------------+ +------------+ +------------+
5325// | | | | |
5326// V | | | |
5327// +------------+ | | | |
5328// | XOR |<---------------------+ | |
5329// +------------+ | | |
5330// | | | |
5331// V V +---------------+ |
5332// +------------+ +------------+ | |
5333// | TRUNCATE | | TRUNCATE | | +-------------------------+
5334// +------------+ +------------+ | |
5335// | | | |
5336// V V | |
5337// +------------+ +------------+ | |
5338// | ZERO_EXT | | ZERO_EXT | | |
5339// +------------+ +------------+ | |
5340// | | | |
5341// V V | |
5342// +----------------+ | |
5343// | AND | | |
5344// +----------------+ | |
5345// | | |
5346// +---------------+ | |
5347// | | |
5348// V V V
5349// +-------------+
5350// | CMP |
5351// +-------------+
5352 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
5353 TargetLowering::DAGCombinerInfo &DCI,
5354 const LoongArchSubtarget &Subtarget) {
5355 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5356
5357 SDNode *AndNode = N->getOperand(0).getNode();
5358 if (AndNode->getOpcode() != ISD::AND)
5359 return SDValue();
5360
5361 SDValue AndInputValue2 = AndNode->getOperand(1);
5362 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5363 return SDValue();
5364
5365 SDValue CmpInputValue = N->getOperand(1);
5366 SDValue AndInputValue1 = AndNode->getOperand(0);
5367 if (AndInputValue1.getOpcode() == ISD::XOR) {
5368 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5369 return SDValue();
5370 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5371 if (!CN || CN->getSExtValue() != -1)
5372 return SDValue();
5373 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5374 if (!CN || CN->getSExtValue() != 0)
5375 return SDValue();
5376 AndInputValue1 = AndInputValue1.getOperand(0);
5377 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5378 return SDValue();
5379 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5380 if (AndInputValue2 != CmpInputValue)
5381 return SDValue();
5382 } else {
5383 return SDValue();
5384 }
5385
5386 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5387 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5388 return SDValue();
5389
5390 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5391 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5392 return SDValue();
5393
5394 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5395 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5396 ISD::LoadExtType ExtType1;
5397 ISD::LoadExtType ExtType2;
5398
5399 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5400 !checkValueWidth(TruncInputValue2, ExtType2))
5401 return SDValue();
5402
5403 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5404 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5405 return SDValue();
5406
5407 if ((ExtType2 != ISD::ZEXTLOAD) &&
5408 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5409 return SDValue();
5410
5411 // These truncation and zero-extension nodes are not necessary, remove them.
5412 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5413 TruncInputValue1, TruncInputValue2);
5414 SDValue NewSetCC =
5415 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5416 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5417 return SDValue(N, 0);
5418}
5419
5420// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
5421 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
5422 TargetLowering::DAGCombinerInfo &DCI,
5423 const LoongArchSubtarget &Subtarget) {
5424 if (DCI.isBeforeLegalizeOps())
5425 return SDValue();
5426
5427 SDValue Src = N->getOperand(0);
5428 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5429 return SDValue();
5430
5431 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5432 Src.getOperand(0));
5433}
5434
5435// Perform common combines for BR_CC and SELECT_CC conditions.
5436static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5437 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5438 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5439
5440 // Since an arithmetic right shift always preserves the sign bit,
5441 // the shift can be omitted.
5442 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5443 // setge (sra X, N), 0 -> setge X, 0
5444 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5445 LHS.getOpcode() == ISD::SRA) {
5446 LHS = LHS.getOperand(0);
5447 return true;
5448 }
5449
5450 if (!ISD::isIntEqualitySetCC(CCVal))
5451 return false;
5452
5453 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5454 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5455 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5456 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5457 // If we're looking for eq 0 instead of ne 0, we need to invert the
5458 // condition.
5459 bool Invert = CCVal == ISD::SETEQ;
5460 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5461 if (Invert)
5462 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5463
5464 RHS = LHS.getOperand(1);
5465 LHS = LHS.getOperand(0);
5466 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5467
5468 CC = DAG.getCondCode(CCVal);
5469 return true;
5470 }
5471
5472 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
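// For example, with C = 5 on LA64:
//   ((srl (and X, 0x20), 5), 0, ne) -> ((shl X, 58), 0, lt)
// i.e. bit 5 of X is shifted into the sign bit and tested against zero.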
5473 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
5474 LHS.getOperand(1).getOpcode() == ISD::Constant) {
5475 SDValue LHS0 = LHS.getOperand(0);
5476 if (LHS0.getOpcode() == ISD::AND &&
5477 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
5478 uint64_t Mask = LHS0.getConstantOperandVal(1);
5479 uint64_t ShAmt = LHS.getConstantOperandVal(1);
5480 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
5481 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
5482 CC = DAG.getCondCode(CCVal);
5483
5484 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
5485 LHS = LHS0.getOperand(0);
5486 if (ShAmt != 0)
5487 LHS =
5488 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
5489 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
5490 return true;
5491 }
5492 }
5493 }
5494
5495 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
5496 // This can occur when legalizing some floating point comparisons.
5497 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
5498 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
5499 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5500 CC = DAG.getCondCode(CCVal);
5501 RHS = DAG.getConstant(0, DL, LHS.getValueType());
5502 return true;
5503 }
5504
5505 return false;
5506}
5507
5508 static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG,
5509 TargetLowering::DAGCombinerInfo &DCI,
5510 const LoongArchSubtarget &Subtarget) {
5511 SDValue LHS = N->getOperand(1);
5512 SDValue RHS = N->getOperand(2);
5513 SDValue CC = N->getOperand(3);
5514 SDLoc DL(N);
5515
5516 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5517 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
5518 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
5519
5520 return SDValue();
5521}
5522
5523 static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
5524 TargetLowering::DAGCombinerInfo &DCI,
5525 const LoongArchSubtarget &Subtarget) {
5526 // Transform
5527 SDValue LHS = N->getOperand(0);
5528 SDValue RHS = N->getOperand(1);
5529 SDValue CC = N->getOperand(2);
5530 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5531 SDValue TrueV = N->getOperand(3);
5532 SDValue FalseV = N->getOperand(4);
5533 SDLoc DL(N);
5534 EVT VT = N->getValueType(0);
5535
5536 // If the True and False values are the same, we don't need a select_cc.
5537 if (TrueV == FalseV)
5538 return TrueV;
5539
5540 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
5541 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
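// For example, on LA64 with y = 5 and z = 3:
//   (select (x < 0), 5, 3) -> (x >> 63) & (5 - 3) + 3
// which yields 5 when x is negative ((x >> 63) == -1) and 3 otherwise.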
5542 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
5543 isNullConstant(RHS) &&
5544 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
5545 if (CCVal == ISD::CondCode::SETGE)
5546 std::swap(TrueV, FalseV);
5547
5548 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
5549 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
5550 // Only handle simm12; if the constant is not in this range, it can be
5551 // treated as a register operand.
5552 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
5553 isInt<12>(TrueSImm - FalseSImm)) {
5554 SDValue SRA =
5555 DAG.getNode(ISD::SRA, DL, VT, LHS,
5556 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
5557 SDValue AND =
5558 DAG.getNode(ISD::AND, DL, VT, SRA,
5559 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
5560 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
5561 }
5562
5563 if (CCVal == ISD::CondCode::SETGE)
5564 std::swap(TrueV, FalseV);
5565 }
5566
5567 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
5568 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
5569 {LHS, RHS, CC, TrueV, FalseV});
5570
5571 return SDValue();
5572}
5573
5574template <unsigned N>
5575 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
5576 SelectionDAG &DAG,
5577 const LoongArchSubtarget &Subtarget,
5578 bool IsSigned = false) {
5579 SDLoc DL(Node);
5580 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5581 // Check the ImmArg.
5582 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5583 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5584 DAG.getContext()->emitError(Node->getOperationName(0) +
5585 ": argument out of range.");
5586 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
5587 }
5588 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
5589}
5590
5591template <unsigned N>
5592static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
5593 SelectionDAG &DAG, bool IsSigned = false) {
5594 SDLoc DL(Node);
5595 EVT ResTy = Node->getValueType(0);
5596 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
5597
5598 // Check the ImmArg.
5599 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
5600 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
5601 DAG.getContext()->emitError(Node->getOperationName(0) +
5602 ": argument out of range.");
5603 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5604 }
5605 return DAG.getConstant(
5606 APInt(ResTy.getScalarType().getSizeInBits(),
5607 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
5608 DL, ResTy);
5609}
5610
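// Mask each element of the shift-amount (or bit-index) operand down to
// elt & (EltSizeInBits - 1), so only the low log2(EltSizeInBits) bits of
// each element are used by the lowerings below.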
5611 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
5612 SDLoc DL(Node);
5613 EVT ResTy = Node->getValueType(0);
5614 SDValue Vec = Node->getOperand(2);
5615 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
5616 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
5617}
5618
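// Lower the [x]vbitclr intrinsics: clear one bit per element, i.e. compute
// Op1 & ~(1 << (Op2 & (EltSizeInBits - 1))).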
5619 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
5620 SDLoc DL(Node);
5621 EVT ResTy = Node->getValueType(0);
5622 SDValue One = DAG.getConstant(1, DL, ResTy);
5623 SDValue Bit =
5624 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
5625
5626 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
5627 DAG.getNOT(DL, Bit, ResTy));
5628}
5629
5630template <unsigned N>
5631 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
5632 SDLoc DL(Node);
5633 EVT ResTy = Node->getValueType(0);
5634 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5635 // Check the unsigned ImmArg.
5636 if (!isUInt<N>(CImm->getZExtValue())) {
5637 DAG.getContext()->emitError(Node->getOperationName(0) +
5638 ": argument out of range.");
5639 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5640 }
5641
5642 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5643 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
5644
5645 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
5646}
5647
5648template <unsigned N>
5649 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
5650 SDLoc DL(Node);
5651 EVT ResTy = Node->getValueType(0);
5652 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5653 // Check the unsigned ImmArg.
5654 if (!isUInt<N>(CImm->getZExtValue())) {
5655 DAG.getContext()->emitError(Node->getOperationName(0) +
5656 ": argument out of range.");
5657 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5658 }
5659
5660 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5661 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5662 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
5663}
5664
5665template <unsigned N>
5666 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
5667 SDLoc DL(Node);
5668 EVT ResTy = Node->getValueType(0);
5669 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
5670 // Check the unsigned ImmArg.
5671 if (!isUInt<N>(CImm->getZExtValue())) {
5672 DAG.getContext()->emitError(Node->getOperationName(0) +
5673 ": argument out of range.");
5674 return DAG.getNode(ISD::UNDEF, DL, ResTy);
5675 }
5676
5677 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
5678 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
5679 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
5680}
5681
5682static SDValue
5683 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
5684 TargetLowering::DAGCombinerInfo &DCI,
5685 const LoongArchSubtarget &Subtarget) {
5686 SDLoc DL(N);
5687 switch (N->getConstantOperandVal(0)) {
5688 default:
5689 break;
5690 case Intrinsic::loongarch_lsx_vadd_b:
5691 case Intrinsic::loongarch_lsx_vadd_h:
5692 case Intrinsic::loongarch_lsx_vadd_w:
5693 case Intrinsic::loongarch_lsx_vadd_d:
5694 case Intrinsic::loongarch_lasx_xvadd_b:
5695 case Intrinsic::loongarch_lasx_xvadd_h:
5696 case Intrinsic::loongarch_lasx_xvadd_w:
5697 case Intrinsic::loongarch_lasx_xvadd_d:
5698 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5699 N->getOperand(2));
5700 case Intrinsic::loongarch_lsx_vaddi_bu:
5701 case Intrinsic::loongarch_lsx_vaddi_hu:
5702 case Intrinsic::loongarch_lsx_vaddi_wu:
5703 case Intrinsic::loongarch_lsx_vaddi_du:
5704 case Intrinsic::loongarch_lasx_xvaddi_bu:
5705 case Intrinsic::loongarch_lasx_xvaddi_hu:
5706 case Intrinsic::loongarch_lasx_xvaddi_wu:
5707 case Intrinsic::loongarch_lasx_xvaddi_du:
5708 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
5709 lowerVectorSplatImm<5>(N, 2, DAG));
5710 case Intrinsic::loongarch_lsx_vsub_b:
5711 case Intrinsic::loongarch_lsx_vsub_h:
5712 case Intrinsic::loongarch_lsx_vsub_w:
5713 case Intrinsic::loongarch_lsx_vsub_d:
5714 case Intrinsic::loongarch_lasx_xvsub_b:
5715 case Intrinsic::loongarch_lasx_xvsub_h:
5716 case Intrinsic::loongarch_lasx_xvsub_w:
5717 case Intrinsic::loongarch_lasx_xvsub_d:
5718 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5719 N->getOperand(2));
5720 case Intrinsic::loongarch_lsx_vsubi_bu:
5721 case Intrinsic::loongarch_lsx_vsubi_hu:
5722 case Intrinsic::loongarch_lsx_vsubi_wu:
5723 case Intrinsic::loongarch_lsx_vsubi_du:
5724 case Intrinsic::loongarch_lasx_xvsubi_bu:
5725 case Intrinsic::loongarch_lasx_xvsubi_hu:
5726 case Intrinsic::loongarch_lasx_xvsubi_wu:
5727 case Intrinsic::loongarch_lasx_xvsubi_du:
5728 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
5729 lowerVectorSplatImm<5>(N, 2, DAG));
5730 case Intrinsic::loongarch_lsx_vneg_b:
5731 case Intrinsic::loongarch_lsx_vneg_h:
5732 case Intrinsic::loongarch_lsx_vneg_w:
5733 case Intrinsic::loongarch_lsx_vneg_d:
5734 case Intrinsic::loongarch_lasx_xvneg_b:
5735 case Intrinsic::loongarch_lasx_xvneg_h:
5736 case Intrinsic::loongarch_lasx_xvneg_w:
5737 case Intrinsic::loongarch_lasx_xvneg_d:
5738 return DAG.getNode(
5739 ISD::SUB, DL, N->getValueType(0),
5740 DAG.getConstant(
5741 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
5742 /*isSigned=*/true),
5743 SDLoc(N), N->getValueType(0)),
5744 N->getOperand(1));
5745 case Intrinsic::loongarch_lsx_vmax_b:
5746 case Intrinsic::loongarch_lsx_vmax_h:
5747 case Intrinsic::loongarch_lsx_vmax_w:
5748 case Intrinsic::loongarch_lsx_vmax_d:
5749 case Intrinsic::loongarch_lasx_xvmax_b:
5750 case Intrinsic::loongarch_lasx_xvmax_h:
5751 case Intrinsic::loongarch_lasx_xvmax_w:
5752 case Intrinsic::loongarch_lasx_xvmax_d:
5753 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5754 N->getOperand(2));
5755 case Intrinsic::loongarch_lsx_vmax_bu:
5756 case Intrinsic::loongarch_lsx_vmax_hu:
5757 case Intrinsic::loongarch_lsx_vmax_wu:
5758 case Intrinsic::loongarch_lsx_vmax_du:
5759 case Intrinsic::loongarch_lasx_xvmax_bu:
5760 case Intrinsic::loongarch_lasx_xvmax_hu:
5761 case Intrinsic::loongarch_lasx_xvmax_wu:
5762 case Intrinsic::loongarch_lasx_xvmax_du:
5763 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5764 N->getOperand(2));
5765 case Intrinsic::loongarch_lsx_vmaxi_b:
5766 case Intrinsic::loongarch_lsx_vmaxi_h:
5767 case Intrinsic::loongarch_lsx_vmaxi_w:
5768 case Intrinsic::loongarch_lsx_vmaxi_d:
5769 case Intrinsic::loongarch_lasx_xvmaxi_b:
5770 case Intrinsic::loongarch_lasx_xvmaxi_h:
5771 case Intrinsic::loongarch_lasx_xvmaxi_w:
5772 case Intrinsic::loongarch_lasx_xvmaxi_d:
5773 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
5774 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5775 case Intrinsic::loongarch_lsx_vmaxi_bu:
5776 case Intrinsic::loongarch_lsx_vmaxi_hu:
5777 case Intrinsic::loongarch_lsx_vmaxi_wu:
5778 case Intrinsic::loongarch_lsx_vmaxi_du:
5779 case Intrinsic::loongarch_lasx_xvmaxi_bu:
5780 case Intrinsic::loongarch_lasx_xvmaxi_hu:
5781 case Intrinsic::loongarch_lasx_xvmaxi_wu:
5782 case Intrinsic::loongarch_lasx_xvmaxi_du:
5783 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
5784 lowerVectorSplatImm<5>(N, 2, DAG));
5785 case Intrinsic::loongarch_lsx_vmin_b:
5786 case Intrinsic::loongarch_lsx_vmin_h:
5787 case Intrinsic::loongarch_lsx_vmin_w:
5788 case Intrinsic::loongarch_lsx_vmin_d:
5789 case Intrinsic::loongarch_lasx_xvmin_b:
5790 case Intrinsic::loongarch_lasx_xvmin_h:
5791 case Intrinsic::loongarch_lasx_xvmin_w:
5792 case Intrinsic::loongarch_lasx_xvmin_d:
5793 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5794 N->getOperand(2));
5795 case Intrinsic::loongarch_lsx_vmin_bu:
5796 case Intrinsic::loongarch_lsx_vmin_hu:
5797 case Intrinsic::loongarch_lsx_vmin_wu:
5798 case Intrinsic::loongarch_lsx_vmin_du:
5799 case Intrinsic::loongarch_lasx_xvmin_bu:
5800 case Intrinsic::loongarch_lasx_xvmin_hu:
5801 case Intrinsic::loongarch_lasx_xvmin_wu:
5802 case Intrinsic::loongarch_lasx_xvmin_du:
5803 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5804 N->getOperand(2));
5805 case Intrinsic::loongarch_lsx_vmini_b:
5806 case Intrinsic::loongarch_lsx_vmini_h:
5807 case Intrinsic::loongarch_lsx_vmini_w:
5808 case Intrinsic::loongarch_lsx_vmini_d:
5809 case Intrinsic::loongarch_lasx_xvmini_b:
5810 case Intrinsic::loongarch_lasx_xvmini_h:
5811 case Intrinsic::loongarch_lasx_xvmini_w:
5812 case Intrinsic::loongarch_lasx_xvmini_d:
5813 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
5814 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
5815 case Intrinsic::loongarch_lsx_vmini_bu:
5816 case Intrinsic::loongarch_lsx_vmini_hu:
5817 case Intrinsic::loongarch_lsx_vmini_wu:
5818 case Intrinsic::loongarch_lsx_vmini_du:
5819 case Intrinsic::loongarch_lasx_xvmini_bu:
5820 case Intrinsic::loongarch_lasx_xvmini_hu:
5821 case Intrinsic::loongarch_lasx_xvmini_wu:
5822 case Intrinsic::loongarch_lasx_xvmini_du:
5823 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
5824 lowerVectorSplatImm<5>(N, 2, DAG));
5825 case Intrinsic::loongarch_lsx_vmul_b:
5826 case Intrinsic::loongarch_lsx_vmul_h:
5827 case Intrinsic::loongarch_lsx_vmul_w:
5828 case Intrinsic::loongarch_lsx_vmul_d:
5829 case Intrinsic::loongarch_lasx_xvmul_b:
5830 case Intrinsic::loongarch_lasx_xvmul_h:
5831 case Intrinsic::loongarch_lasx_xvmul_w:
5832 case Intrinsic::loongarch_lasx_xvmul_d:
5833 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
5834 N->getOperand(2));
5835 case Intrinsic::loongarch_lsx_vmadd_b:
5836 case Intrinsic::loongarch_lsx_vmadd_h:
5837 case Intrinsic::loongarch_lsx_vmadd_w:
5838 case Intrinsic::loongarch_lsx_vmadd_d:
5839 case Intrinsic::loongarch_lasx_xvmadd_b:
5840 case Intrinsic::loongarch_lasx_xvmadd_h:
5841 case Intrinsic::loongarch_lasx_xvmadd_w:
5842 case Intrinsic::loongarch_lasx_xvmadd_d: {
5843 EVT ResTy = N->getValueType(0);
5844 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
5845 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5846 N->getOperand(3)));
5847 }
5848 case Intrinsic::loongarch_lsx_vmsub_b:
5849 case Intrinsic::loongarch_lsx_vmsub_h:
5850 case Intrinsic::loongarch_lsx_vmsub_w:
5851 case Intrinsic::loongarch_lsx_vmsub_d:
5852 case Intrinsic::loongarch_lasx_xvmsub_b:
5853 case Intrinsic::loongarch_lasx_xvmsub_h:
5854 case Intrinsic::loongarch_lasx_xvmsub_w:
5855 case Intrinsic::loongarch_lasx_xvmsub_d: {
5856 EVT ResTy = N->getValueType(0);
5857 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
5858 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
5859 N->getOperand(3)));
5860 }
5861 case Intrinsic::loongarch_lsx_vdiv_b:
5862 case Intrinsic::loongarch_lsx_vdiv_h:
5863 case Intrinsic::loongarch_lsx_vdiv_w:
5864 case Intrinsic::loongarch_lsx_vdiv_d:
5865 case Intrinsic::loongarch_lasx_xvdiv_b:
5866 case Intrinsic::loongarch_lasx_xvdiv_h:
5867 case Intrinsic::loongarch_lasx_xvdiv_w:
5868 case Intrinsic::loongarch_lasx_xvdiv_d:
5869 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
5870 N->getOperand(2));
5871 case Intrinsic::loongarch_lsx_vdiv_bu:
5872 case Intrinsic::loongarch_lsx_vdiv_hu:
5873 case Intrinsic::loongarch_lsx_vdiv_wu:
5874 case Intrinsic::loongarch_lsx_vdiv_du:
5875 case Intrinsic::loongarch_lasx_xvdiv_bu:
5876 case Intrinsic::loongarch_lasx_xvdiv_hu:
5877 case Intrinsic::loongarch_lasx_xvdiv_wu:
5878 case Intrinsic::loongarch_lasx_xvdiv_du:
5879 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
5880 N->getOperand(2));
5881 case Intrinsic::loongarch_lsx_vmod_b:
5882 case Intrinsic::loongarch_lsx_vmod_h:
5883 case Intrinsic::loongarch_lsx_vmod_w:
5884 case Intrinsic::loongarch_lsx_vmod_d:
5885 case Intrinsic::loongarch_lasx_xvmod_b:
5886 case Intrinsic::loongarch_lasx_xvmod_h:
5887 case Intrinsic::loongarch_lasx_xvmod_w:
5888 case Intrinsic::loongarch_lasx_xvmod_d:
5889 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
5890 N->getOperand(2));
5891 case Intrinsic::loongarch_lsx_vmod_bu:
5892 case Intrinsic::loongarch_lsx_vmod_hu:
5893 case Intrinsic::loongarch_lsx_vmod_wu:
5894 case Intrinsic::loongarch_lsx_vmod_du:
5895 case Intrinsic::loongarch_lasx_xvmod_bu:
5896 case Intrinsic::loongarch_lasx_xvmod_hu:
5897 case Intrinsic::loongarch_lasx_xvmod_wu:
5898 case Intrinsic::loongarch_lasx_xvmod_du:
5899 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
5900 N->getOperand(2));
5901 case Intrinsic::loongarch_lsx_vand_v:
5902 case Intrinsic::loongarch_lasx_xvand_v:
5903 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5904 N->getOperand(2));
5905 case Intrinsic::loongarch_lsx_vor_v:
5906 case Intrinsic::loongarch_lasx_xvor_v:
5907 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5908 N->getOperand(2));
5909 case Intrinsic::loongarch_lsx_vxor_v:
5910 case Intrinsic::loongarch_lasx_xvxor_v:
5911 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5912 N->getOperand(2));
5913 case Intrinsic::loongarch_lsx_vnor_v:
5914 case Intrinsic::loongarch_lasx_xvnor_v: {
5915 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5916 N->getOperand(2));
5917 return DAG.getNOT(DL, Res, Res->getValueType(0));
5918 }
5919 case Intrinsic::loongarch_lsx_vandi_b:
5920 case Intrinsic::loongarch_lasx_xvandi_b:
5921 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
5922 lowerVectorSplatImm<8>(N, 2, DAG));
5923 case Intrinsic::loongarch_lsx_vori_b:
5924 case Intrinsic::loongarch_lasx_xvori_b:
5925 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
5926 lowerVectorSplatImm<8>(N, 2, DAG));
5927 case Intrinsic::loongarch_lsx_vxori_b:
5928 case Intrinsic::loongarch_lasx_xvxori_b:
5929 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
5930 lowerVectorSplatImm<8>(N, 2, DAG));
5931 case Intrinsic::loongarch_lsx_vsll_b:
5932 case Intrinsic::loongarch_lsx_vsll_h:
5933 case Intrinsic::loongarch_lsx_vsll_w:
5934 case Intrinsic::loongarch_lsx_vsll_d:
5935 case Intrinsic::loongarch_lasx_xvsll_b:
5936 case Intrinsic::loongarch_lasx_xvsll_h:
5937 case Intrinsic::loongarch_lasx_xvsll_w:
5938 case Intrinsic::loongarch_lasx_xvsll_d:
5939 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5940 truncateVecElts(N, DAG));
5941 case Intrinsic::loongarch_lsx_vslli_b:
5942 case Intrinsic::loongarch_lasx_xvslli_b:
5943 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5944 lowerVectorSplatImm<3>(N, 2, DAG));
5945 case Intrinsic::loongarch_lsx_vslli_h:
5946 case Intrinsic::loongarch_lasx_xvslli_h:
5947 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5948 lowerVectorSplatImm<4>(N, 2, DAG));
5949 case Intrinsic::loongarch_lsx_vslli_w:
5950 case Intrinsic::loongarch_lasx_xvslli_w:
5951 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5952 lowerVectorSplatImm<5>(N, 2, DAG));
5953 case Intrinsic::loongarch_lsx_vslli_d:
5954 case Intrinsic::loongarch_lasx_xvslli_d:
5955 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
5956 lowerVectorSplatImm<6>(N, 2, DAG));
5957 case Intrinsic::loongarch_lsx_vsrl_b:
5958 case Intrinsic::loongarch_lsx_vsrl_h:
5959 case Intrinsic::loongarch_lsx_vsrl_w:
5960 case Intrinsic::loongarch_lsx_vsrl_d:
5961 case Intrinsic::loongarch_lasx_xvsrl_b:
5962 case Intrinsic::loongarch_lasx_xvsrl_h:
5963 case Intrinsic::loongarch_lasx_xvsrl_w:
5964 case Intrinsic::loongarch_lasx_xvsrl_d:
5965 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5966 truncateVecElts(N, DAG));
5967 case Intrinsic::loongarch_lsx_vsrli_b:
5968 case Intrinsic::loongarch_lasx_xvsrli_b:
5969 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5970 lowerVectorSplatImm<3>(N, 2, DAG));
5971 case Intrinsic::loongarch_lsx_vsrli_h:
5972 case Intrinsic::loongarch_lasx_xvsrli_h:
5973 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5974 lowerVectorSplatImm<4>(N, 2, DAG));
5975 case Intrinsic::loongarch_lsx_vsrli_w:
5976 case Intrinsic::loongarch_lasx_xvsrli_w:
5977 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5978 lowerVectorSplatImm<5>(N, 2, DAG));
5979 case Intrinsic::loongarch_lsx_vsrli_d:
5980 case Intrinsic::loongarch_lasx_xvsrli_d:
5981 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
5982 lowerVectorSplatImm<6>(N, 2, DAG));
5983 case Intrinsic::loongarch_lsx_vsra_b:
5984 case Intrinsic::loongarch_lsx_vsra_h:
5985 case Intrinsic::loongarch_lsx_vsra_w:
5986 case Intrinsic::loongarch_lsx_vsra_d:
5987 case Intrinsic::loongarch_lasx_xvsra_b:
5988 case Intrinsic::loongarch_lasx_xvsra_h:
5989 case Intrinsic::loongarch_lasx_xvsra_w:
5990 case Intrinsic::loongarch_lasx_xvsra_d:
5991 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5992 truncateVecElts(N, DAG));
5993 case Intrinsic::loongarch_lsx_vsrai_b:
5994 case Intrinsic::loongarch_lasx_xvsrai_b:
5995 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
5996 lowerVectorSplatImm<3>(N, 2, DAG));
5997 case Intrinsic::loongarch_lsx_vsrai_h:
5998 case Intrinsic::loongarch_lasx_xvsrai_h:
5999 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6000 lowerVectorSplatImm<4>(N, 2, DAG));
6001 case Intrinsic::loongarch_lsx_vsrai_w:
6002 case Intrinsic::loongarch_lasx_xvsrai_w:
6003 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6004 lowerVectorSplatImm<5>(N, 2, DAG));
6005 case Intrinsic::loongarch_lsx_vsrai_d:
6006 case Intrinsic::loongarch_lasx_xvsrai_d:
6007 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6008 lowerVectorSplatImm<6>(N, 2, DAG));
6009 case Intrinsic::loongarch_lsx_vclz_b:
6010 case Intrinsic::loongarch_lsx_vclz_h:
6011 case Intrinsic::loongarch_lsx_vclz_w:
6012 case Intrinsic::loongarch_lsx_vclz_d:
6013 case Intrinsic::loongarch_lasx_xvclz_b:
6014 case Intrinsic::loongarch_lasx_xvclz_h:
6015 case Intrinsic::loongarch_lasx_xvclz_w:
6016 case Intrinsic::loongarch_lasx_xvclz_d:
6017 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6018 case Intrinsic::loongarch_lsx_vpcnt_b:
6019 case Intrinsic::loongarch_lsx_vpcnt_h:
6020 case Intrinsic::loongarch_lsx_vpcnt_w:
6021 case Intrinsic::loongarch_lsx_vpcnt_d:
6022 case Intrinsic::loongarch_lasx_xvpcnt_b:
6023 case Intrinsic::loongarch_lasx_xvpcnt_h:
6024 case Intrinsic::loongarch_lasx_xvpcnt_w:
6025 case Intrinsic::loongarch_lasx_xvpcnt_d:
6026 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6027 case Intrinsic::loongarch_lsx_vbitclr_b:
6028 case Intrinsic::loongarch_lsx_vbitclr_h:
6029 case Intrinsic::loongarch_lsx_vbitclr_w:
6030 case Intrinsic::loongarch_lsx_vbitclr_d:
6031 case Intrinsic::loongarch_lasx_xvbitclr_b:
6032 case Intrinsic::loongarch_lasx_xvbitclr_h:
6033 case Intrinsic::loongarch_lasx_xvbitclr_w:
6034 case Intrinsic::loongarch_lasx_xvbitclr_d:
6035 return lowerVectorBitClear(N, DAG);
6036 case Intrinsic::loongarch_lsx_vbitclri_b:
6037 case Intrinsic::loongarch_lasx_xvbitclri_b:
6038 return lowerVectorBitClearImm<3>(N, DAG);
6039 case Intrinsic::loongarch_lsx_vbitclri_h:
6040 case Intrinsic::loongarch_lasx_xvbitclri_h:
6041 return lowerVectorBitClearImm<4>(N, DAG);
6042 case Intrinsic::loongarch_lsx_vbitclri_w:
6043 case Intrinsic::loongarch_lasx_xvbitclri_w:
6044 return lowerVectorBitClearImm<5>(N, DAG);
6045 case Intrinsic::loongarch_lsx_vbitclri_d:
6046 case Intrinsic::loongarch_lasx_xvbitclri_d:
6047 return lowerVectorBitClearImm<6>(N, DAG);
6048 case Intrinsic::loongarch_lsx_vbitset_b:
6049 case Intrinsic::loongarch_lsx_vbitset_h:
6050 case Intrinsic::loongarch_lsx_vbitset_w:
6051 case Intrinsic::loongarch_lsx_vbitset_d:
6052 case Intrinsic::loongarch_lasx_xvbitset_b:
6053 case Intrinsic::loongarch_lasx_xvbitset_h:
6054 case Intrinsic::loongarch_lasx_xvbitset_w:
6055 case Intrinsic::loongarch_lasx_xvbitset_d: {
6056 EVT VecTy = N->getValueType(0);
6057 SDValue One = DAG.getConstant(1, DL, VecTy);
6058 return DAG.getNode(
6059 ISD::OR, DL, VecTy, N->getOperand(1),
6060 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6061 }
6062 case Intrinsic::loongarch_lsx_vbitseti_b:
6063 case Intrinsic::loongarch_lasx_xvbitseti_b:
6064 return lowerVectorBitSetImm<3>(N, DAG);
6065 case Intrinsic::loongarch_lsx_vbitseti_h:
6066 case Intrinsic::loongarch_lasx_xvbitseti_h:
6067 return lowerVectorBitSetImm<4>(N, DAG);
6068 case Intrinsic::loongarch_lsx_vbitseti_w:
6069 case Intrinsic::loongarch_lasx_xvbitseti_w:
6070 return lowerVectorBitSetImm<5>(N, DAG);
6071 case Intrinsic::loongarch_lsx_vbitseti_d:
6072 case Intrinsic::loongarch_lasx_xvbitseti_d:
6073 return lowerVectorBitSetImm<6>(N, DAG);
6074 case Intrinsic::loongarch_lsx_vbitrev_b:
6075 case Intrinsic::loongarch_lsx_vbitrev_h:
6076 case Intrinsic::loongarch_lsx_vbitrev_w:
6077 case Intrinsic::loongarch_lsx_vbitrev_d:
6078 case Intrinsic::loongarch_lasx_xvbitrev_b:
6079 case Intrinsic::loongarch_lasx_xvbitrev_h:
6080 case Intrinsic::loongarch_lasx_xvbitrev_w:
6081 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6082 EVT VecTy = N->getValueType(0);
6083 SDValue One = DAG.getConstant(1, DL, VecTy);
6084 return DAG.getNode(
6085 ISD::XOR, DL, VecTy, N->getOperand(1),
6086 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6087 }
6088 case Intrinsic::loongarch_lsx_vbitrevi_b:
6089 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6090 return lowerVectorBitRevImm<3>(N, DAG);
6091 case Intrinsic::loongarch_lsx_vbitrevi_h:
6092 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6093 return lowerVectorBitRevImm<4>(N, DAG);
6094 case Intrinsic::loongarch_lsx_vbitrevi_w:
6095 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6096 return lowerVectorBitRevImm<5>(N, DAG);
6097 case Intrinsic::loongarch_lsx_vbitrevi_d:
6098 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6099 return lowerVectorBitRevImm<6>(N, DAG);
6100 case Intrinsic::loongarch_lsx_vfadd_s:
6101 case Intrinsic::loongarch_lsx_vfadd_d:
6102 case Intrinsic::loongarch_lasx_xvfadd_s:
6103 case Intrinsic::loongarch_lasx_xvfadd_d:
6104 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6105 N->getOperand(2));
6106 case Intrinsic::loongarch_lsx_vfsub_s:
6107 case Intrinsic::loongarch_lsx_vfsub_d:
6108 case Intrinsic::loongarch_lasx_xvfsub_s:
6109 case Intrinsic::loongarch_lasx_xvfsub_d:
6110 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6111 N->getOperand(2));
6112 case Intrinsic::loongarch_lsx_vfmul_s:
6113 case Intrinsic::loongarch_lsx_vfmul_d:
6114 case Intrinsic::loongarch_lasx_xvfmul_s:
6115 case Intrinsic::loongarch_lasx_xvfmul_d:
6116 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6117 N->getOperand(2));
6118 case Intrinsic::loongarch_lsx_vfdiv_s:
6119 case Intrinsic::loongarch_lsx_vfdiv_d:
6120 case Intrinsic::loongarch_lasx_xvfdiv_s:
6121 case Intrinsic::loongarch_lasx_xvfdiv_d:
6122 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6123 N->getOperand(2));
6124 case Intrinsic::loongarch_lsx_vfmadd_s:
6125 case Intrinsic::loongarch_lsx_vfmadd_d:
6126 case Intrinsic::loongarch_lasx_xvfmadd_s:
6127 case Intrinsic::loongarch_lasx_xvfmadd_d:
6128 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6129 N->getOperand(2), N->getOperand(3));
6130 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6131 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6132 N->getOperand(1), N->getOperand(2),
6133 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6134 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6135 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6136 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6137 N->getOperand(1), N->getOperand(2),
6138 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6139 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6140 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6141 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6142 N->getOperand(1), N->getOperand(2),
6143 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6144 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6145 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6146 N->getOperand(1), N->getOperand(2),
6147 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6148 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6149 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6150 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6151 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6152 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6153 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6154 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6155 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6156 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6157 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6158 N->getOperand(1)));
6159 case Intrinsic::loongarch_lsx_vreplve_b:
6160 case Intrinsic::loongarch_lsx_vreplve_h:
6161 case Intrinsic::loongarch_lsx_vreplve_w:
6162 case Intrinsic::loongarch_lsx_vreplve_d:
6163 case Intrinsic::loongarch_lasx_xvreplve_b:
6164 case Intrinsic::loongarch_lasx_xvreplve_h:
6165 case Intrinsic::loongarch_lasx_xvreplve_w:
6166 case Intrinsic::loongarch_lasx_xvreplve_d:
6167 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6168 N->getOperand(1),
6169 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6170 N->getOperand(2)));
6171 }
6172 return SDValue();
6173}
6174
6175 static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG,
6176 TargetLowering::DAGCombinerInfo &DCI,
6177 const LoongArchSubtarget &Subtarget) {
6178 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6179 // conversion is unnecessary and can be replaced with the
6180 // MOVFR2GR_S_LA64 operand.
6181 SDValue Op0 = N->getOperand(0);
6182 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6183 return Op0.getOperand(0);
6184 return SDValue();
6185}
6186
6187 static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
6188 TargetLowering::DAGCombinerInfo &DCI,
6189 const LoongArchSubtarget &Subtarget) {
6190 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6191 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6192 // operand.
6193 SDValue Op0 = N->getOperand(0);
6194 if (Op0.getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
6195 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6196 "Unexpected value type!");
6197 return Op0.getOperand(0);
6198 }
6199 return SDValue();
6200}
6201
6202 static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
6203 TargetLowering::DAGCombinerInfo &DCI,
6204 const LoongArchSubtarget &Subtarget) {
6205 MVT VT = N->getSimpleValueType(0);
6206 unsigned NumBits = VT.getScalarSizeInBits();
6207
6208 // Simplify the inputs.
6209 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6210 APInt DemandedMask(APInt::getAllOnes(NumBits));
6211 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
6212 return SDValue(N, 0);
6213
6214 return SDValue();
6215}
6216
6217static SDValue
6218 performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG,
6219 TargetLowering::DAGCombinerInfo &DCI,
6220 const LoongArchSubtarget &Subtarget) {
6221 SDValue Op0 = N->getOperand(0);
6222 SDLoc DL(N);
6223
6224 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6225 // redundant. Instead, use BuildPairF64's operands directly.
6226 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6227 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6228
6229 if (Op0->isUndef()) {
6230 SDValue Lo = DAG.getUNDEF(MVT::i32);
6231 SDValue Hi = DAG.getUNDEF(MVT::i32);
6232 return DCI.CombineTo(N, Lo, Hi);
6233 }
6234
6235 // It's cheaper to materialise two 32-bit integers than to load a double
6236 // from the constant pool and transfer it to integer registers through the
6237 // stack.
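// For example, the f64 constant 1.0 (0x3ff0000000000000) becomes
// Lo = 0x00000000 and Hi = 0x3ff00000.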
6238 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
6239 APInt V = C->getValueAPF().bitcastToAPInt();
6240 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6241 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6242 return DCI.CombineTo(N, Lo, Hi);
6243 }
6244
6245 return SDValue();
6246}
6247
6248static SDValue
6249 performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
6250 TargetLowering::DAGCombinerInfo &DCI,
6251 const LoongArchSubtarget &Subtarget) {
6252 if (!DCI.isBeforeLegalize())
6253 return SDValue();
6254
6255 MVT EltVT = N->getSimpleValueType(0);
6256 SDValue Vec = N->getOperand(0);
6257 EVT VecTy = Vec->getValueType(0);
6258 SDValue Idx = N->getOperand(1);
6259 unsigned IdxOp = Idx.getOpcode();
6260 SDLoc DL(N);
6261
6262 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6263 return SDValue();
6264
6265 // Combine:
6266 // t2 = truncate t1
6267 // t3 = {zero/sign/any}_extend t2
6268 // t4 = extract_vector_elt t0, t3
6269 // to:
6270 // t4 = extract_vector_elt t0, t1
6271 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6272 IdxOp == ISD::ANY_EXTEND) {
6273 SDValue IdxOrig = Idx.getOperand(0);
6274 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6275 return SDValue();
6276
6277 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6278 IdxOrig.getOperand(0));
6279 }
6280
6281 return SDValue();
6282}
6283
6284 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
6285 DAGCombinerInfo &DCI) const {
6286 SelectionDAG &DAG = DCI.DAG;
6287 switch (N->getOpcode()) {
6288 default:
6289 break;
6290 case ISD::AND:
6291 return performANDCombine(N, DAG, DCI, Subtarget);
6292 case ISD::OR:
6293 return performORCombine(N, DAG, DCI, Subtarget);
6294 case ISD::SETCC:
6295 return performSETCCCombine(N, DAG, DCI, Subtarget);
6296 case ISD::SRL:
6297 return performSRLCombine(N, DAG, DCI, Subtarget);
6298 case ISD::BITCAST:
6299 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6300 case LoongArchISD::BITREV_W:
6301 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6302 case LoongArchISD::BR_CC:
6303 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6304 case LoongArchISD::SELECT_CC:
6305 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6306 case ISD::INTRINSIC_WO_CHAIN:
6307 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6308 case LoongArchISD::MOVGR2FR_W_LA64:
6309 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6310 case LoongArchISD::MOVFR2GR_S_LA64:
6311 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6312 case LoongArchISD::VMSKLTZ:
6313 case LoongArchISD::XVMSKLTZ:
6314 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6315 case LoongArchISD::SPLIT_PAIR_F64:
6316 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6317 case ISD::EXTRACT_VECTOR_ELT:
6318 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6319 }
6320 return SDValue();
6321}
6322
6323 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
6324 MachineBasicBlock *MBB) {
6325 if (!ZeroDivCheck)
6326 return MBB;
6327
6328 // Build instructions:
6329 // MBB:
6330 // div(or mod) $dst, $dividend, $divisor
6331 // bne $divisor, $zero, SinkMBB
6332 // BreakMBB:
6333 // break 7 // BRK_DIVZERO
6334 // SinkMBB:
6335 // fallthrough
6336 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
6337 MachineFunction::iterator It = ++MBB->getIterator();
6338 MachineFunction *MF = MBB->getParent();
6339 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6340 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
6341 MF->insert(It, BreakMBB);
6342 MF->insert(It, SinkMBB);
6343
6344 // Transfer the remainder of MBB and its successor edges to SinkMBB.
6345 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
6346 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6347
6348 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
6349 DebugLoc DL = MI.getDebugLoc();
6350 MachineOperand &Divisor = MI.getOperand(2);
6351 Register DivisorReg = Divisor.getReg();
6352
6353 // MBB:
6354 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
6355 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
6356 .addReg(LoongArch::R0)
6357 .addMBB(SinkMBB);
6358 MBB->addSuccessor(BreakMBB);
6359 MBB->addSuccessor(SinkMBB);
6360
6361 // BreakMBB:
6362 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
6363 // definition of BRK_DIVZERO.
6364 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
6365 BreakMBB->addSuccessor(SinkMBB);
6366
6367 // Clear Divisor's kill flag.
6368 Divisor.setIsKill(false);
6369
6370 return SinkMBB;
6371}
6372
6373static MachineBasicBlock *
6374 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6375 const LoongArchSubtarget &Subtarget) {
6376 unsigned CondOpc;
6377 switch (MI.getOpcode()) {
6378 default:
6379 llvm_unreachable("Unexpected opcode");
6380 case LoongArch::PseudoVBZ:
6381 CondOpc = LoongArch::VSETEQZ_V;
6382 break;
6383 case LoongArch::PseudoVBZ_B:
6384 CondOpc = LoongArch::VSETANYEQZ_B;
6385 break;
6386 case LoongArch::PseudoVBZ_H:
6387 CondOpc = LoongArch::VSETANYEQZ_H;
6388 break;
6389 case LoongArch::PseudoVBZ_W:
6390 CondOpc = LoongArch::VSETANYEQZ_W;
6391 break;
6392 case LoongArch::PseudoVBZ_D:
6393 CondOpc = LoongArch::VSETANYEQZ_D;
6394 break;
6395 case LoongArch::PseudoVBNZ:
6396 CondOpc = LoongArch::VSETNEZ_V;
6397 break;
6398 case LoongArch::PseudoVBNZ_B:
6399 CondOpc = LoongArch::VSETALLNEZ_B;
6400 break;
6401 case LoongArch::PseudoVBNZ_H:
6402 CondOpc = LoongArch::VSETALLNEZ_H;
6403 break;
6404 case LoongArch::PseudoVBNZ_W:
6405 CondOpc = LoongArch::VSETALLNEZ_W;
6406 break;
6407 case LoongArch::PseudoVBNZ_D:
6408 CondOpc = LoongArch::VSETALLNEZ_D;
6409 break;
6410 case LoongArch::PseudoXVBZ:
6411 CondOpc = LoongArch::XVSETEQZ_V;
6412 break;
6413 case LoongArch::PseudoXVBZ_B:
6414 CondOpc = LoongArch::XVSETANYEQZ_B;
6415 break;
6416 case LoongArch::PseudoXVBZ_H:
6417 CondOpc = LoongArch::XVSETANYEQZ_H;
6418 break;
6419 case LoongArch::PseudoXVBZ_W:
6420 CondOpc = LoongArch::XVSETANYEQZ_W;
6421 break;
6422 case LoongArch::PseudoXVBZ_D:
6423 CondOpc = LoongArch::XVSETANYEQZ_D;
6424 break;
6425 case LoongArch::PseudoXVBNZ:
6426 CondOpc = LoongArch::XVSETNEZ_V;
6427 break;
6428 case LoongArch::PseudoXVBNZ_B:
6429 CondOpc = LoongArch::XVSETALLNEZ_B;
6430 break;
6431 case LoongArch::PseudoXVBNZ_H:
6432 CondOpc = LoongArch::XVSETALLNEZ_H;
6433 break;
6434 case LoongArch::PseudoXVBNZ_W:
6435 CondOpc = LoongArch::XVSETALLNEZ_W;
6436 break;
6437 case LoongArch::PseudoXVBNZ_D:
6438 CondOpc = LoongArch::XVSETALLNEZ_D;
6439 break;
6440 }
6441
6442 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6443 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6444 DebugLoc DL = MI.getDebugLoc();
6445 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6446 MachineFunction::iterator It = ++BB->getIterator();
6447
6448 MachineFunction *F = BB->getParent();
6449 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
6450 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
6451 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
6452
6453 F->insert(It, FalseBB);
6454 F->insert(It, TrueBB);
6455 F->insert(It, SinkBB);
6456
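// The pseudo expands into a diamond: BB computes the FCC flag and branches,
// FalseBB materializes 0, TrueBB materializes 1, and SinkBB merges the two
// values with a PHI into the pseudo's GPR result.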
6457 // Transfer the remainder of MBB and its successor edges to Sink.
6458 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
6459 SinkBB->transferSuccessorsAndUpdatePHIs(BB);
6460
6461 // Insert the real instruction to BB.
6462 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
6463 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
6464
6465 // Insert branch.
6466 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
6467 BB->addSuccessor(FalseBB);
6468 BB->addSuccessor(TrueBB);
6469
6470 // FalseBB.
6471 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6472 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
6473 .addReg(LoongArch::R0)
6474 .addImm(0);
6475 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
6476 FalseBB->addSuccessor(SinkBB);
6477
6478 // TrueBB.
6479 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6480 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
6481 .addReg(LoongArch::R0)
6482 .addImm(1);
6483 TrueBB->addSuccessor(SinkBB);
6484
6485 // SinkBB: merge the results.
6486 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
6487 MI.getOperand(0).getReg())
6488 .addReg(RD1)
6489 .addMBB(FalseBB)
6490 .addReg(RD2)
6491 .addMBB(TrueBB);
6492
6493 // The pseudo instruction is gone now.
6494 MI.eraseFromParent();
6495 return SinkBB;
6496}
6497
6498static MachineBasicBlock *
6499 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
6500 const LoongArchSubtarget &Subtarget) {
6501 unsigned InsOp;
6502 unsigned BroadcastOp;
6503 unsigned HalfSize;
6504 switch (MI.getOpcode()) {
6505 default:
6506 llvm_unreachable("Unexpected opcode");
6507 case LoongArch::PseudoXVINSGR2VR_B:
6508 HalfSize = 16;
6509 BroadcastOp = LoongArch::XVREPLGR2VR_B;
6510 InsOp = LoongArch::XVEXTRINS_B;
6511 break;
6512 case LoongArch::PseudoXVINSGR2VR_H:
6513 HalfSize = 8;
6514 BroadcastOp = LoongArch::XVREPLGR2VR_H;
6515 InsOp = LoongArch::XVEXTRINS_H;
6516 break;
6517 }
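// HalfSize is the number of elements in one 128-bit half of the LASX
// register, so indices below HalfSize address the low half.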
6518 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6519 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
6520 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
6521 DebugLoc DL = MI.getDebugLoc();
6522 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6523 // XDst = vector_insert XSrc, Elt, Idx
6524 Register XDst = MI.getOperand(0).getReg();
6525 Register XSrc = MI.getOperand(1).getReg();
6526 Register Elt = MI.getOperand(2).getReg();
6527 unsigned Idx = MI.getOperand(3).getImm();
6528
6529 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
6530 Idx < HalfSize) {
6531 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
6532 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
6533
6534 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
6535 .addReg(XSrc, 0, LoongArch::sub_128);
6536 BuildMI(*BB, MI, DL,
6537 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
6538 : LoongArch::VINSGR2VR_B),
6539 ScratchSubReg2)
6540 .addReg(ScratchSubReg1)
6541 .addReg(Elt)
6542 .addImm(Idx);
6543
6544 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
6545 .addImm(0)
6546 .addReg(ScratchSubReg2)
6547 .addImm(LoongArch::sub_128);
6548 } else {
6549 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6550 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6551
6552 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
6553
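// xvpermi.q builds a temporary whose 128-bit half containing lane Idx holds
// the broadcast value while the other half mirrors XSrc. The per-half
// xvextrins below then inserts the element into the target half and rewrites
// the untouched half with its original contents; its immediate packs the
// destination and source lane indices into its high and low nibbles, hence
// (Idx % HalfSize) * 17.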
6554 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
6555 .addReg(ScratchReg1)
6556 .addReg(XSrc)
6557 .addImm(Idx >= HalfSize ? 48 : 18);
6558
6559 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
6560 .addReg(XSrc)
6561 .addReg(ScratchReg2)
6562 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
6563 }
6564
6565 MI.eraseFromParent();
6566 return BB;
6567}
6568
6569 static MachineBasicBlock *
6570 emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB,
6571 const LoongArchSubtarget &Subtarget) {
6572 assert(Subtarget.hasExtLSX());
6573 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6574 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6575 DebugLoc DL = MI.getDebugLoc();
6576 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6577 Register Dst = MI.getOperand(0).getReg();
6578 Register Src = MI.getOperand(1).getReg();
6579 Register ScratchReg1 = MRI.createVirtualRegister(RC);
6580 Register ScratchReg2 = MRI.createVirtualRegister(RC);
6581 Register ScratchReg3 = MRI.createVirtualRegister(RC);
6582
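// Scalar CTPOP is routed through the vector unit: zero an LSX register with
// vldi, insert the GPR into element 0, run vpcnt on that element and move
// the resulting count back into the destination GPR.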
6583 BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
6584 BuildMI(*BB, MI, DL,
6585 TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
6586 : LoongArch::VINSGR2VR_W),
6587 ScratchReg2)
6588 .addReg(ScratchReg1)
6589 .addReg(Src)
6590 .addImm(0);
6591 BuildMI(
6592 *BB, MI, DL,
6593 TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
6594 ScratchReg3)
6595 .addReg(ScratchReg2);
6596 BuildMI(*BB, MI, DL,
6597 TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
6598 : LoongArch::VPICKVE2GR_W),
6599 Dst)
6600 .addReg(ScratchReg3)
6601 .addImm(0);
6602
6603 MI.eraseFromParent();
6604 return BB;
6605}
6606
6607static MachineBasicBlock *
6608 emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
6609 const LoongArchSubtarget &Subtarget) {
6610 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6611 const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
6612 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
6613 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
6614 Register Dst = MI.getOperand(0).getReg();
6615 Register Src = MI.getOperand(1).getReg();
6616 DebugLoc DL = MI.getDebugLoc();
6617 unsigned EleBits = 8;
6618 unsigned NotOpc = 0;
6619 unsigned MskOpc;
6620
6621 switch (MI.getOpcode()) {
6622 default:
6623 llvm_unreachable("Unexpected opcode");
6624 case LoongArch::PseudoVMSKLTZ_B:
6625 MskOpc = LoongArch::VMSKLTZ_B;
6626 break;
6627 case LoongArch::PseudoVMSKLTZ_H:
6628 MskOpc = LoongArch::VMSKLTZ_H;
6629 EleBits = 16;
6630 break;
6631 case LoongArch::PseudoVMSKLTZ_W:
6632 MskOpc = LoongArch::VMSKLTZ_W;
6633 EleBits = 32;
6634 break;
6635 case LoongArch::PseudoVMSKLTZ_D:
6636 MskOpc = LoongArch::VMSKLTZ_D;
6637 EleBits = 64;
6638 break;
6639 case LoongArch::PseudoVMSKGEZ_B:
6640 MskOpc = LoongArch::VMSKGEZ_B;
6641 break;
6642 case LoongArch::PseudoVMSKEQZ_B:
6643 MskOpc = LoongArch::VMSKNZ_B;
6644 NotOpc = LoongArch::VNOR_V;
6645 break;
6646 case LoongArch::PseudoVMSKNEZ_B:
6647 MskOpc = LoongArch::VMSKNZ_B;
6648 break;
6649 case LoongArch::PseudoXVMSKLTZ_B:
6650 MskOpc = LoongArch::XVMSKLTZ_B;
6651 RC = &LoongArch::LASX256RegClass;
6652 break;
6653 case LoongArch::PseudoXVMSKLTZ_H:
6654 MskOpc = LoongArch::XVMSKLTZ_H;
6655 RC = &LoongArch::LASX256RegClass;
6656 EleBits = 16;
6657 break;
6658 case LoongArch::PseudoXVMSKLTZ_W:
6659 MskOpc = LoongArch::XVMSKLTZ_W;
6660 RC = &LoongArch::LASX256RegClass;
6661 EleBits = 32;
6662 break;
6663 case LoongArch::PseudoXVMSKLTZ_D:
6664 MskOpc = LoongArch::XVMSKLTZ_D;
6665 RC = &LoongArch::LASX256RegClass;
6666 EleBits = 64;
6667 break;
6668 case LoongArch::PseudoXVMSKGEZ_B:
6669 MskOpc = LoongArch::XVMSKGEZ_B;
6670 RC = &LoongArch::LASX256RegClass;
6671 break;
6672 case LoongArch::PseudoXVMSKEQZ_B:
6673 MskOpc = LoongArch::XVMSKNZ_B;
6674 NotOpc = LoongArch::XVNOR_V;
6675 RC = &LoongArch::LASX256RegClass;
6676 break;
6677 case LoongArch::PseudoXVMSKNEZ_B:
6678 MskOpc = LoongArch::XVMSKNZ_B;
6679 RC = &LoongArch::LASX256RegClass;
6680 break;
6681 }
6682
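// There is no vmsk* variant that tests for equality with zero directly, so
// the EQZ pseudos are emitted as vmsknz.b followed by a NOR of the mask with
// itself (a bitwise NOT) via NotOpc.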
6683 Register Msk = MRI.createVirtualRegister(RC);
6684 if (NotOpc) {
6685 Register Tmp = MRI.createVirtualRegister(RC);
6686 BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
6687 BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
6688 .addReg(Tmp, RegState::Kill)
6689 .addReg(Tmp, RegState::Kill);
6690 } else {
6691 BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
6692 }
6693
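// A 256-bit mask spans two 128-bit halves: read each half's 32-bit mask with
// xvpickve2gr.wu and splice the high half above the low one with bstrins,
// one mask bit per source element (128 / EleBits bits per half).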
6694 if (TRI->getRegSizeInBits(*RC) > 128) {
6695 Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6696 Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
6697 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
6698 .addReg(Msk)
6699 .addImm(0);
6700 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
6701 .addReg(Msk, RegState::Kill)
6702 .addImm(4);
6703 BuildMI(*BB, MI, DL,
6704 TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
6705 : LoongArch::BSTRINS_W),
6706 Dst)
6707 .addReg(Lo, RegState::Kill)
6708 .addReg(Hi, RegState::Kill)
6709 .addImm(256 / EleBits - 1)
6710 .addImm(128 / EleBits);
6711 } else {
6712 BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
6713 .addReg(Msk, RegState::Kill)
6714 .addImm(0);
6715 }
6716
6717 MI.eraseFromParent();
6718 return BB;
6719}
6720
6721static MachineBasicBlock *
6722 emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6723 const LoongArchSubtarget &Subtarget) {
6724 assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
6725 "Unexpected instruction");
6726
6727 MachineFunction &MF = *BB->getParent();
6728 DebugLoc DL = MI.getDebugLoc();
6729 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
6730 Register LoReg = MI.getOperand(0).getReg();
6731 Register HiReg = MI.getOperand(1).getReg();
6732 Register SrcReg = MI.getOperand(2).getReg();
6733
6734 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
6735 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
6736 .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
6737 MI.eraseFromParent(); // The pseudo instruction is gone now.
6738 return BB;
6739}
6740
6741static MachineBasicBlock *
6742 emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB,
6743 const LoongArchSubtarget &Subtarget) {
6744 assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
6745 "Unexpected instruction");
6746
6747 MachineFunction &MF = *BB->getParent();
6748 DebugLoc DL = MI.getDebugLoc();
6749 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
6750 MachineRegisterInfo &MRI = MF.getRegInfo();
6751 Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
6752 Register DstReg = MI.getOperand(0).getReg();
6753 Register LoReg = MI.getOperand(1).getReg();
6754 Register HiReg = MI.getOperand(2).getReg();
6755
6756 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
6757 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
6758 BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
6759 .addReg(TmpReg, RegState::Kill)
6760 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
6761 MI.eraseFromParent(); // The pseudo instruction is gone now.
6762 return BB;
6763}
6764
6765 static bool isSelectPseudo(MachineInstr &MI) {
6766 switch (MI.getOpcode()) {
6767 default:
6768 return false;
6769 case LoongArch::Select_GPR_Using_CC_GPR:
6770 return true;
6771 }
6772}
6773
6774static MachineBasicBlock *
6775 emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB,
6776 const LoongArchSubtarget &Subtarget) {
6777 // To "insert" Select_* instructions, we actually have to insert the triangle
6778 // control-flow pattern. The incoming instructions know the destination vreg
6779 // to set, the condition code register to branch on, the true/false values to
6780 // select between, and the condcode to use to select the appropriate branch.
6781 //
6782 // We produce the following control flow:
6783 // HeadMBB
6784 // | \
6785 // | IfFalseMBB
6786 // | /
6787 // TailMBB
6788 //
6789 // When we find a sequence of selects we attempt to optimize their emission
6790 // by sharing the control flow. Currently we only handle cases where we have
6791 // multiple selects with the exact same condition (same LHS, RHS and CC).
6792 // The selects may be interleaved with other instructions if the other
6793 // instructions meet some requirements we deem safe:
6794 // - They are not pseudo instructions.
6795 // - They are debug instructions. Otherwise,
6796 // - They do not have side-effects, do not access memory and their inputs do
6797 // not depend on the results of the select pseudo-instructions.
6798 // The TrueV/FalseV operands of the selects cannot depend on the result of
6799 // previous selects in the sequence.
6800 // These conditions could be further relaxed. See the X86 target for a
6801 // related approach and more information.
6802
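// Operands of Select_GPR_Using_CC_GPR: (0) result, (1) LHS, (2) RHS register
// or immediate, (3) the branch opcode encoding the condition, (4) true
// value, (5) false value.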
6803 Register LHS = MI.getOperand(1).getReg();
6804 Register RHS;
6805 if (MI.getOperand(2).isReg())
6806 RHS = MI.getOperand(2).getReg();
6807 auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());
6808
6809 SmallVector<MachineInstr *, 4> SelectDebugValues;
6810 SmallSet<Register, 4> SelectDests;
6811 SelectDests.insert(MI.getOperand(0).getReg());
6812
6813 MachineInstr *LastSelectPseudo = &MI;
6814 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
6815 SequenceMBBI != E; ++SequenceMBBI) {
6816 if (SequenceMBBI->isDebugInstr())
6817 continue;
6818 if (isSelectPseudo(*SequenceMBBI)) {
6819 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
6820 !SequenceMBBI->getOperand(2).isReg() ||
6821 SequenceMBBI->getOperand(2).getReg() != RHS ||
6822 SequenceMBBI->getOperand(3).getImm() != CC ||
6823 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
6824 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
6825 break;
6826 LastSelectPseudo = &*SequenceMBBI;
6827 SequenceMBBI->collectDebugValues(SelectDebugValues);
6828 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
6829 continue;
6830 }
6831 if (SequenceMBBI->hasUnmodeledSideEffects() ||
6832 SequenceMBBI->mayLoadOrStore() ||
6833 SequenceMBBI->usesCustomInsertionHook())
6834 break;
6835 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
6836 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
6837 }))
6838 break;
6839 }
6840
6841 const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
6842 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6843 DebugLoc DL = MI.getDebugLoc();
6844 MachineFunction::iterator I = ++BB->getIterator();
6845
6846 MachineBasicBlock *HeadMBB = BB;
6847 MachineFunction *F = BB->getParent();
6848 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
6849 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
6850
6851 F->insert(I, IfFalseMBB);
6852 F->insert(I, TailMBB);
6853
6854 // Set the call frame size on entry to the new basic blocks.
6855 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
6856 IfFalseMBB->setCallFrameSize(CallFrameSize);
6857 TailMBB->setCallFrameSize(CallFrameSize);
6858
6859 // Transfer debug instructions associated with the selects to TailMBB.
6860 for (MachineInstr *DebugInstr : SelectDebugValues) {
6861 TailMBB->push_back(DebugInstr->removeFromParent());
6862 }
6863
6864 // Move all instructions after the sequence to TailMBB.
6865 TailMBB->splice(TailMBB->end(), HeadMBB,
6866 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
6867 // Update machine-CFG edges by transferring all successors of the current
6868 // block to the new block which will contain the Phi nodes for the selects.
6869 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
6870 // Set the successors for HeadMBB.
6871 HeadMBB->addSuccessor(IfFalseMBB);
6872 HeadMBB->addSuccessor(TailMBB);
6873
6874 // Insert appropriate branch.
6875 if (MI.getOperand(2).isImm())
6876 BuildMI(HeadMBB, DL, TII.get(CC))
6877 .addReg(LHS)
6878 .addImm(MI.getOperand(2).getImm())
6879 .addMBB(TailMBB);
6880 else
6881 BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);
6882
6883 // IfFalseMBB just falls through to TailMBB.
6884 IfFalseMBB->addSuccessor(TailMBB);
6885
6886 // Create PHIs for all of the select pseudo-instructions.
6887 auto SelectMBBI = MI.getIterator();
6888 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
6889 auto InsertionPoint = TailMBB->begin();
6890 while (SelectMBBI != SelectEnd) {
6891 auto Next = std::next(SelectMBBI);
6892 if (isSelectPseudo(*SelectMBBI)) {
6893 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
6894 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
6895 TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
6896 .addReg(SelectMBBI->getOperand(4).getReg())
6897 .addMBB(HeadMBB)
6898 .addReg(SelectMBBI->getOperand(5).getReg())
6899 .addMBB(IfFalseMBB);
6900 SelectMBBI->eraseFromParent();
6901 }
6902 SelectMBBI = Next;
6903 }
6904
6905 F->getProperties().resetNoPHIs();
6906 return TailMBB;
6907}
6908
6909MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
6910 MachineInstr &MI, MachineBasicBlock *BB) const {
6911 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6912 DebugLoc DL = MI.getDebugLoc();
6913
6914 switch (MI.getOpcode()) {
6915 default:
6916 llvm_unreachable("Unexpected instr type to insert");
6917 case LoongArch::DIV_W:
6918 case LoongArch::DIV_WU:
6919 case LoongArch::MOD_W:
6920 case LoongArch::MOD_WU:
6921 case LoongArch::DIV_D:
6922 case LoongArch::DIV_DU:
6923 case LoongArch::MOD_D:
6924 case LoongArch::MOD_DU:
6925 return insertDivByZeroTrap(MI, BB);
6926 break;
6927 case LoongArch::WRFCSR: {
6928 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
6929 LoongArch::FCSR0 + MI.getOperand(0).getImm())
6930 .addReg(MI.getOperand(1).getReg());
6931 MI.eraseFromParent();
6932 return BB;
6933 }
6934 case LoongArch::RDFCSR: {
6935 MachineInstr *ReadFCSR =
6936 BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
6937 MI.getOperand(0).getReg())
6938 .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
6939 ReadFCSR->getOperand(1).setIsUndef();
6940 MI.eraseFromParent();
6941 return BB;
6942 }
6943 case LoongArch::Select_GPR_Using_CC_GPR:
6944 return emitSelectPseudo(MI, BB, Subtarget);
6945 case LoongArch::BuildPairF64Pseudo:
6946 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
6947 case LoongArch::SplitPairF64Pseudo:
6948 return emitSplitPairF64Pseudo(MI, BB, Subtarget);
6949 case LoongArch::PseudoVBZ:
6950 case LoongArch::PseudoVBZ_B:
6951 case LoongArch::PseudoVBZ_H:
6952 case LoongArch::PseudoVBZ_W:
6953 case LoongArch::PseudoVBZ_D:
6954 case LoongArch::PseudoVBNZ:
6955 case LoongArch::PseudoVBNZ_B:
6956 case LoongArch::PseudoVBNZ_H:
6957 case LoongArch::PseudoVBNZ_W:
6958 case LoongArch::PseudoVBNZ_D:
6959 case LoongArch::PseudoXVBZ:
6960 case LoongArch::PseudoXVBZ_B:
6961 case LoongArch::PseudoXVBZ_H:
6962 case LoongArch::PseudoXVBZ_W:
6963 case LoongArch::PseudoXVBZ_D:
6964 case LoongArch::PseudoXVBNZ:
6965 case LoongArch::PseudoXVBNZ_B:
6966 case LoongArch::PseudoXVBNZ_H:
6967 case LoongArch::PseudoXVBNZ_W:
6968 case LoongArch::PseudoXVBNZ_D:
6969 return emitVecCondBranchPseudo(MI, BB, Subtarget);
6970 case LoongArch::PseudoXVINSGR2VR_B:
6971 case LoongArch::PseudoXVINSGR2VR_H:
6972 return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
6973 case LoongArch::PseudoCTPOP:
6974 return emitPseudoCTPOP(MI, BB, Subtarget);
6975 case LoongArch::PseudoVMSKLTZ_B:
6976 case LoongArch::PseudoVMSKLTZ_H:
6977 case LoongArch::PseudoVMSKLTZ_W:
6978 case LoongArch::PseudoVMSKLTZ_D:
6979 case LoongArch::PseudoVMSKGEZ_B:
6980 case LoongArch::PseudoVMSKEQZ_B:
6981 case LoongArch::PseudoVMSKNEZ_B:
6982 case LoongArch::PseudoXVMSKLTZ_B:
6983 case LoongArch::PseudoXVMSKLTZ_H:
6984 case LoongArch::PseudoXVMSKLTZ_W:
6985 case LoongArch::PseudoXVMSKLTZ_D:
6986 case LoongArch::PseudoXVMSKGEZ_B:
6987 case LoongArch::PseudoXVMSKEQZ_B:
6988 case LoongArch::PseudoXVMSKNEZ_B:
6989 return emitPseudoVMSKCOND(MI, BB, Subtarget);
6990 case TargetOpcode::STATEPOINT:
6991 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
6992 // while the bl call instruction (where the statepoint will be lowered at the
6993 // end) has an implicit def. This def is early-clobber as it will be set at
6994 // the moment of the call and earlier than any use is read.
6995 // Add this implicit dead def here as a workaround.
6996 MI.addOperand(*MI.getMF(),
6997 MachineOperand::CreateReg(
6998 LoongArch::R1, /*isDef*/ true,
6999 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
7000 /*isUndef*/ false, /*isEarlyClobber*/ true));
7001 if (!Subtarget.is64Bit())
7002 report_fatal_error("STATEPOINT is only supported on 64-bit targets");
7003 return emitPatchPoint(MI, BB);
7004 }
7005}
7006
7007 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
7008 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
7009 unsigned *Fast) const {
7010 if (!Subtarget.hasUAL())
7011 return false;
7012
7013 // TODO: set reasonable speed number.
7014 if (Fast)
7015 *Fast = 1;
7016 return true;
7017}
7018
7019const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
7020 switch ((LoongArchISD::NodeType)Opcode) {
7021 case LoongArchISD::FIRST_NUMBER:
7022 break;
7023
7024#define NODE_NAME_CASE(node) \
7025 case LoongArchISD::node: \
7026 return "LoongArchISD::" #node;
7027
7028 // TODO: Add more target-dependent nodes later.
7029 NODE_NAME_CASE(CALL)
7030 NODE_NAME_CASE(CALL_MEDIUM)
7031 NODE_NAME_CASE(CALL_LARGE)
7032 NODE_NAME_CASE(RET)
7033 NODE_NAME_CASE(TAIL)
7034 NODE_NAME_CASE(TAIL_MEDIUM)
7035 NODE_NAME_CASE(TAIL_LARGE)
7036 NODE_NAME_CASE(SELECT_CC)
7037 NODE_NAME_CASE(BR_CC)
7038 NODE_NAME_CASE(BRCOND)
7039 NODE_NAME_CASE(SLL_W)
7040 NODE_NAME_CASE(SRA_W)
7041 NODE_NAME_CASE(SRL_W)
7042 NODE_NAME_CASE(BSTRINS)
7043 NODE_NAME_CASE(BSTRPICK)
7044 NODE_NAME_CASE(MOVGR2FR_W_LA64)
7045 NODE_NAME_CASE(MOVFR2GR_S_LA64)
7046 NODE_NAME_CASE(FTINT)
7047 NODE_NAME_CASE(BUILD_PAIR_F64)
7048 NODE_NAME_CASE(SPLIT_PAIR_F64)
7049 NODE_NAME_CASE(REVB_2H)
7050 NODE_NAME_CASE(REVB_2W)
7051 NODE_NAME_CASE(BITREV_4B)
7052 NODE_NAME_CASE(BITREV_8B)
7053 NODE_NAME_CASE(BITREV_W)
7054 NODE_NAME_CASE(ROTR_W)
7055 NODE_NAME_CASE(ROTL_W)
7056 NODE_NAME_CASE(DIV_W)
7057 NODE_NAME_CASE(DIV_WU)
7058 NODE_NAME_CASE(MOD_W)
7059 NODE_NAME_CASE(MOD_WU)
7060 NODE_NAME_CASE(CLZ_W)
7061 NODE_NAME_CASE(CTZ_W)
7062 NODE_NAME_CASE(DBAR)
7063 NODE_NAME_CASE(IBAR)
7064 NODE_NAME_CASE(BREAK)
7065 NODE_NAME_CASE(SYSCALL)
7066 NODE_NAME_CASE(CRC_W_B_W)
7067 NODE_NAME_CASE(CRC_W_H_W)
7068 NODE_NAME_CASE(CRC_W_W_W)
7069 NODE_NAME_CASE(CRC_W_D_W)
7070 NODE_NAME_CASE(CRCC_W_B_W)
7071 NODE_NAME_CASE(CRCC_W_H_W)
7072 NODE_NAME_CASE(CRCC_W_W_W)
7073 NODE_NAME_CASE(CRCC_W_D_W)
7074 NODE_NAME_CASE(CSRRD)
7075 NODE_NAME_CASE(CSRWR)
7076 NODE_NAME_CASE(CSRXCHG)
7077 NODE_NAME_CASE(IOCSRRD_B)
7078 NODE_NAME_CASE(IOCSRRD_H)
7079 NODE_NAME_CASE(IOCSRRD_W)
7080 NODE_NAME_CASE(IOCSRRD_D)
7081 NODE_NAME_CASE(IOCSRWR_B)
7082 NODE_NAME_CASE(IOCSRWR_H)
7083 NODE_NAME_CASE(IOCSRWR_W)
7084 NODE_NAME_CASE(IOCSRWR_D)
7085 NODE_NAME_CASE(CPUCFG)
7086 NODE_NAME_CASE(MOVGR2FCSR)
7087 NODE_NAME_CASE(MOVFCSR2GR)
7088 NODE_NAME_CASE(CACOP_D)
7089 NODE_NAME_CASE(CACOP_W)
7090 NODE_NAME_CASE(VSHUF)
7091 NODE_NAME_CASE(VPICKEV)
7092 NODE_NAME_CASE(VPICKOD)
7093 NODE_NAME_CASE(VPACKEV)
7094 NODE_NAME_CASE(VPACKOD)
7095 NODE_NAME_CASE(VILVL)
7096 NODE_NAME_CASE(VILVH)
7097 NODE_NAME_CASE(VSHUF4I)
7098 NODE_NAME_CASE(VREPLVEI)
7099 NODE_NAME_CASE(VREPLGR2VR)
7100 NODE_NAME_CASE(XVPERMI)
7101 NODE_NAME_CASE(XVPERM)
7102 NODE_NAME_CASE(VPICK_SEXT_ELT)
7103 NODE_NAME_CASE(VPICK_ZEXT_ELT)
7104 NODE_NAME_CASE(VREPLVE)
7105 NODE_NAME_CASE(VALL_ZERO)
7106 NODE_NAME_CASE(VANY_ZERO)
7107 NODE_NAME_CASE(VALL_NONZERO)
7108 NODE_NAME_CASE(VANY_NONZERO)
7109 NODE_NAME_CASE(FRECIPE)
7110 NODE_NAME_CASE(FRSQRTE)
7111 NODE_NAME_CASE(VSLLI)
7112 NODE_NAME_CASE(VSRLI)
7113 NODE_NAME_CASE(VBSLL)
7114 NODE_NAME_CASE(VBSRL)
7115 NODE_NAME_CASE(VLDREPL)
7116 NODE_NAME_CASE(VMSKLTZ)
7117 NODE_NAME_CASE(VMSKGEZ)
7118 NODE_NAME_CASE(VMSKEQZ)
7119 NODE_NAME_CASE(VMSKNEZ)
7120 NODE_NAME_CASE(XVMSKLTZ)
7121 NODE_NAME_CASE(XVMSKGEZ)
7122 NODE_NAME_CASE(XVMSKEQZ)
7123 NODE_NAME_CASE(XVMSKNEZ)
7124 NODE_NAME_CASE(VHADDW)
7125 }
7126#undef NODE_NAME_CASE
7127 return nullptr;
7128}
7129
7130//===----------------------------------------------------------------------===//
7131// Calling Convention Implementation
7132//===----------------------------------------------------------------------===//
7133
7134 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
7135// with a0-a1 reused to return values. Generally, the GPRs are used to pass
7136// fixed-point arguments, and floating-point arguments when no FPR is available
7137// or with soft float ABI.
7138const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
7139 LoongArch::R7, LoongArch::R8, LoongArch::R9,
7140 LoongArch::R10, LoongArch::R11};
7141 // Eight floating-point registers fa0-fa7 are used for passing floating-point
7142// arguments, and fa0-fa1 are also used to return values.
7143const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
7144 LoongArch::F3, LoongArch::F4, LoongArch::F5,
7145 LoongArch::F6, LoongArch::F7};
7146// FPR32 and FPR64 alias each other.
7147 const MCPhysReg ArgFPR64s[] = {
7148 LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
7149 LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
7150
7151const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
7152 LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
7153 LoongArch::VR6, LoongArch::VR7};
7154
7155const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
7156 LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
7157 LoongArch::XR6, LoongArch::XR7};
7158
7159// Pass a 2*GRLen argument that has been split into two GRLen values through
7160// registers or the stack as necessary.
7161static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
7162 CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
7163 unsigned ValNo2, MVT ValVT2, MVT LocVT2,
7164 ISD::ArgFlagsTy ArgFlags2) {
7165 unsigned GRLenInBytes = GRLen / 8;
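// GRLen is the width of a general-purpose register in bits (32 or 64), so
// GRLenInBytes is the size of a single argument register or stack slot.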
7166 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7167 // At least one half can be passed via register.
7168 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
7169 VA1.getLocVT(), CCValAssign::Full));
7170 } else {
7171 // Both halves must be passed on the stack, with proper alignment.
7172 Align StackAlign =
7173 std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
7174 State.addLoc(
7175 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
7176 State.AllocateStack(GRLenInBytes, StackAlign),
7177 VA1.getLocVT(), CCValAssign::Full));
7178 State.addLoc(CCValAssign::getMem(
7179 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7180 LocVT2, CCValAssign::Full));
7181 return false;
7182 }
7183 if (Register Reg = State.AllocateReg(ArgGPRs)) {
7184 // The second half can also be passed via register.
7185 State.addLoc(
7186 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
7187 } else {
7188 // The second half is passed via the stack, without additional alignment.
7189 State.addLoc(CCValAssign::getMem(
7190 ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
7191 LocVT2, CCValAssign::Full));
7192 }
7193 return false;
7194}
7195
7196// Implements the LoongArch calling convention. Returns true upon failure.
7197 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
7198 unsigned ValNo, MVT ValVT,
7199 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
7200 CCState &State, bool IsRet, Type *OrigTy) {
7201 unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
7202 assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
7203 MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
7204 MVT LocVT = ValVT;
7205
7206 // Any return value split into more than two values can't be returned
7207 // directly.
7208 if (IsRet && ValNo > 1)
7209 return true;
7210
7211 // If passing a variadic argument, or if no FPR is available.
7212 bool UseGPRForFloat = true;
7213
7214 switch (ABI) {
7215 default:
7216 llvm_unreachable("Unexpected ABI");
7217 break;
7222 UseGPRForFloat = ArgFlags.isVarArg();
7223 break;
7226 break;
7227 }
7228
7229 // If this is a variadic argument, the LoongArch calling convention requires
7230 // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
7231 // byte alignment. An aligned register should be used regardless of whether
7232 // the original argument was split during legalisation or not. The argument
7233 // will not be passed by registers if the original type is larger than
7234 // 2*GRLen, so the register alignment rule does not apply.
7235 unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
7236 if (ArgFlags.isVarArg() &&
7237 ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
7238 DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
7239 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
7240 // Skip 'odd' register if necessary.
7241 if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
7242 State.AllocateReg(ArgGPRs);
7243 }
7244
7245 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
7246 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
7247 State.getPendingArgFlags();
7248
7249 assert(PendingLocs.size() == PendingArgFlags.size() &&
7250 "PendingLocs and PendingArgFlags out of sync");
7251
7252 // FPR32 and FPR64 alias each other.
7253 if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
7254 UseGPRForFloat = true;
7255
7256 if (UseGPRForFloat && ValVT == MVT::f32) {
7257 LocVT = GRLenVT;
7258 LocInfo = CCValAssign::BCvt;
7259 } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
7260 LocVT = MVT::i64;
7261 LocInfo = CCValAssign::BCvt;
7262 } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
7263 // Handle passing f64 on LA32D with a soft float ABI or when floating point
7264 // registers are exhausted.
7265 assert(PendingLocs.empty() && "Can't lower f64 if it is split");
7266 // Depending on available argument GPRS, f64 may be passed in a pair of
7267 // GPRs, split between a GPR and the stack, or passed completely on the
7268 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
7269 // cases.
7270 MCRegister Reg = State.AllocateReg(ArgGPRs);
7271 if (!Reg) {
7272 int64_t StackOffset = State.AllocateStack(8, Align(8));
7273 State.addLoc(
7274 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7275 return false;
7276 }
7277 LocVT = MVT::i32;
7278 State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7279 MCRegister HiReg = State.AllocateReg(ArgGPRs);
7280 if (HiReg) {
7281 State.addLoc(
7282 CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
7283 } else {
7284 int64_t StackOffset = State.AllocateStack(4, Align(4));
7285 State.addLoc(
7286 CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7287 }
7288 return false;
7289 }
7290
7291 // Split arguments might be passed indirectly, so keep track of the pending
7292 // values.
7293 if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
7294 LocVT = GRLenVT;
7295 LocInfo = CCValAssign::Indirect;
7296 PendingLocs.push_back(
7297 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
7298 PendingArgFlags.push_back(ArgFlags);
7299 if (!ArgFlags.isSplitEnd()) {
7300 return false;
7301 }
7302 }
7303
7304 // If the split argument only had two elements, it should be passed directly
7305 // in registers or on the stack.
7306 if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
7307 PendingLocs.size() <= 2) {
7308 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
7309 // Apply the normal calling convention rules to the first half of the
7310 // split argument.
7311 CCValAssign VA = PendingLocs[0];
7312 ISD::ArgFlagsTy AF = PendingArgFlags[0];
7313 PendingLocs.clear();
7314 PendingArgFlags.clear();
7315 return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
7316 ArgFlags);
7317 }
7318
7319 // Allocate to a register if possible, or else a stack slot.
7320 Register Reg;
7321 unsigned StoreSizeBytes = GRLen / 8;
7322 Align StackAlign = Align(GRLen / 8);
7323
7324 if (ValVT == MVT::f32 && !UseGPRForFloat)
7325 Reg = State.AllocateReg(ArgFPR32s);
7326 else if (ValVT == MVT::f64 && !UseGPRForFloat)
7327 Reg = State.AllocateReg(ArgFPR64s);
7328 else if (ValVT.is128BitVector())
7329 Reg = State.AllocateReg(ArgVRs);
7330 else if (ValVT.is256BitVector())
7331 Reg = State.AllocateReg(ArgXRs);
7332 else
7333 Reg = State.AllocateReg(ArgGPRs);
7334
7335 unsigned StackOffset =
7336 Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
7337
7338 // If we reach this point and PendingLocs is non-empty, we must be at the
7339 // end of a split argument that must be passed indirectly.
7340 if (!PendingLocs.empty()) {
7341 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
7342 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
7343 for (auto &It : PendingLocs) {
7344 if (Reg)
7345 It.convertToReg(Reg);
7346 else
7347 It.convertToMem(StackOffset);
7348 State.addLoc(It);
7349 }
7350 PendingLocs.clear();
7351 PendingArgFlags.clear();
7352 return false;
7353 }
7354 assert((!UseGPRForFloat || LocVT == GRLenVT) &&
7355 "Expected a GRLenVT at this stage");
7356
7357 if (Reg) {
7358 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7359 return false;
7360 }
7361
7362 // When a floating-point value is passed on the stack, no bit-cast is needed.
7363 if (ValVT.isFloatingPoint()) {
7364 LocVT = ValVT;
7365 LocInfo = CCValAssign::Full;
7366 }
7367
7368 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
7369 return false;
7370}
7371
7372void LoongArchTargetLowering::analyzeInputArgs(
7373 MachineFunction &MF, CCState &CCInfo,
7374 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
7375 LoongArchCCAssignFn Fn) const {
7376 FunctionType *FType = MF.getFunction().getFunctionType();
7377 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
7378 MVT ArgVT = Ins[i].VT;
7379 Type *ArgTy = nullptr;
7380 if (IsRet)
7381 ArgTy = FType->getReturnType();
7382 else if (Ins[i].isOrigArg())
7383 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
7384 LoongArchABI::ABI ABI =
7385 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7386 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
7387 CCInfo, IsRet, ArgTy)) {
7388 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
7389 << '\n');
7390 llvm_unreachable("");
7391 }
7392 }
7393}
7394
7395void LoongArchTargetLowering::analyzeOutputArgs(
7396 MachineFunction &MF, CCState &CCInfo,
7397 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
7398 CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
7399 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7400 MVT ArgVT = Outs[i].VT;
7401 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
7402 LoongArchABI::ABI ABI =
7403 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
7404 if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
7405 CCInfo, IsRet, OrigTy)) {
7406 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
7407 << "\n");
7408 llvm_unreachable("");
7409 }
7410 }
7411}
7412
7413// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
7414// values.
7415 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
7416 const CCValAssign &VA, const SDLoc &DL) {
7417 switch (VA.getLocInfo()) {
7418 default:
7419 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7420 case CCValAssign::Full:
7422 break;
7423 case CCValAssign::BCvt:
7424 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7425 Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
7426 else
7427 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
7428 break;
7429 }
7430 return Val;
7431}
7432
7433 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
7434 const CCValAssign &VA, const SDLoc &DL,
7435 const ISD::InputArg &In,
7436 const LoongArchTargetLowering &TLI) {
7437 MachineFunction &MF = DAG.getMachineFunction();
7438 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7439 EVT LocVT = VA.getLocVT();
7440 SDValue Val;
7441 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
7442 Register VReg = RegInfo.createVirtualRegister(RC);
7443 RegInfo.addLiveIn(VA.getLocReg(), VReg);
7444 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
7445
7446 // If input is sign extended from 32 bits, note it for the OptW pass.
7447 if (In.isOrigArg()) {
7448 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
7449 if (OrigArg->getType()->isIntegerTy()) {
7450 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
7451 // An input zero extended from i31 can also be considered sign extended.
7452 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
7453 (BitWidth < 32 && In.Flags.isZExt())) {
7454 LoongArchMachineFunctionInfo *LAFI =
7455 MF.getInfo<LoongArchMachineFunctionInfo>();
7456 LAFI->addSExt32Register(VReg);
7457 }
7458 }
7459 }
7460
7461 return convertLocVTToValVT(DAG, Val, VA, DL);
7462}
7463
7464// The caller is responsible for loading the full value if the argument is
7465// passed with CCValAssign::Indirect.
7466 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
7467 const CCValAssign &VA, const SDLoc &DL) {
7468 MachineFunction &MF = DAG.getMachineFunction();
7469 MachineFrameInfo &MFI = MF.getFrameInfo();
7470 EVT ValVT = VA.getValVT();
7471 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
7472 /*IsImmutable=*/true);
7473 SDValue FIN = DAG.getFrameIndex(
7475
7476 ISD::LoadExtType ExtType;
7477 switch (VA.getLocInfo()) {
7478 default:
7479 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7480 case CCValAssign::Full:
7481 case CCValAssign::Indirect:
7482 case CCValAssign::BCvt:
7483 ExtType = ISD::NON_EXTLOAD;
7484 break;
7485 }
7486 return DAG.getExtLoad(
7487 ExtType, DL, VA.getLocVT(), Chain, FIN,
7488 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
7489}
7490
7491 static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain,
7492 const CCValAssign &VA,
7493 const CCValAssign &HiVA,
7494 const SDLoc &DL) {
7495 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
7496 "Unexpected VA");
7497 MachineFunction &MF = DAG.getMachineFunction();
7498 MachineFrameInfo &MFI = MF.getFrameInfo();
7499 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7500
7501 assert(VA.isRegLoc() && "Expected register VA assignment");
7502
7503 Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7504 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
7505 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
7506 SDValue Hi;
7507 if (HiVA.isMemLoc()) {
7508 // Second half of f64 is passed on the stack.
7509 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
7510 /*IsImmutable=*/true);
7511 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
7512 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
7513 MachinePointerInfo::getFixedStack(MF, FI));
7514 } else {
7515 // Second half of f64 is passed in another GPR.
7516 Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
7517 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
7518 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
7519 }
7520 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
7521}
7522
7523 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
7524 const CCValAssign &VA, const SDLoc &DL) {
7525 EVT LocVT = VA.getLocVT();
7526
7527 switch (VA.getLocInfo()) {
7528 default:
7529 llvm_unreachable("Unexpected CCValAssign::LocInfo");
7530 case CCValAssign::Full:
7531 break;
7532 case CCValAssign::BCvt:
7533 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
7534 Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
7535 else
7536 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
7537 break;
7538 }
7539 return Val;
7540}
7541
7542static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
7543 CCValAssign::LocInfo LocInfo,
7544 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
7545 CCState &State) {
7546 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
7547 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
7548 // s0 s1 s2 s3 s4 s5 s6 s7 s8
7549 static const MCPhysReg GPRList[] = {
7550 LoongArch::R23, LoongArch::R24, LoongArch::R25,
7551 LoongArch::R26, LoongArch::R27, LoongArch::R28,
7552 LoongArch::R29, LoongArch::R30, LoongArch::R31};
7553 if (MCRegister Reg = State.AllocateReg(GPRList)) {
7554 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7555 return false;
7556 }
7557 }
7558
7559 if (LocVT == MVT::f32) {
7560 // Pass in STG registers: F1, F2, F3, F4
7561 // fs0,fs1,fs2,fs3
7562 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
7563 LoongArch::F26, LoongArch::F27};
7564 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
7565 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7566 return false;
7567 }
7568 }
7569
7570 if (LocVT == MVT::f64) {
7571 // Pass in STG registers: D1, D2, D3, D4
7572 // fs4,fs5,fs6,fs7
7573 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
7574 LoongArch::F30_64, LoongArch::F31_64};
7575 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
7576 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
7577 return false;
7578 }
7579 }
7580
7581 report_fatal_error("No registers left in GHC calling convention");
7582 return true;
7583}
7584
7585// Transform physical registers into virtual registers.
7586 SDValue LoongArchTargetLowering::LowerFormalArguments(
7587 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
7588 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
7589 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7590
7591 MachineFunction &MF = DAG.getMachineFunction();
7592
7593 switch (CallConv) {
7594 default:
7595 llvm_unreachable("Unsupported calling convention");
7596 case CallingConv::C:
7597 case CallingConv::Fast:
7599 break;
7600 case CallingConv::GHC:
7601 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
7602 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
7603 report_fatal_error(
7604 "GHC calling convention requires the F and D extensions");
7605 }
7606
7607 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7608 MVT GRLenVT = Subtarget.getGRLenVT();
7609 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
7610 // Used with varargs to accumulate store chains.
7611 std::vector<SDValue> OutChains;
7612
7613 // Assign locations to all of the incoming arguments.
7614 SmallVector<CCValAssign> ArgLocs;
7615 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7616
7617 if (CallConv == CallingConv::GHC)
7618 CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
7619 else
7620 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
7621
7622 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
7623 CCValAssign &VA = ArgLocs[i];
7624 SDValue ArgValue;
7625 // Passing f64 on LA32D with a soft float ABI must be handled as a special
7626 // case.
7627 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7628 assert(VA.needsCustom());
7629 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
7630 } else if (VA.isRegLoc())
7631 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
7632 else
7633 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
7634 if (VA.getLocInfo() == CCValAssign::Indirect) {
7635 // If the original argument was split and passed by reference, we need to
7636 // load all parts of it here (using the same address).
7637 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
7638 MachinePointerInfo()));
7639 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
7640 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
7641 assert(ArgPartOffset == 0);
7642 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
7643 CCValAssign &PartVA = ArgLocs[i + 1];
7644 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
7645 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7646 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
7647 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
7648 MachinePointerInfo()));
7649 ++i;
7650 ++InsIdx;
7651 }
7652 continue;
7653 }
7654 InVals.push_back(ArgValue);
7655 }
7656
7657 if (IsVarArg) {
7658 ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
7659 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
7660 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
7661 MachineFrameInfo &MFI = MF.getFrameInfo();
7662 MachineRegisterInfo &RegInfo = MF.getRegInfo();
7663 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
7664
7665 // Offset of the first variable argument from stack pointer, and size of
7666 // the vararg save area. For now, the varargs save area is either zero or
7667 // large enough to hold a0-a7.
7668 int VaArgOffset, VarArgsSaveSize;
7669
7670 // If all registers are allocated, then all varargs must be passed on the
7671 // stack and we don't need to save any argregs.
7672 if (ArgRegs.size() == Idx) {
7673 VaArgOffset = CCInfo.getStackSize();
7674 VarArgsSaveSize = 0;
7675 } else {
7676 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
7677 VaArgOffset = -VarArgsSaveSize;
7678 }
7679
7680 // Record the frame index of the first variable argument
7681 // which is a value necessary to VASTART.
7682 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7683 LoongArchFI->setVarArgsFrameIndex(FI);
7684
7685 // If saving an odd number of registers then create an extra stack slot to
7686 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
7687 // offsets to even-numbered registers remain 2*GRLen-aligned.
7688 if (Idx % 2) {
7689 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
7690 true);
7691 VarArgsSaveSize += GRLenInBytes;
7692 }
7693
7694 // Copy the integer registers that may have been used for passing varargs
7695 // to the vararg save area.
7696 for (unsigned I = Idx; I < ArgRegs.size();
7697 ++I, VaArgOffset += GRLenInBytes) {
7698 const Register Reg = RegInfo.createVirtualRegister(RC);
7699 RegInfo.addLiveIn(ArgRegs[I], Reg);
7700 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
7701 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
7702 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7703 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
7704 MachinePointerInfo::getFixedStack(MF, FI));
7705 cast<StoreSDNode>(Store.getNode())
7706 ->getMemOperand()
7707 ->setValue((Value *)nullptr);
7708 OutChains.push_back(Store);
7709 }
7710 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
7711 }
7712
7713 // All stores are grouped in one node to allow the matching between
7714 // the size of Ins and InVals. This only happens for vararg functions.
7715 if (!OutChains.empty()) {
7716 OutChains.push_back(Chain);
7717 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
7718 }
7719
7720 return Chain;
7721}
7722
7723 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
7724 return CI->isTailCall();
7725}
7726
7727// Check if the return value is used as only a return value, as otherwise
7728// we can't perform a tail-call.
7729 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
7730 SDValue &Chain) const {
7731 if (N->getNumValues() != 1)
7732 return false;
7733 if (!N->hasNUsesOfValue(1, 0))
7734 return false;
7735
7736 SDNode *Copy = *N->user_begin();
7737 if (Copy->getOpcode() != ISD::CopyToReg)
7738 return false;
7739
7740 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
7741 // isn't safe to perform a tail call.
7742 if (Copy->getGluedNode())
7743 return false;
7744
7745 // The copy must be used by a LoongArchISD::RET, and nothing else.
7746 bool HasRet = false;
7747 for (SDNode *Node : Copy->users()) {
7748 if (Node->getOpcode() != LoongArchISD::RET)
7749 return false;
7750 HasRet = true;
7751 }
7752
7753 if (!HasRet)
7754 return false;
7755
7756 Chain = Copy->getOperand(0);
7757 return true;
7758}
7759
7760// Check whether the call is eligible for tail call optimization.
7761bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
7762 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
7763 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
7764
7765 auto CalleeCC = CLI.CallConv;
7766 auto &Outs = CLI.Outs;
7767 auto &Caller = MF.getFunction();
7768 auto CallerCC = Caller.getCallingConv();
7769
7770 // Do not tail call opt if the stack is used to pass parameters.
7771 if (CCInfo.getStackSize() != 0)
7772 return false;
7773
7774 // Do not tail call opt if any parameters need to be passed indirectly.
7775 for (auto &VA : ArgLocs)
7776 if (VA.getLocInfo() == CCValAssign::Indirect)
7777 return false;
7778
7779 // Do not tail call opt if either caller or callee uses struct return
7780 // semantics.
7781 auto IsCallerStructRet = Caller.hasStructRetAttr();
7782 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
7783 if (IsCallerStructRet || IsCalleeStructRet)
7784 return false;
7785
7786 // Do not tail call opt if either the callee or caller has a byval argument.
7787 for (auto &Arg : Outs)
7788 if (Arg.Flags.isByVal())
7789 return false;
7790
7791 // The callee has to preserve all registers the caller needs to preserve.
7792 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
7793 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
7794 if (CalleeCC != CallerCC) {
7795 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
7796 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
7797 return false;
7798 }
7799 return true;
7800}
7801
7802 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
7803 return DAG.getDataLayout().getPrefTypeAlign(
7804 VT.getTypeForEVT(*DAG.getContext()));
7805}
7806
7807// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
7808// and output parameter nodes.
7809SDValue
7810 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
7811 SmallVectorImpl<SDValue> &InVals) const {
7812 SelectionDAG &DAG = CLI.DAG;
7813 SDLoc &DL = CLI.DL;
7814 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
7815 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
7816 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
7817 SDValue Chain = CLI.Chain;
7818 SDValue Callee = CLI.Callee;
7819 CallingConv::ID CallConv = CLI.CallConv;
7820 bool IsVarArg = CLI.IsVarArg;
7821 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7822 MVT GRLenVT = Subtarget.getGRLenVT();
7823 bool &IsTailCall = CLI.IsTailCall;
7824
7825 MachineFunction &MF = DAG.getMachineFunction();
7826
7827 // Analyze the operands of the call, assigning locations to each operand.
7828 SmallVector<CCValAssign> ArgLocs;
7829 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
7830
7831 if (CallConv == CallingConv::GHC)
7832 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
7833 else
7834 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
7835
7836 // Check if it's really possible to do a tail call.
7837 if (IsTailCall)
7838 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
7839
7840 if (IsTailCall)
7841 ++NumTailCalls;
7842 else if (CLI.CB && CLI.CB->isMustTailCall())
7843 report_fatal_error("failed to perform tail call elimination on a call "
7844 "site marked musttail");
7845
7846 // Get a count of how many bytes are to be pushed on the stack.
7847 unsigned NumBytes = ArgCCInfo.getStackSize();
7848
7849 // Create local copies for byval args.
7850 SmallVector<SDValue> ByValArgs;
7851 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
7852 ISD::ArgFlagsTy Flags = Outs[i].Flags;
7853 if (!Flags.isByVal())
7854 continue;
7855
7856 SDValue Arg = OutVals[i];
7857 unsigned Size = Flags.getByValSize();
7858 Align Alignment = Flags.getNonZeroByValAlign();
7859
7860 int FI =
7861 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
7862 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
7863 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
7864
7865 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
7866 /*IsVolatile=*/false,
7867 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
7868 MachinePointerInfo(), MachinePointerInfo());
7869 ByValArgs.push_back(FIPtr);
7870 }
7871
7872 if (!IsTailCall)
7873 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
7874
7875 // Copy argument values to their designated locations.
7876 SmallVector<std::pair<Register, SDValue>> RegsToPass;
7877 SmallVector<SDValue> MemOpChains;
7878 SDValue StackPtr;
7879 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
7880 ++i, ++OutIdx) {
7881 CCValAssign &VA = ArgLocs[i];
7882 SDValue ArgValue = OutVals[OutIdx];
7883 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
7884
7885 // Handle passing f64 on LA32D with a soft float ABI as a special case.
7886 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
7887 assert(VA.isRegLoc() && "Expected register VA assignment");
7888 assert(VA.needsCustom());
7889 SDValue SplitF64 =
7890 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
7891 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
7892 SDValue Lo = SplitF64.getValue(0);
7893 SDValue Hi = SplitF64.getValue(1);
7894
7895 Register RegLo = VA.getLocReg();
7896 RegsToPass.push_back(std::make_pair(RegLo, Lo));
7897
7898 // Get the CCValAssign for the Hi part.
7899 CCValAssign &HiVA = ArgLocs[++i];
7900
7901 if (HiVA.isMemLoc()) {
7902 // Second half of f64 is passed on the stack.
7903 if (!StackPtr.getNode())
7904 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7905 SDValue Address =
7906 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7907 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
7908 // Emit the store.
7909 MemOpChains.push_back(DAG.getStore(
7910 Chain, DL, Hi, Address,
7912 } else {
7913 // Second half of f64 is passed in another GPR.
7914 Register RegHigh = HiVA.getLocReg();
7915 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
7916 }
7917 continue;
7918 }
7919
7920 // Promote the value if needed.
7921 // For now, only handle fully promoted and indirect arguments.
7922 if (VA.getLocInfo() == CCValAssign::Indirect) {
7923 // Store the argument in a stack slot and pass its address.
7924 Align StackAlign =
7925 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
7926 getPrefTypeAlign(ArgValue.getValueType(), DAG));
7927 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
7928 // If the original argument was split and passed by reference, we need to
7929 // store the required parts of it here (and pass just one address).
7930 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
7931 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
7932 assert(ArgPartOffset == 0);
7933 // Calculate the total size to store. We don't have access to what we're
7934 // actually storing other than performing the loop and collecting the
7935 // info.
7936 SmallVector<std::pair<SDValue, SDValue>> Parts;
7937 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
7938 SDValue PartValue = OutVals[OutIdx + 1];
7939 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
7940 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
7941 EVT PartVT = PartValue.getValueType();
7942
7943 StoredSize += PartVT.getStoreSize();
7944 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
7945 Parts.push_back(std::make_pair(PartValue, Offset));
7946 ++i;
7947 ++OutIdx;
7948 }
7949 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
7950 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
7951 MemOpChains.push_back(
7952 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
7953 MachinePointerInfo::getFixedStack(MF, FI)));
7954 for (const auto &Part : Parts) {
7955 SDValue PartValue = Part.first;
7956 SDValue PartOffset = Part.second;
7957 SDValue Address =
7958 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
7959 MemOpChains.push_back(
7960 DAG.getStore(Chain, DL, PartValue, Address,
7962 }
7963 ArgValue = SpillSlot;
7964 } else {
7965 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
7966 }
7967
7968 // Use local copy if it is a byval arg.
7969 if (Flags.isByVal())
7970 ArgValue = ByValArgs[j++];
7971
7972 if (VA.isRegLoc()) {
7973 // Queue up the argument copies and emit them at the end.
7974 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
7975 } else {
7976 assert(VA.isMemLoc() && "Argument not register or memory");
7977 assert(!IsTailCall && "Tail call not allowed if stack is used "
7978 "for passing parameters");
7979
7980 // Work out the address of the stack slot.
7981 if (!StackPtr.getNode())
7982 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
7983 SDValue Address =
7984 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
7985 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
7986
7987 // Emit the store.
7988 MemOpChains.push_back(
7989 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
7990 }
7991 }
7992
7993 // Join the stores, which are independent of one another.
7994 if (!MemOpChains.empty())
7995 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
7996
7997 SDValue Glue;
7998
7999 // Build a sequence of copy-to-reg nodes, chained and glued together.
8000 for (auto &Reg : RegsToPass) {
8001 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8002 Glue = Chain.getValue(1);
8003 }
8004
8005 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8006 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8007 // split it and then direct call can be matched by PseudoCALL.
8008 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
8009 const GlobalValue *GV = S->getGlobal();
8010 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8013 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8014 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8015 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8018 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8019 }
8020
8021 // The first call operand is the chain and the second is the target address.
8022 SmallVector<SDValue> Ops;
8023 Ops.push_back(Chain);
8024 Ops.push_back(Callee);
8025
8026 // Add argument registers to the end of the list so that they are
8027 // known live into the call.
8028 for (auto &Reg : RegsToPass)
8029 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8030
8031 if (!IsTailCall) {
8032 // Add a register mask operand representing the call-preserved registers.
8033 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8034 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8035 assert(Mask && "Missing call preserved mask for calling convention");
8036 Ops.push_back(DAG.getRegisterMask(Mask));
8037 }
8038
8039 // Glue the call to the argument copies, if any.
8040 if (Glue.getNode())
8041 Ops.push_back(Glue);
8042
8043 // Emit the call.
8044 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8045 unsigned Op;
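// Select the call/tail-call node matching the code model; the medium and
// large model variants expand to longer call sequences and are only
// supported on LA64 (see the asserts below).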
8046 switch (DAG.getTarget().getCodeModel()) {
8047 default:
8048 report_fatal_error("Unsupported code model");
8049 case CodeModel::Small:
8050 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8051 break;
8052 case CodeModel::Medium:
8053 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8054 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8055 break;
8056 case CodeModel::Large:
8057 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8058 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8059 break;
8060 }
8061
8062 if (IsTailCall) {
8063 MF.getFrameInfo().setHasTailCall();
8064 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8065 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8066 return Ret;
8067 }
8068
8069 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8070 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8071 Glue = Chain.getValue(1);
8072
8073 // Mark the end of the call, which is glued to the call itself.
8074 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8075 Glue = Chain.getValue(1);
8076
8077 // Assign locations to each value returned by this call.
8078 SmallVector<CCValAssign> RVLocs;
8079 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8080 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8081
8082 // Copy all of the result registers out of their specified physreg.
8083 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8084 auto &VA = RVLocs[i];
8085 // Copy the value out.
8086 SDValue RetValue =
8087 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8088 // Glue the RetValue to the end of the call sequence.
8089 Chain = RetValue.getValue(1);
8090 Glue = RetValue.getValue(2);
8091
8092 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8093 assert(VA.needsCustom());
8094 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8095 MVT::i32, Glue);
8096 Chain = RetValue2.getValue(1);
8097 Glue = RetValue2.getValue(2);
8098 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8099 RetValue, RetValue2);
8100 } else
8101 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8102
8103 InVals.push_back(RetValue);
8104 }
8105
8106 return Chain;
8107}
8108
8109 bool LoongArchTargetLowering::CanLowerReturn(
8110 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8111 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8112 const Type *RetTy) const {
8113 SmallVector<CCValAssign> RVLocs;
8114 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8115
8116 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8117 LoongArchABI::ABI ABI =
8118 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8119 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8120 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8121 return false;
8122 }
8123 return true;
8124}
8125
8126 SDValue LoongArchTargetLowering::LowerReturn(
8127 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8128 const SmallVectorImpl<ISD::OutputArg> &Outs,
8129 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8130 SelectionDAG &DAG) const {
8131 // Stores the assignment of the return value to a location.
8132 SmallVector<CCValAssign> RVLocs;
8133
8134 // Info about the registers and stack slot.
8135 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8136 *DAG.getContext());
8137
8138 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8139 nullptr, CC_LoongArch);
8140 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8141 report_fatal_error("GHC functions return void only");
8142 SDValue Glue;
8143 SmallVector<SDValue, 4> RetOps(1, Chain);
8144
8145 // Copy the result values into the output registers.
8146 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8147 SDValue Val = OutVals[OutIdx];
8148 CCValAssign &VA = RVLocs[i];
8149 assert(VA.isRegLoc() && "Can only return in registers!");
8150
8151 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8152 // Handle returning f64 on LA32D with a soft float ABI.
8153 assert(VA.isRegLoc() && "Expected return via registers");
8154 assert(VA.needsCustom());
8155 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8156 DAG.getVTList(MVT::i32, MVT::i32), Val);
8157 SDValue Lo = SplitF64.getValue(0);
8158 SDValue Hi = SplitF64.getValue(1);
8159 Register RegLo = VA.getLocReg();
8160 Register RegHi = RVLocs[++i].getLocReg();
8161
8162 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8163 Glue = Chain.getValue(1);
8164 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8165 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8166 Glue = Chain.getValue(1);
8167 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8168 } else {
8169 // Handle a 'normal' return.
8170 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8171 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8172
8173 // Guarantee that all emitted copies are stuck together.
8174 Glue = Chain.getValue(1);
8175 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8176 }
8177 }
8178
8179 RetOps[0] = Chain; // Update chain.
8180
8181 // Add the glue node if we have it.
8182 if (Glue.getNode())
8183 RetOps.push_back(Glue);
8184
8185 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8186}
8187
8188 bool LoongArchTargetLowering::isFPImmVLDILegal(const APFloat &Imm,
8189 EVT VT) const {
8190 if (!Subtarget.hasExtLSX())
8191 return false;
8192
8193 if (VT == MVT::f32) {
8194 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8195 return (masked == 0x3e000000 || masked == 0x40000000);
8196 }
8197
8198 if (VT == MVT::f64) {
8199 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8200 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8201 }
8202
8203 return false;
8204}
8205
8206bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8207 bool ForCodeSize) const {
8208 // TODO: Maybe need more checks here after vector extension is supported.
8209 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8210 return false;
8211 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8212 return false;
8213 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8214}
8215
8216 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
8217 return true;
8218}
8219
8220 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
8221 return true;
8222}
8223
8224bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8225 const Instruction *I) const {
8226 if (!Subtarget.is64Bit())
8227 return isa<LoadInst>(I) || isa<StoreInst>(I);
8228
8229 if (isa<LoadInst>(I))
8230 return true;
8231
8232 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8233 // require fences because we can use amswap_db.[w/d].
8234 Type *Ty = I->getOperand(0)->getType();
8235 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8236 unsigned Size = Ty->getIntegerBitWidth();
8237 return (Size == 8 || Size == 16);
8238 }
8239
8240 return false;
8241}
8242
8243 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
8244 LLVMContext &Context,
8245 EVT VT) const {
8246 if (!VT.isVector())
8247 return getPointerTy(DL);
8249}
8250
8251 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
8252 // TODO: Support vectors.
8253 return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
8254}
8255
8256 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
8257 const CallInst &I,
8258 MachineFunction &MF,
8259 unsigned Intrinsic) const {
8260 switch (Intrinsic) {
8261 default:
8262 return false;
8263 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8264 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8265 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8266 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8267 Info.opc = ISD::INTRINSIC_W_CHAIN;
8268 Info.memVT = MVT::i32;
8269 Info.ptrVal = I.getArgOperand(0);
8270 Info.offset = 0;
8271 Info.align = Align(4);
8272 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
8273 MachineMemOperand::MOVolatile;
8274 return true;
8275 // TODO: Add more Intrinsics later.
8276 }
8277}
8278
8279 // When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8, so
8280 // atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8281 // expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent a
8282 // regression, we need to implement it manually.
8283 void LoongArchTargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
8284 AtomicRMWInst::BinOp Op = AI->getOperation();
8285
8286 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
8287 Op == AtomicRMWInst::And) &&
8288 "Unable to expand");
8289 unsigned MinWordSize = 4;
8290
8291 IRBuilder<> Builder(AI);
8292 LLVMContext &Ctx = Builder.getContext();
8293 const DataLayout &DL = AI->getDataLayout();
8294 Type *ValueType = AI->getType();
8295 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8296
8297 Value *Addr = AI->getPointerOperand();
8298 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8299 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8300
8301 Value *AlignedAddr = Builder.CreateIntrinsic(
8302 Intrinsic::ptrmask, {PtrTy, IntTy},
8303 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8304 "AlignedAddr");
8305
8306 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8307 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8308 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8309 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8310 Value *Mask = Builder.CreateShl(
8311 ConstantInt::get(WordType,
8312 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8313 ShiftAmt, "Mask");
8314 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8315 Value *ValOperand_Shifted =
8316 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8317 ShiftAmt, "ValOperand_Shifted");
8318 Value *NewOperand;
8319 if (Op == AtomicRMWInst::And)
8320 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8321 else
8322 NewOperand = ValOperand_Shifted;
8323
8324 AtomicRMWInst *NewAI =
8325 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8326 AI->getOrdering(), AI->getSyncScopeID());
8327
8328 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8329 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8330 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8331 AI->replaceAllUsesWith(FinalOldResult);
8332 AI->eraseFromParent();
8333}
8334
8335 TargetLowering::AtomicExpansionKind
8336 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
8337 // TODO: Add more AtomicRMWInst that need to be extended.
8338
8339 // Since a floating-point operation requires a non-trivial set of data
8340 // operations, use CmpXChg to expand.
8341 if (AI->isFloatingPointOperation() ||
8342 AI->getOperation() == AtomicRMWInst::UIncWrap ||
8343 AI->getOperation() == AtomicRMWInst::UDecWrap ||
8344 AI->getOperation() == AtomicRMWInst::USubCond ||
8345 AI->getOperation() == AtomicRMWInst::USubSat)
8346 return AtomicExpansionKind::CmpXChg;
8347
8348 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
8349 (AI->getOperation() == AtomicRMWInst::Xchg ||
8350 AI->getOperation() == AtomicRMWInst::Add ||
8351 AI->getOperation() == AtomicRMWInst::Sub)) {
8352 return AtomicExpansionKind::None;
8353 }
8354
8355 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
8356 if (Subtarget.hasLAMCAS()) {
8357 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
8358 AI->getOperation() == AtomicRMWInst::Or ||
8359 AI->getOperation() == AtomicRMWInst::Xor))
8360 return AtomicExpansionKind::Expand;
8361 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
8362 return AtomicExpansionKind::CmpXChg;
8363 }
8364
8365 if (Size == 8 || Size == 16)
8368}
8369
8370static Intrinsic::ID
8371 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
8372 AtomicRMWInst::BinOp BinOp) {
8373 if (GRLen == 64) {
8374 switch (BinOp) {
8375 default:
8376 llvm_unreachable("Unexpected AtomicRMW BinOp");
8377 case AtomicRMWInst::Xchg:
8378 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
8379 case AtomicRMWInst::Add:
8380 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
8381 case AtomicRMWInst::Sub:
8382 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
8383 case AtomicRMWInst::Nand:
8384 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
8385 case AtomicRMWInst::UMax:
8386 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
8387 case AtomicRMWInst::UMin:
8388 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
8389 case AtomicRMWInst::Max:
8390 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
8391 case AtomicRMWInst::Min:
8392 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
8393 // TODO: support other AtomicRMWInst.
8394 }
8395 }
8396
8397 if (GRLen == 32) {
8398 switch (BinOp) {
8399 default:
8400 llvm_unreachable("Unexpected AtomicRMW BinOp");
8401 case AtomicRMWInst::Xchg:
8402 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
8403 case AtomicRMWInst::Add:
8404 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
8405 case AtomicRMWInst::Sub:
8406 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
8407 case AtomicRMWInst::Nand:
8408 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
8409 case AtomicRMWInst::UMax:
8410 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
8411 case AtomicRMWInst::UMin:
8412 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
8413 case AtomicRMWInst::Max:
8414 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
8415 case AtomicRMWInst::Min:
8416 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
8417 // TODO: support other AtomicRMWInst.
8418 }
8419 }
8420
8421 llvm_unreachable("Unexpected GRLen\n");
8422}
8423
8424 TargetLowering::AtomicExpansionKind
8425 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
8426 AtomicCmpXchgInst *CI) const {
8427
8428 if (Subtarget.hasLAMCAS())
8429 return AtomicExpansionKind::None;
8430
8431 unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
8432 if (Size == 8 || Size == 16)
8433 return AtomicExpansionKind::MaskedIntrinsic;
8434 return AtomicExpansionKind::None;
8435 }
8436
8437 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
8438 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
8439 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
8440 unsigned GRLen = Subtarget.getGRLen();
8441 AtomicOrdering FailOrd = CI->getFailureOrdering();
8442 Value *FailureOrdering =
8443 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
8444 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
8445 if (GRLen == 64) {
8446 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
8447 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
8448 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
8449 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8450 }
8451 Type *Tys[] = {AlignedAddr->getType()};
8452 Value *Result = Builder.CreateIntrinsic(
8453 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
8454 if (GRLen == 64)
8455 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8456 return Result;
8457}
8458
8459 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
8460 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
8461 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
8462 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
8463 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
8464 // mask, as this produces better code than the LL/SC loop emitted by
8465 // int_loongarch_masked_atomicrmw_xchg.
8466 if (AI->getOperation() == AtomicRMWInst::Xchg &&
8467 isa<ConstantInt>(AI->getValOperand())) {
8468 ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
8469 if (CVal->isZero())
8470 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
8471 Builder.CreateNot(Mask, "Inv_Mask"),
8472 AI->getAlign(), Ord);
8473 if (CVal->isMinusOne())
8474 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
8475 AI->getAlign(), Ord);
8476 }
8477
8478 unsigned GRLen = Subtarget.getGRLen();
8479 Value *Ordering =
8480 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
8481 Type *Tys[] = {AlignedAddr->getType()};
8482 Function *LlwOpScwLoop = Intrinsic::getOrInsertDeclaration(
8483 AI->getModule(),
8484 getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
8485
8486 if (GRLen == 64) {
8487 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
8488 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
8489 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
8490 }
8491
8492 Value *Result;
8493
8494 // Must pass the shift amount needed to sign extend the loaded value prior
8495 // to performing a signed comparison for min/max. ShiftAmt is the number of
8496 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
8497 // is the number of bits to left+right shift the value in order to
8498 // sign-extend.
8499 if (AI->getOperation() == AtomicRMWInst::Min ||
8501 const DataLayout &DL = AI->getDataLayout();
8502 unsigned ValWidth =
8503 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
8504 Value *SextShamt =
8505 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
8506 Result = Builder.CreateCall(LlwOpScwLoop,
8507 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
8508 } else {
8509 Result =
8510 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
8511 }
8512
8513 if (GRLen == 64)
8514 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
8515 return Result;
8516}
8517
8518 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
8519 const MachineFunction &MF, EVT VT) const {
8520 VT = VT.getScalarType();
8521
8522 if (!VT.isSimple())
8523 return false;
8524
8525 switch (VT.getSimpleVT().SimpleTy) {
8526 case MVT::f32:
8527 case MVT::f64:
8528 return true;
8529 default:
8530 break;
8531 }
8532
8533 return false;
8534}
8535
8536 Register LoongArchTargetLowering::getExceptionPointerRegister(
8537 const Constant *PersonalityFn) const {
8538 return LoongArch::R4;
8539}
8540
8541 Register LoongArchTargetLowering::getExceptionSelectorRegister(
8542 const Constant *PersonalityFn) const {
8543 return LoongArch::R5;
8544}
8545
8546//===----------------------------------------------------------------------===//
8547// Target Optimization Hooks
8548//===----------------------------------------------------------------------===//
8549
8550 static int getEstimateRefinementSteps(EVT VT,
8551 const LoongArchSubtarget &Subtarget) {
8552 // The FRECIPE feature's estimate instructions have a relative accuracy of 2^-14.
8553 // IEEE float has 23 significand bits and double has 52.
8554 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
8555 return RefinementSteps;
8556}
8557
8558 SDValue LoongArchTargetLowering::getSqrtEstimate(SDValue Operand,
8559 SelectionDAG &DAG, int Enabled,
8560 int &RefinementSteps,
8561 bool &UseOneConstNR,
8562 bool Reciprocal) const {
8563 if (Subtarget.hasFrecipe()) {
8564 SDLoc DL(Operand);
8565 EVT VT = Operand.getValueType();
8566
8567 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8568 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8569 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8570 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8571 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8572
8573 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8574 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8575
8576 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
8577 if (Reciprocal)
8578 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
8579
8580 return Estimate;
8581 }
8582 }
8583
8584 return SDValue();
8585}
8586
8587 SDValue LoongArchTargetLowering::getRecipEstimate(SDValue Operand,
8588 SelectionDAG &DAG,
8589 int Enabled,
8590 int &RefinementSteps) const {
8591 if (Subtarget.hasFrecipe()) {
8592 SDLoc DL(Operand);
8593 EVT VT = Operand.getValueType();
8594
8595 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
8596 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
8597 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
8598 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
8599 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
8600
8601 if (RefinementSteps == ReciprocalEstimate::Unspecified)
8602 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
8603
8604 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
8605 }
8606 }
8607
8608 return SDValue();
8609}
8610
8611//===----------------------------------------------------------------------===//
8612// LoongArch Inline Assembly Support
8613//===----------------------------------------------------------------------===//
8614
8615 LoongArchTargetLowering::ConstraintType
8616 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
8617 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
8618 //
8619 // 'f': A floating-point register (if available).
8620 // 'k': A memory operand whose address is formed by a base register and
8621 // (optionally scaled) index register.
8622 // 'l': A signed 16-bit constant.
8623 // 'm': A memory operand whose address is formed by a base register and
8624 // offset that is suitable for use in instructions with the same
8625 // addressing mode as st.w and ld.w.
8626 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
8627 // instruction)
8628 // 'I': A signed 12-bit constant (for arithmetic instructions).
8629 // 'J': Integer zero.
8630 // 'K': An unsigned 12-bit constant (for logic instructions).
8631 // "ZB": An address that is held in a general-purpose register. The offset is
8632 // zero.
8633 // "ZC": A memory operand whose address is formed by a base register and
8634 // offset that is suitable for use in instructions with the same
8635 // addressing mode as ll.w and sc.w.
8636 if (Constraint.size() == 1) {
8637 switch (Constraint[0]) {
8638 default:
8639 break;
8640 case 'f':
8641 case 'q':
8642 return C_RegisterClass;
8643 case 'l':
8644 case 'I':
8645 case 'J':
8646 case 'K':
8647 return C_Immediate;
8648 case 'k':
8649 return C_Memory;
8650 }
8651 }
8652
8653 if (Constraint == "ZC" || Constraint == "ZB")
8654 return C_Memory;
8655
8656 // 'm' is handled here.
8657 return TargetLowering::getConstraintType(Constraint);
8658}
8659
8660InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
8661 StringRef ConstraintCode) const {
8662 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
8663 .Case("k", InlineAsm::ConstraintCode::k)
8664 .Case("ZB", InlineAsm::ConstraintCode::ZB)
8665 .Case("ZC", InlineAsm::ConstraintCode::ZC)
8666 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
8667}
8668
8669std::pair<unsigned, const TargetRegisterClass *>
8670LoongArchTargetLowering::getRegForInlineAsmConstraint(
8671 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8672 // First, see if this is a constraint that directly corresponds to a LoongArch
8673 // register class.
8674 if (Constraint.size() == 1) {
8675 switch (Constraint[0]) {
8676 case 'r':
8677 // TODO: Support fixed vectors up to GRLen?
8678 if (VT.isVector())
8679 break;
8680 return std::make_pair(0U, &LoongArch::GPRRegClass);
8681 case 'q':
8682 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
8683 case 'f':
8684 if (Subtarget.hasBasicF() && VT == MVT::f32)
8685 return std::make_pair(0U, &LoongArch::FPR32RegClass);
8686 if (Subtarget.hasBasicD() && VT == MVT::f64)
8687 return std::make_pair(0U, &LoongArch::FPR64RegClass);
8688 if (Subtarget.hasExtLSX() &&
8689 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
8690 return std::make_pair(0U, &LoongArch::LSX128RegClass);
8691 if (Subtarget.hasExtLASX() &&
8692 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
8693 return std::make_pair(0U, &LoongArch::LASX256RegClass);
8694 break;
8695 default:
8696 break;
8697 }
8698 }
8699
8700 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
8701 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
8702 // constraints while the official register name is prefixed with a '$'. So we
8703 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
8704 // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
8705 // case insensitive, so no need to convert the constraint to upper case here.
8706 //
8707 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
8708 // decode the usage of register name aliases into their official names. And
8709 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
8710 // official register names.
8711 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
8712 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
8713 bool IsFP = Constraint[2] == 'f';
8714 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
8715 std::pair<unsigned, const TargetRegisterClass *> R;
8716 R = TargetLowering::getRegForInlineAsmConstraint(
8717 TRI, join_items("", Temp.first, Temp.second), VT);
8718 // Match those names to the widest floating point register type available.
8719 if (IsFP) {
8720 unsigned RegNo = R.first;
8721 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
8722 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
8723 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
8724 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
8725 }
8726 }
8727 }
8728 return R;
8729 }
8730
8731 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8732}
8733
8734void LoongArchTargetLowering::LowerAsmOperandForConstraint(
8735 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
8736 SelectionDAG &DAG) const {
8737 // Currently only support length 1 constraints.
8738 if (Constraint.size() == 1) {
8739 switch (Constraint[0]) {
8740 case 'l':
8741 // Validate & create a 16-bit signed immediate operand.
8742 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8743 uint64_t CVal = C->getSExtValue();
8744 if (isInt<16>(CVal))
8745 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8746 Subtarget.getGRLenVT()));
8747 }
8748 return;
8749 case 'I':
8750 // Validate & create a 12-bit signed immediate operand.
8751 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8752 uint64_t CVal = C->getSExtValue();
8753 if (isInt<12>(CVal))
8754 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
8755 Subtarget.getGRLenVT()));
8756 }
8757 return;
8758 case 'J':
8759 // Validate & create an integer zero operand.
8760 if (auto *C = dyn_cast<ConstantSDNode>(Op))
8761 if (C->getZExtValue() == 0)
8762 Ops.push_back(
8763 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
8764 return;
8765 case 'K':
8766 // Validate & create a 12-bit unsigned immediate operand.
8767 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
8768 uint64_t CVal = C->getZExtValue();
8769 if (isUInt<12>(CVal))
8770 Ops.push_back(
8771 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
8772 }
8773 return;
8774 default:
8775 break;
8776 }
8777 }
8779}
8780
8781#define GET_REGISTER_MATCHER
8782#include "LoongArchGenAsmMatcher.inc"
8783
8784 Register
8785 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
8786 const MachineFunction &MF) const {
8787 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
8788 std::string NewRegName = Name.second.str();
8789 Register Reg = MatchRegisterAltName(NewRegName);
8790 if (!Reg)
8791 Reg = MatchRegisterName(NewRegName);
8792 if (!Reg)
8793 return Reg;
8794 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
8795 if (!ReservedRegs.test(Reg))
8796 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
8797 StringRef(RegName) + "\"."));
8798 return Reg;
8799}
8800
8801 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
8802 EVT VT, SDValue C) const {
8803 // TODO: Support vectors.
8804 if (!VT.isScalarInteger())
8805 return false;
8806
8807 // Omit the optimization if the data size exceeds GRLen.
8808 if (VT.getSizeInBits() > Subtarget.getGRLen())
8809 return false;
8810
8811 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
8812 const APInt &Imm = ConstNode->getAPIntValue();
8813 // Break MUL into (SLLI + ADD/SUB) or ALSL.
8814 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
8815 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
8816 return true;
8817 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
8818 if (ConstNode->hasOneUse() &&
8819 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
8820 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
8821 return true;
8822 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
8823 // in which the immediate has two set bits. Or break (MUL x, imm)
8824 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
8825 // equals (1 << s0) - (1 << s1).
8826 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
8827 unsigned Shifts = Imm.countr_zero();
8828 // Reject immediates which can be composed via a single LUI.
8829 if (Shifts >= 12)
8830 return false;
8831 // Reject multiplications that can be optimized to
8832 // (SLLI (ALSL x, x, 1/2/3/4), s).
8833 APInt ImmPop = Imm.ashr(Shifts);
8834 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
8835 return false;
8836 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
8837 // since it needs one more instruction than the other three cases.
8838 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
8839 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
8840 (ImmSmall - Imm).isPowerOf2())
8841 return true;
8842 }
8843 }
8844
8845 return false;
8846}
8847
8848 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
8849 const AddrMode &AM,
8850 Type *Ty, unsigned AS,
8851 Instruction *I) const {
8852 // LoongArch has four basic addressing modes:
8853 // 1. reg
8854 // 2. reg + 12-bit signed offset
8855 // 3. reg + 14-bit signed offset left-shifted by 2
8856 // 4. reg1 + reg2
8857 // TODO: Add more checks after the vector extension is supported.
8858
8859 // No global is ever allowed as a base.
8860 if (AM.BaseGV)
8861 return false;
8862
8863 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
8864 // with `UAL` feature.
8865 if (!isInt<12>(AM.BaseOffs) &&
8866 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
8867 return false;
8868
8869 switch (AM.Scale) {
8870 case 0:
8871 // "r+i" or just "i", depending on HasBaseReg.
8872 break;
8873 case 1:
8874 // "r+r+i" is not allowed.
8875 if (AM.HasBaseReg && AM.BaseOffs)
8876 return false;
8877 // Otherwise we have "r+r" or "r+i".
8878 break;
8879 case 2:
8880 // "2*r+r" or "2*r+i" is not allowed.
8881 if (AM.HasBaseReg || AM.BaseOffs)
8882 return false;
8883 // Allow "2*r" as "r+r".
8884 break;
8885 default:
8886 return false;
8887 }
8888
8889 return true;
8890}
8891
8892 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
8893 return isInt<12>(Imm);
8894}
8895
8896 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
8897 return isInt<12>(Imm);
8898}
8899
8900 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
8901 // Zexts are free if they can be combined with a load.
8902 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
8903 // poorly with type legalization of compares preferring sext.
8904 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
8905 EVT MemVT = LD->getMemoryVT();
8906 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
8907 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
8908 LD->getExtensionType() == ISD::ZEXTLOAD))
8909 return true;
8910 }
8911
8912 return TargetLowering::isZExtFree(Val, VT2);
8913}
8914
8915 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
8916 EVT DstVT) const {
8917 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
8918}
8919
8920 bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
8921 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
8922}
8923
8924 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
8925 // TODO: Support vectors.
8926 if (Y.getValueType().isVector())
8927 return false;
8928
8929 return !isa<ConstantSDNode>(Y);
8930}
8931
8932 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
8933 // LAMCAS will use amcas[_DB].{b/h/w/d}, which does not require extension.
8934 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
8935}
8936
8937 bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
8938 Type *Ty, bool IsSigned) const {
8939 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
8940 return true;
8941
8942 return IsSigned;
8943}
8944
8945 bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
8946 // Return false to suppress the unnecessary extensions if the LibCall
8947 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
8948 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
8949 Type.getSizeInBits() < Subtarget.getGRLen()))
8950 return false;
8951 return true;
8952}
8953
8954 // memcpy, and other memory intrinsics, typically try to use wider load/store
8955 // if the source/dest is aligned and the copy size is large enough. We therefore
8956 // want to align such objects passed to memory intrinsics.
8957 bool LoongArchTargetLowering::shouldAlignPointerArgs(CallInst *CI,
8958 unsigned &MinSize,
8959 Align &PrefAlign) const {
8960 if (!isa<MemIntrinsic>(CI))
8961 return false;
8962
8963 if (Subtarget.is64Bit()) {
8964 MinSize = 8;
8965 PrefAlign = Align(8);
8966 } else {
8967 MinSize = 4;
8968 PrefAlign = Align(4);
8969 }
8970
8971 return true;
8972}
8973
8982
8983bool LoongArchTargetLowering::splitValueIntoRegisterParts(
8984 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
8985 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
8986 bool IsABIRegCopy = CC.has_value();
8987 EVT ValueVT = Val.getValueType();
8988
8989 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
8990 PartVT == MVT::f32) {
8991 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
8992 // nan, and cast to f32.
8993 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
8994 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
8995 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
8996 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
8997 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
8998 Parts[0] = Val;
8999 return true;
9000 }
9001
9002 return false;
9003}
9004
9005SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9006 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9007 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9008 bool IsABIRegCopy = CC.has_value();
9009
9010 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9011 PartVT == MVT::f32) {
9012 SDValue Val = Parts[0];
9013
9014 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9015 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9016 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9017 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9018 return Val;
9019 }
9020
9021 return SDValue();
9022}
9023
9024MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9025 CallingConv::ID CC,
9026 EVT VT) const {
9027 // Use f32 to pass f16.
9028 if (VT == MVT::f16 && Subtarget.hasBasicF())
9029 return MVT::f32;
9030
9032}
9033
9034unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9035 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9036 // Use f32 to pass f16.
9037 if (VT == MVT::f16 && Subtarget.hasBasicF())
9038 return 1;
9039
9041}
9042
9043 bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
9044 SDValue Op, const APInt &OriginalDemandedBits,
9045 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9046 unsigned Depth) const {
9047 EVT VT = Op.getValueType();
9048 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9049 unsigned Opc = Op.getOpcode();
9050 switch (Opc) {
9051 default:
9052 break;
9053 case LoongArchISD::VMSKLTZ:
9054 case LoongArchISD::XVMSKLTZ: {
9055 SDValue Src = Op.getOperand(0);
9056 MVT SrcVT = Src.getSimpleValueType();
9057 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9058 unsigned NumElts = SrcVT.getVectorNumElements();
9059
9060 // If we don't need the sign bits at all just return zero.
9061 if (OriginalDemandedBits.countr_zero() >= NumElts)
9062 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9063
9064 // Only demand the vector elements of the sign bits we need.
9065 APInt KnownUndef, KnownZero;
9066 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9067 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9068 TLO, Depth + 1))
9069 return true;
9070
9071 Known.Zero = KnownZero.zext(BitWidth);
9072 Known.Zero.setHighBits(BitWidth - NumElts);
9073
9074 // [X]VMSKLTZ only uses the MSB from each vector element.
9075 KnownBits KnownSrc;
9076 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9077 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9078 Depth + 1))
9079 return true;
9080
9081 if (KnownSrc.One[SrcBits - 1])
9082 Known.One.setLowBits(NumElts);
9083 else if (KnownSrc.Zero[SrcBits - 1])
9084 Known.Zero.setLowBits(NumElts);
9085
9086 // Attempt to avoid multi-use ops if we don't need anything from it.
9087 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
9088 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9089 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9090 return false;
9091 }
9092 }
9093
9094 return TargetLowering::SimplifyDemandedBitsForTargetNode(
9095 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9096}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
#define NODE_NAME_CASE(node)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match vector shuffle as byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instruction.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VSHUF.
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode)
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:229
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1391
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1488
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1639
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1257
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1388
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:286
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:461
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:390
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is a legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform an atomicrmw expansion in a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
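A small illustration of the MVT helpers above, useful when splitting or widening vector types during lowering (the concrete types are chosen only for demonstration):
  MVT VT = MVT::v8i32;                                   // 256-bit integer vector
  MVT EltVT = VT.getVectorElementType();                 // i32
  MVT HalfVT = VT.getHalfNumVectorElementsVT();          // v4i32
  MVT WideEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); // i64
  MVT WideVT = MVT::getVectorVT(WideEltVT, HalfVT.getVectorNumElements()); // v4i64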
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
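A hedged sketch of how CreateFixedObject is commonly paired with a frame-index load when an argument is passed on the stack (MF, VA, DAG, DL, PtrVT and Chain stand for values available in the surrounding argument-lowering code; not a verbatim excerpt of this file):
  MachineFrameInfo &MFI = MF.getFrameInfo();
  // Describe the caller-allocated slot holding the incoming argument.
  int FI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue ArgVal = DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
                               MachinePointerInfo::getFixedStack(MF, FI));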
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
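These MachineInstrBuilder methods are normally chained off BuildMI (listed further below), e.g. inside a custom-inserter. An illustrative sketch, assuming MBB, MI, DL, TII and the registers/blocks are provided by the surrounding code:
  // addi.w $DestReg, $SrcReg, Imm
  BuildMI(*MBB, MI, DL, TII->get(LoongArch::ADDI_W), DestReg)
      .addReg(SrcReg)
      .addImm(Imm);
  // beqz $CondReg, TailMBB
  BuildMI(*MBB, MI, DL, TII->get(LoongArch::BEQZ))
      .addReg(CondReg)
      .addMBB(TailMBB);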
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:303
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts a new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
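A short sketch of how these known-bits queries are typically used in a combine, e.g. to prove that the upper half of a 64-bit value is zero (illustrative only; Val and DAG come from the surrounding code):
  KnownBits Known = DAG.computeKnownBits(Val);
  if (Known.countMinLeadingZeros() >= 32) {
    // Upper 32 bits are known zero; a narrower operation is safe.
  }
  // The same question phrased as a mask test:
  APInt HiBits = APInt::getHighBitsSet(64, 32);
  bool UpperHalfZero = DAG.MaskedValueIsZero(Val, HiBits);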
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
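The SelectionDAG members above form the node-construction toolkit used throughout this file. A minimal sketch of building a shift and a select on a comparison, assumed to sit inside a lowering/combine member function where N and DAG are available:
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  // LHS << 3, using a target-appropriate shift-amount type.
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, LHS,
                            DAG.getShiftAmountConstant(3, VT, DL));
  // (LHS == 0) ? Shl : RHS
  SDValue IsZero = DAG.getSetCC(
      DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), LHS,
      DAG.getConstant(0, DL, VT), ISD::SETEQ);
  SDValue Res = DAG.getSelect(DL, VT, IsZero, Shl, RHS);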
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:710
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:269
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:154
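These StringRef helpers are the usual tools for parsing textual names, e.g. register names in getRegisterByName or inline-asm constraints. A tiny illustrative sketch (the name is made up):
  StringRef Name = "$f12";
  if (Name.starts_with("$"))
    Name = Name.drop_front();            // "f12"
  std::pair<StringRef, StringRef> Parts = Name.split('.');
  bool Short = Parts.first.size() <= 3;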
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
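A hedged sketch of how makeLibCall combines with the RTLIB queries listed further below to lower an unsupported FP-to-int conversion (Op, DL and DAG stand for values available in the surrounding lowering code):
  EVT SrcVT = Op.getOperand(0).getValueType();
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(SrcVT, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unexpected conversion");
  TargetLowering::MakeLibCallOptions CallOptions;
  std::pair<SDValue, SDValue> Call =
      makeLibCall(DAG, LC, MVT::i32, Op.getOperand(0), CallOptions, DL);
  SDValue Result = Call.first;    // converted value
  SDValue OutChain = Call.second; // output chain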
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:134
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:801
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:587
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:765
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:835
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:862
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:571
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:826
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:656
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:528
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:535
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:778
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:695
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:756
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:636
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:601
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:563
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:832
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:793
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:870
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:718
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:787
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:908
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:730
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:552
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:838
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:815
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:713
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:543
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
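A minimal sketch of the kind of fold these BUILD_VECTOR predicates enable in a DAG combine (illustrative only, not a combine implemented verbatim in this file; N is the node under combination):
  // (and X, <all-ones>) -> X        (or X, <all-zeros>) -> X
  if (N->getOpcode() == ISD::AND &&
      ISD::isBuildVectorAllOnes(N->getOperand(1).getNode()))
    return N->getOperand(0);
  if (N->getOpcode() == ISD::OR &&
      ISD::isBuildVectorAllZeros(N->getOperand(1).getNode()))
    return N->getOperand(0);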
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:262
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
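Together with isUInt, isMask_64 and isShiftedMask_64 listed nearby, isInt is the standard immediate-range and bit-pattern check during selection. A small illustrative sketch:
  int64_t Imm = 2048;
  bool FitsSImm12 = isInt<12>(Imm);   // false: signed 12-bit range is [-2048, 2047]
  bool FitsUImm12 = isUInt<12>(Imm);  // true: unsigned 12-bit range is [0, 4095]
  uint64_t C = 64;
  if (isPowerOf2_64(C)) {
    unsigned ShAmt = Log2_64(C);      // 6; a multiply by C can become a shift
    (void)ShAmt;
  }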
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
bool isIntOrFPConstant(SDValue V)
Return true if V is either an integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:270
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:342
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:282
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:270
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:198
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:191
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:390
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:368
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:380
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:376
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:318
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:323
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:331
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:448
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...