LLVM 22.0.0git
LoongArchISelLowering.cpp
Go to the documentation of this file.
1//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that LoongArch uses to lower LLVM code into
10// a selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
15#include "LoongArch.h"
19#include "LoongArchSubtarget.h"
23#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
30#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/IntrinsicsLoongArch.h"
34#include "llvm/Support/Debug.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-isel-lowering"
43
// Statistic counter reported under -stats; presumably incremented where tail
// calls are emitted (the increment site is not visible in this extract).
44STATISTIC(NumTailCalls, "Number of tail calls");
45
54
// Command-line knob bounding how many instructions may be spent materializing
// an FP immediate in registers before falling back to a constant-pool load.
// NOTE(review): the cl::opt declaration line for this option and several
// clEnumValN name/value tokens (e.g. the enumerators paired with "2".."6")
// were dropped by the doxygen extraction; only the strings below survive.
56 "loongarch-materialize-float-imm", cl::Hidden,
57 cl::desc("Maximum number of instructions used (including code sequence "
58 "to generate the value and moving the value to FPR) when "
59 "materializing floating-point immediates (default = 3)"),
61 cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"),
63 "Materialize FP immediate within 2 instructions"),
65 "Materialize FP immediate within 3 instructions"),
67 "Materialize FP immediate within 4 instructions"),
69 "Materialize FP immediate within 5 instructions"),
71 "Materialize FP immediate within 6 instructions "
72 "(behaves same as 5 on loongarch64)")));
73
// When enabled, an explicit trap on integer division by zero is emitted
// (off by default).
74static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
75 cl::desc("Trap on integer division by zero."),
76 cl::init(false));
77
// LoongArchTargetLowering constructor: registers the target's register
// classes and configures the per-node legalization actions, load/store
// extension actions, condition-code expansion, DAG combines, and alignment
// preferences for LA32/LA64 with the optional F/D/LSX/LASX features.
// NOTE(review): this doxygen extraction elides many folded source lines;
// several multi-line setOperationAction calls below are visibly truncated
// (e.g. lines ending in "MVT::i1, Promote);" or "GRLenVT, Custom);" whose
// opening lines are missing). Do not edit from this listing alone.
79 const LoongArchSubtarget &STI)
80 : TargetLowering(TM, STI), Subtarget(STI) {
81
82 MVT GRLenVT = Subtarget.getGRLenVT();
83
84 // Set up the register classes.
85
86 addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
87 if (Subtarget.hasBasicF())
88 addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
89 if (Subtarget.hasBasicD())
90 addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
91
// 128-bit (LSX) and 256-bit (LASX) vector types that map onto the SIMD
// register classes when the corresponding extension is present.
92 static const MVT::SimpleValueType LSXVTs[] = {
93 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
94 static const MVT::SimpleValueType LASXVTs[] = {
95 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
96
97 if (Subtarget.hasExtLSX())
98 for (MVT VT : LSXVTs)
99 addRegisterClass(VT, &LoongArch::LSX128RegClass);
100
101 if (Subtarget.hasExtLASX())
102 for (MVT VT : LASXVTs)
103 addRegisterClass(VT, &LoongArch::LASX256RegClass);
104
105 // Set operations for LA32 and LA64.
106
108 MVT::i1, Promote);
109
116
119 GRLenVT, Custom);
120
122
123 setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
124 setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
125 setOperationAction(ISD::VASTART, MVT::Other, Custom);
126 setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
127
128 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
129 setOperationAction(ISD::TRAP, MVT::Other, Legal);
130
134
135 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
136
137 // BITREV/REVB requires the 32S feature.
138 if (STI.has32S()) {
139 // Expand bitreverse.i16 with native-width bitrev and shift for now, before
140 // we get to know which of sll and revb.2h is faster.
143
144 // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
145 // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
146 // and i32 could still be byte-swapped relatively cheaply.
148 } else {
156 }
157
158 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
159 setOperationAction(ISD::BR_CC, GRLenVT, Expand);
160 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
164
167
168 // Set operations for LA64 only.
169
170 if (Subtarget.is64Bit()) {
177 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
188
192 Custom);
193 setOperationAction(ISD::LROUND, MVT::i32, Custom);
194 }
195
196 // Set operations for LA32 only.
197
198 if (!Subtarget.is64Bit()) {
204 if (Subtarget.hasBasicD())
205 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
206 }
207
208 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
209
// FP condition codes expanded for both f32 and f64 below; the initializer
// list contents were elided by the extraction.
210 static const ISD::CondCode FPCCToExpand[] = {
213
214 // Set operations for 'F' feature.
215
216 if (Subtarget.hasBasicF()) {
217 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
218 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
219 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
220 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
221 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
222
225 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
227 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
228 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
229 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
230 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
235 setOperationAction(ISD::FSIN, MVT::f32, Expand);
236 setOperationAction(ISD::FCOS, MVT::f32, Expand);
237 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
238 setOperationAction(ISD::FPOW, MVT::f32, Expand);
240 setOperationAction(ISD::FP16_TO_FP, MVT::f32,
241 Subtarget.isSoftFPABI() ? LibCall : Custom);
242 setOperationAction(ISD::FP_TO_FP16, MVT::f32,
243 Subtarget.isSoftFPABI() ? LibCall : Custom);
244 setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
245 setOperationAction(ISD::FP_TO_BF16, MVT::f32,
246 Subtarget.isSoftFPABI() ? LibCall : Custom);
247
248 if (Subtarget.is64Bit()) {
249 setOperationAction(ISD::FRINT, MVT::f32, Legal);
250 setOperationAction(ISD::FLOG2, MVT::f32, Legal);
251 }
252
253 if (!Subtarget.hasBasicD()) {
255 if (Subtarget.is64Bit()) {
258 }
259 }
260 }
261
262 // Set operations for 'D' feature.
263
264 if (Subtarget.hasBasicD()) {
265 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
266 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
267 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
268 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
269 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
270 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
271 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
272
275 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
279 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
280 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
281 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
283 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
285 setOperationAction(ISD::FSIN, MVT::f64, Expand);
286 setOperationAction(ISD::FCOS, MVT::f64, Expand);
287 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
288 setOperationAction(ISD::FPOW, MVT::f64, Expand);
290 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
291 setOperationAction(ISD::FP_TO_FP16, MVT::f64,
292 Subtarget.isSoftFPABI() ? LibCall : Custom);
293 setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
294 setOperationAction(ISD::FP_TO_BF16, MVT::f64,
295 Subtarget.isSoftFPABI() ? LibCall : Custom);
296
297 if (Subtarget.is64Bit()) {
298 setOperationAction(ISD::FRINT, MVT::f64, Legal);
299 setOperationAction(ISD::FLOG2, MVT::f64, Legal);
300 }
301 }
302
303 // Set operations for 'LSX' feature.
304
305 if (Subtarget.hasExtLSX()) {
307 // Expand all truncating stores and extending loads.
308 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
309 setTruncStoreAction(VT, InnerVT, Expand);
312 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
313 }
314 // By default everything must be expanded. Then we will selectively turn
315 // on ones that can be effectively codegen'd.
316 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
318 }
319
320 for (MVT VT : LSXVTs) {
321 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
322 setOperationAction(ISD::BITCAST, VT, Legal);
324
328
333 }
334 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
337 Legal);
339 VT, Legal);
346 Expand);
357 }
358 for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
360 for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
362 for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
365 }
366 for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
370 setOperationAction(ISD::FSQRT, VT, Legal);
371 setOperationAction(ISD::FNEG, VT, Legal);
372 setOperationAction(ISD::FLOG2, VT, Legal);
375 VT, Expand);
377 setOperationAction(ISD::FCEIL, VT, Legal);
378 setOperationAction(ISD::FFLOOR, VT, Legal);
379 setOperationAction(ISD::FTRUNC, VT, Legal);
380 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
381 setOperationAction(ISD::FMINNUM, VT, Legal);
382 setOperationAction(ISD::FMAXNUM, VT, Legal);
383 }
385 setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal);
386 setOperationAction(ISD::FFLOOR, {MVT::f32, MVT::f64}, Legal);
387 setOperationAction(ISD::FTRUNC, {MVT::f32, MVT::f64}, Legal);
388 setOperationAction(ISD::FROUNDEVEN, {MVT::f32, MVT::f64}, Legal);
389
// Custom-lower the integer vector reductions (handled by lowerVECREDUCE_ADD
// and lowerVECREDUCE later in this file), including sub-128-bit types.
390 for (MVT VT :
391 {MVT::v16i8, MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v8i16, MVT::v4i16,
392 MVT::v2i16, MVT::v4i32, MVT::v2i32, MVT::v2i64}) {
394 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
395 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
396 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
397 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
398 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
399 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
400 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
401 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
402 }
403 }
404
405 // Set operations for 'LASX' feature.
406
407 if (Subtarget.hasExtLASX()) {
408 for (MVT VT : LASXVTs) {
409 setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
410 setOperationAction(ISD::BITCAST, VT, Legal);
412
418
422 }
423 for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
426 Legal);
428 VT, Legal);
435 Expand);
444 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
447 }
448 for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
450 for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
452 for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
455 }
456 for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
460 setOperationAction(ISD::FSQRT, VT, Legal);
461 setOperationAction(ISD::FNEG, VT, Legal);
462 setOperationAction(ISD::FLOG2, VT, Legal);
465 VT, Expand);
467 setOperationAction(ISD::FCEIL, VT, Legal);
468 setOperationAction(ISD::FFLOOR, VT, Legal);
469 setOperationAction(ISD::FTRUNC, VT, Legal);
470 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
471 setOperationAction(ISD::FMINNUM, VT, Legal);
472 setOperationAction(ISD::FMAXNUM, VT, Legal);
473 }
474 }
475
476 // Set DAG combine for LA32 and LA64.
477
482
483 // Set DAG combine for 'LSX' feature.
484
485 if (Subtarget.hasExtLSX()) {
487 setTargetDAGCombine(ISD::BITCAST);
488 }
489
490 // Set DAG combine for 'LASX' feature.
491
492 if (Subtarget.hasExtLASX())
494
495 // Compute derived properties from the register classes.
496 computeRegisterProperties(Subtarget.getRegisterInfo());
497
499
502
503 setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
504
506
507 // Function alignments.
509 // Set preferred alignments.
510 setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
511 setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
512 setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
513
514 // cmpxchg sizes down to 8 bits become legal if LAMCAS is available.
515 if (Subtarget.hasLAMCAS())
517
// With SCQ, a 128-bit atomic cmpxchg gets a custom lowering.
518 if (Subtarget.hasSCQ()) {
520 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
521 }
522}
523
525 const GlobalAddressSDNode *GA) const {
526 // In order to maximise the opportunity for common subexpression elimination,
527 // keep a separate ADD node for the global address offset instead of folding
528 // it in the global address node. Later peephole optimisations may choose to
529 // fold it back in when profitable.
530 return false;
531}
532
534 SelectionDAG &DAG) const {
535 switch (Op.getOpcode()) {
536 case ISD::ATOMIC_FENCE:
537 return lowerATOMIC_FENCE(Op, DAG);
539 return lowerEH_DWARF_CFA(Op, DAG);
541 return lowerGlobalAddress(Op, DAG);
543 return lowerGlobalTLSAddress(Op, DAG);
545 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
547 return lowerINTRINSIC_W_CHAIN(Op, DAG);
549 return lowerINTRINSIC_VOID(Op, DAG);
551 return lowerBlockAddress(Op, DAG);
552 case ISD::JumpTable:
553 return lowerJumpTable(Op, DAG);
554 case ISD::SHL_PARTS:
555 return lowerShiftLeftParts(Op, DAG);
556 case ISD::SRA_PARTS:
557 return lowerShiftRightParts(Op, DAG, true);
558 case ISD::SRL_PARTS:
559 return lowerShiftRightParts(Op, DAG, false);
561 return lowerConstantPool(Op, DAG);
562 case ISD::FP_TO_SINT:
563 return lowerFP_TO_SINT(Op, DAG);
564 case ISD::BITCAST:
565 return lowerBITCAST(Op, DAG);
566 case ISD::UINT_TO_FP:
567 return lowerUINT_TO_FP(Op, DAG);
568 case ISD::SINT_TO_FP:
569 return lowerSINT_TO_FP(Op, DAG);
570 case ISD::VASTART:
571 return lowerVASTART(Op, DAG);
572 case ISD::FRAMEADDR:
573 return lowerFRAMEADDR(Op, DAG);
574 case ISD::RETURNADDR:
575 return lowerRETURNADDR(Op, DAG);
577 return lowerWRITE_REGISTER(Op, DAG);
579 return lowerINSERT_VECTOR_ELT(Op, DAG);
581 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
583 return lowerBUILD_VECTOR(Op, DAG);
585 return lowerCONCAT_VECTORS(Op, DAG);
587 return lowerVECTOR_SHUFFLE(Op, DAG);
588 case ISD::BITREVERSE:
589 return lowerBITREVERSE(Op, DAG);
591 return lowerSCALAR_TO_VECTOR(Op, DAG);
592 case ISD::PREFETCH:
593 return lowerPREFETCH(Op, DAG);
594 case ISD::SELECT:
595 return lowerSELECT(Op, DAG);
596 case ISD::BRCOND:
597 return lowerBRCOND(Op, DAG);
598 case ISD::FP_TO_FP16:
599 return lowerFP_TO_FP16(Op, DAG);
600 case ISD::FP16_TO_FP:
601 return lowerFP16_TO_FP(Op, DAG);
602 case ISD::FP_TO_BF16:
603 return lowerFP_TO_BF16(Op, DAG);
604 case ISD::BF16_TO_FP:
605 return lowerBF16_TO_FP(Op, DAG);
606 case ISD::VECREDUCE_ADD:
607 return lowerVECREDUCE_ADD(Op, DAG);
608 case ISD::ROTL:
609 case ISD::ROTR:
610 return lowerRotate(Op, DAG);
611 case ISD::VECREDUCE_AND:
612 case ISD::VECREDUCE_OR:
613 case ISD::VECREDUCE_XOR:
614 case ISD::VECREDUCE_SMAX:
615 case ISD::VECREDUCE_SMIN:
616 case ISD::VECREDUCE_UMAX:
617 case ISD::VECREDUCE_UMIN:
618 return lowerVECREDUCE(Op, DAG);
619 case ISD::ConstantFP:
620 return lowerConstantFP(Op, DAG);
621 }
622 return SDValue();
623}
624
625// Helper to attempt to return a cheaper, bit-inverted version of \p V.
627 // TODO: don't always ignore oneuse constraints.
628 V = peekThroughBitcasts(V);
629 EVT VT = V.getValueType();
630
631 // Match not(xor X, -1) -> X.
632 if (V.getOpcode() == ISD::XOR &&
633 (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) ||
634 isAllOnesConstant(V.getOperand(1))))
635 return V.getOperand(0);
636
637 // Match not(extract_subvector(not(X)) -> extract_subvector(X).
638 if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
639 (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
640 if (SDValue Not = isNOT(V.getOperand(0), DAG)) {
641 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
642 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), VT, Not,
643 V.getOperand(1));
644 }
645 }
646
647 // Match not(SplatVector(not(X)) -> SplatVector(X).
648 if (V.getOpcode() == ISD::BUILD_VECTOR) {
649 if (SDValue SplatValue =
650 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
651 if (!V->isOnlyUserOf(SplatValue.getNode()))
652 return SDValue();
653
654 if (SDValue Not = isNOT(SplatValue, DAG)) {
655 Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
656 return DAG.getSplat(VT, SDLoc(Not), Not);
657 }
658 }
659 }
660
661 // Match not(or(not(X),not(Y))) -> and(X, Y).
662 if (V.getOpcode() == ISD::OR && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
663 V.getOperand(0).hasOneUse() && V.getOperand(1).hasOneUse()) {
664 // TODO: Handle cases with single NOT operand -> VANDN
665 if (SDValue Op1 = isNOT(V.getOperand(1), DAG))
666 if (SDValue Op0 = isNOT(V.getOperand(0), DAG))
667 return DAG.getNode(ISD::AND, SDLoc(V), VT, DAG.getBitcast(VT, Op0),
668 DAG.getBitcast(VT, Op1));
669 }
670
671 // TODO: Add more matching patterns. Such as,
672 // not(concat_vectors(not(X), not(Y))) -> concat_vectors(X, Y).
673 // not(slt(C, X)) -> slt(X - 1, C)
674
675 return SDValue();
676}
677
678SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op,
679 SelectionDAG &DAG) const {
680 EVT VT = Op.getValueType();
681 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
682 const APFloat &FPVal = CFP->getValueAPF();
683 SDLoc DL(CFP);
684
685 assert((VT == MVT::f32 && Subtarget.hasBasicF()) ||
686 (VT == MVT::f64 && Subtarget.hasBasicD()));
687
688 // If value is 0.0 or -0.0, just ignore it.
689 if (FPVal.isZero())
690 return SDValue();
691
692 // If lsx enabled, use cheaper 'vldi' instruction if possible.
693 if (isFPImmVLDILegal(FPVal, VT))
694 return SDValue();
695
696 // Construct as integer, and move to float register.
697 APInt INTVal = FPVal.bitcastToAPInt();
698
699 // If more than MaterializeFPImmInsNum instructions will be used to
700 // generate the INTVal and move it to float register, fallback to
701 // use floating point load from the constant pool.
703 int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 2 : 1);
704 if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0))
705 return SDValue();
706
707 switch (VT.getSimpleVT().SimpleTy) {
708 default:
709 llvm_unreachable("Unexpected floating point type!");
710 break;
711 case MVT::f32: {
712 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i32);
713 if (Subtarget.is64Bit())
714 NewVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, NewVal);
715 return DAG.getNode(Subtarget.is64Bit() ? LoongArchISD::MOVGR2FR_W_LA64
716 : LoongArchISD::MOVGR2FR_W,
717 DL, VT, NewVal);
718 }
719 case MVT::f64: {
720 if (Subtarget.is64Bit()) {
721 SDValue NewVal = DAG.getConstant(INTVal, DL, MVT::i64);
722 return DAG.getNode(LoongArchISD::MOVGR2FR_D, DL, VT, NewVal);
723 }
724 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
725 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
726 return DAG.getNode(LoongArchISD::MOVGR2FR_D_LO_HI, DL, VT, Lo, Hi);
727 }
728 }
729
730 return SDValue();
731}
732
733// Lower vecreduce_add using vhaddw instructions.
734// For Example:
735// call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
736// can be lowered to:
737// VHADDW_D_W vr0, vr0, vr0
738// VHADDW_Q_D vr0, vr0, vr0
739// VPICKVE2GR_D a0, vr0, 0
740// ADDI_W a0, a0, 0
741SDValue LoongArchTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
742 SelectionDAG &DAG) const {
743
744 SDLoc DL(Op);
745 MVT OpVT = Op.getSimpleValueType();
746 SDValue Val = Op.getOperand(0);
747
748 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
749 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
750 unsigned ResBits = OpVT.getScalarSizeInBits();
751
752 unsigned LegalVecSize = 128;
753 bool isLASX256Vector =
754 Subtarget.hasExtLASX() && Val.getValueSizeInBits() == 256;
755
756 // Ensure operand type legal or enable it legal.
757 while (!isTypeLegal(Val.getSimpleValueType())) {
758 Val = DAG.WidenVector(Val, DL);
759 }
760
761 // NumEles is designed for iterations count, v4i32 for LSX
762 // and v8i32 for LASX should have the same count.
763 if (isLASX256Vector) {
764 NumEles /= 2;
765 LegalVecSize = 256;
766 }
767
768 for (unsigned i = 1; i < NumEles; i *= 2, EleBits *= 2) {
769 MVT IntTy = MVT::getIntegerVT(EleBits);
770 MVT VecTy = MVT::getVectorVT(IntTy, LegalVecSize / EleBits);
771 Val = DAG.getNode(LoongArchISD::VHADDW, DL, VecTy, Val, Val);
772 }
773
774 if (isLASX256Vector) {
775 SDValue Tmp = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, Val,
776 DAG.getConstant(2, DL, Subtarget.getGRLenVT()));
777 Val = DAG.getNode(ISD::ADD, DL, MVT::v4i64, Tmp, Val);
778 }
779
780 Val = DAG.getBitcast(MVT::getVectorVT(OpVT, LegalVecSize / ResBits), Val);
781 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
782 DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
783}
784
785// Lower vecreduce_and/or/xor/[s/u]max/[s/u]min.
786// For Example:
787// call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
788// can be lowered to:
789// VBSRL_V vr1, vr0, 8
790// VMAX_W vr0, vr1, vr0
791// VBSRL_V vr1, vr0, 4
792// VMAX_W vr0, vr1, vr0
793// VPICKVE2GR_W a0, vr0, 0
794// For 256 bit vector, it is illegal and will be spilt into
795// two 128 bit vector by default then processed by this.
796SDValue LoongArchTargetLowering::lowerVECREDUCE(SDValue Op,
797 SelectionDAG &DAG) const {
798 SDLoc DL(Op);
799
800 MVT OpVT = Op.getSimpleValueType();
801 SDValue Val = Op.getOperand(0);
802
803 unsigned NumEles = Val.getSimpleValueType().getVectorNumElements();
804 unsigned EleBits = Val.getSimpleValueType().getScalarSizeInBits();
805
806 // Ensure operand type legal or enable it legal.
807 while (!isTypeLegal(Val.getSimpleValueType())) {
808 Val = DAG.WidenVector(Val, DL);
809 }
810
811 unsigned Opcode = ISD::getVecReduceBaseOpcode(Op.getOpcode());
812 MVT VecTy = Val.getSimpleValueType();
813 MVT GRLenVT = Subtarget.getGRLenVT();
814
815 for (int i = NumEles; i > 1; i /= 2) {
816 SDValue ShiftAmt = DAG.getConstant(i * EleBits / 16, DL, GRLenVT);
817 SDValue Tmp = DAG.getNode(LoongArchISD::VBSRL, DL, VecTy, Val, ShiftAmt);
818 Val = DAG.getNode(Opcode, DL, VecTy, Tmp, Val);
819 }
820
821 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Val,
822 DAG.getConstant(0, DL, GRLenVT));
823}
824
825SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
826 SelectionDAG &DAG) const {
827 unsigned IsData = Op.getConstantOperandVal(4);
828
829 // We don't support non-data prefetch.
830 // Just preserve the chain.
831 if (!IsData)
832 return Op.getOperand(0);
833
834 return Op;
835}
836
837SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
838 SelectionDAG &DAG) const {
839 MVT VT = Op.getSimpleValueType();
840 assert(VT.isVector() && "Unexpected type");
841
842 SDLoc DL(Op);
843 SDValue R = Op.getOperand(0);
844 SDValue Amt = Op.getOperand(1);
845 unsigned Opcode = Op.getOpcode();
846 unsigned EltSizeInBits = VT.getScalarSizeInBits();
847
848 auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
849 if (V.getOpcode() != ISD::BUILD_VECTOR)
850 return false;
851 if (SDValue SplatValue =
852 cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
853 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
854 CstSplatValue = C->getAPIntValue();
855 return true;
856 }
857 }
858 return false;
859 };
860
861 // Check for constant splat rotation amount.
862 APInt CstSplatValue;
863 bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
864 bool isROTL = Opcode == ISD::ROTL;
865
866 // Check for splat rotate by zero.
867 if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
868 return R;
869
870 // LoongArch targets always prefer ISD::ROTR.
871 if (isROTL) {
872 SDValue Zero = DAG.getConstant(0, DL, VT);
873 return DAG.getNode(ISD::ROTR, DL, VT, R,
874 DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
875 }
876
877 // Rotate by a immediate.
878 if (IsCstSplat) {
879 // ISD::ROTR: Attemp to rotate by a positive immediate.
880 SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
881 if (SDValue Urem =
882 DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
883 return DAG.getNode(Opcode, DL, VT, R, Urem);
884 }
885
886 return Op;
887}
888
889// Return true if Val is equal to (setcc LHS, RHS, CC).
890// Return false if Val is the inverse of (setcc LHS, RHS, CC).
891// Otherwise, return std::nullopt.
892static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
893 ISD::CondCode CC, SDValue Val) {
894 assert(Val->getOpcode() == ISD::SETCC);
895 SDValue LHS2 = Val.getOperand(0);
896 SDValue RHS2 = Val.getOperand(1);
897 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
898
899 if (LHS == LHS2 && RHS == RHS2) {
900 if (CC == CC2)
901 return true;
902 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
903 return false;
904 } else if (LHS == RHS2 && RHS == LHS2) {
906 if (CC == CC2)
907 return true;
908 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
909 return false;
910 }
911
912 return std::nullopt;
913}
914
916 const LoongArchSubtarget &Subtarget) {
917 SDValue CondV = N->getOperand(0);
918 SDValue TrueV = N->getOperand(1);
919 SDValue FalseV = N->getOperand(2);
920 MVT VT = N->getSimpleValueType(0);
921 SDLoc DL(N);
922
923 // (select c, -1, y) -> -c | y
924 if (isAllOnesConstant(TrueV)) {
925 SDValue Neg = DAG.getNegative(CondV, DL, VT);
926 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
927 }
928 // (select c, y, -1) -> (c-1) | y
929 if (isAllOnesConstant(FalseV)) {
930 SDValue Neg =
931 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
932 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
933 }
934
935 // (select c, 0, y) -> (c-1) & y
936 if (isNullConstant(TrueV)) {
937 SDValue Neg =
938 DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
939 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
940 }
941 // (select c, y, 0) -> -c & y
942 if (isNullConstant(FalseV)) {
943 SDValue Neg = DAG.getNegative(CondV, DL, VT);
944 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
945 }
946
947 // select c, ~x, x --> xor -c, x
948 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
949 const APInt &TrueVal = TrueV->getAsAPIntVal();
950 const APInt &FalseVal = FalseV->getAsAPIntVal();
951 if (~TrueVal == FalseVal) {
952 SDValue Neg = DAG.getNegative(CondV, DL, VT);
953 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
954 }
955 }
956
957 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
958 // when both truev and falsev are also setcc.
959 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
960 FalseV.getOpcode() == ISD::SETCC) {
961 SDValue LHS = CondV.getOperand(0);
962 SDValue RHS = CondV.getOperand(1);
963 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
964
965 // (select x, x, y) -> x | y
966 // (select !x, x, y) -> x & y
967 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
968 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
969 DAG.getFreeze(FalseV));
970 }
971 // (select x, y, x) -> x & y
972 // (select !x, y, x) -> x | y
973 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
974 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
975 DAG.getFreeze(TrueV), FalseV);
976 }
977 }
978
979 return SDValue();
980}
981
982// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
983// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
984// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
985// being `0` or `-1`. In such cases we can replace `select` with `and`.
986// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
987// than `c0`?
988static SDValue
990 const LoongArchSubtarget &Subtarget) {
991 unsigned SelOpNo = 0;
992 SDValue Sel = BO->getOperand(0);
993 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
994 SelOpNo = 1;
995 Sel = BO->getOperand(1);
996 }
997
998 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
999 return SDValue();
1000
1001 unsigned ConstSelOpNo = 1;
1002 unsigned OtherSelOpNo = 2;
1003 if (!isa<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
1004 ConstSelOpNo = 2;
1005 OtherSelOpNo = 1;
1006 }
1007 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
1008 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
1009 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
1010 return SDValue();
1011
1012 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
1013 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
1014 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
1015 return SDValue();
1016
1017 SDLoc DL(Sel);
1018 EVT VT = BO->getValueType(0);
1019
1020 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
1021 if (SelOpNo == 1)
1022 std::swap(NewConstOps[0], NewConstOps[1]);
1023
1024 SDValue NewConstOp =
1025 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
1026 if (!NewConstOp)
1027 return SDValue();
1028
1029 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
1030 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
1031 return SDValue();
1032
1033 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
1034 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
1035 if (SelOpNo == 1)
1036 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
1037 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
1038
1039 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
1040 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
1041 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
1042}
1043
1044// Changes the condition code and swaps operands if necessary, so the SetCC
1045// operation matches one of the comparisons supported directly by branches
1046// in the LoongArch ISA. May adjust compares to favor compare with 0 over
1047// compare with 1/-1.
1049 ISD::CondCode &CC, SelectionDAG &DAG) {
1050 // If this is a single bit test that can't be handled by ANDI, shift the
1051 // bit to be tested to the MSB and perform a signed compare with 0.
1052 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
1053 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
1054 isa<ConstantSDNode>(LHS.getOperand(1))) {
1055 uint64_t Mask = LHS.getConstantOperandVal(1);
1056 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
1057 unsigned ShAmt = 0;
1058 if (isPowerOf2_64(Mask)) {
1059 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
1060 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
1061 } else {
1062 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
1063 }
1064
1065 LHS = LHS.getOperand(0);
1066 if (ShAmt != 0)
1067 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
1068 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
1069 return;
1070 }
1071 }
1072
1073 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1074 int64_t C = RHSC->getSExtValue();
1075 switch (CC) {
1076 default:
1077 break;
1078 case ISD::SETGT:
1079 // Convert X > -1 to X >= 0.
1080 if (C == -1) {
1081 RHS = DAG.getConstant(0, DL, RHS.getValueType());
1082 CC = ISD::SETGE;
1083 return;
1084 }
1085 break;
1086 case ISD::SETLT:
1087 // Convert X < 1 to 0 >= X.
1088 if (C == 1) {
1089 RHS = LHS;
1090 LHS = DAG.getConstant(0, DL, RHS.getValueType());
1091 CC = ISD::SETGE;
1092 return;
1093 }
1094 break;
1095 }
1096 }
1097
1098 switch (CC) {
1099 default:
1100 break;
1101 case ISD::SETGT:
1102 case ISD::SETLE:
1103 case ISD::SETUGT:
1104 case ISD::SETULE:
1106 std::swap(LHS, RHS);
1107 break;
1108 }
1109}
1110
// Custom-lower ISD::SELECT into LoongArchISD::SELECT_CC (or into cheaper
// plain arithmetic when the operands allow it).
SDValue LoongArchTargetLowering::lowerSELECT(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // First try replacing the whole select with a binary operation (helper
  // defined earlier in this file).
  if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
    return V;

  // If the select's only user is a speculatable binary operation, try folding
  // that operation into both arms of the select instead.
  if (Op.hasOneUse()) {
    unsigned UseOpc = Op->user_begin()->getOpcode();
    if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
      SDNode *BinOp = *Op->user_begin();
      if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
                                                           DAG, Subtarget)) {
        DAG.ReplaceAllUsesWith(BinOp, &NewSel);
        // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
        // may return a constant node and cause crash in lowerSELECT.
        if (NewSel.getOpcode() == ISD::SELECT)
          return lowerSELECT(NewSel, DAG);
        return NewSel;
      }
    }
  }

  // If the condition is not an integer SETCC which operates on GRLenVT, we need
  // to emit a LoongArchISD::SELECT_CC comparing the condition to zero. i.e.:
  // (select condv, truev, falsev)
  // -> (loongarchisd::select_cc condv, zero, setne, truev, falsev)
  if (CondV.getOpcode() != ISD::SETCC ||
      CondV.getOperand(0).getSimpleValueType() != GRLenVT) {
    SDValue Zero = DAG.getConstant(0, DL, GRLenVT);
    SDValue SetNE = DAG.getCondCode(ISD::SETNE);

    SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

    return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
  }

  // If the CondV is the output of a SETCC node which operates on GRLenVT
  // inputs, then merge the SETCC node into the lowered LoongArchISD::SELECT_CC
  // to take advantage of the integer compare+branch instructions. i.e.: (select
  // (setcc lhs, rhs, cc), truev, falsev)
  // -> (loongarchisd::select_cc lhs, rhs, cc, truev, falsev)
  SDValue LHS = CondV.getOperand(0);
  SDValue RHS = CondV.getOperand(1);
  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

  // Special case for a select of 2 constants that have a difference of 1.
  // Normally this is done by DAGCombine, but if the select is introduced by
  // type legalization or op legalization, we miss it. Restricting to SETLT
  // case for now because that is what signed saturating add/sub need.
  // FIXME: We don't need the condition to be SETLT or even a SETCC,
  // but we would probably want to swap the true/false values if the condition
  // is SETGE/SETLE to avoid an XORI.
  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
      CCVal == ISD::SETLT) {
    const APInt &TrueVal = TrueV->getAsAPIntVal();
    const APInt &FalseVal = FalseV->getAsAPIntVal();
    // With a 0/1 condition value the select collapses to
    // falsev + cond or falsev - cond.
    if (TrueVal - 1 == FalseVal)
      return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
    if (TrueVal + 1 == FalseVal)
      return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
  }

  // Canonicalize LHS/RHS/CCVal into a form supported by the compare+branch
  // instructions; may rewrite all three in place.
  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
  // 1 < x ? x : 1 -> 0 < x ? x : 1
  if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
      RHS == TrueV && LHS == FalseV) {
    LHS = DAG.getConstant(0, DL, VT);
    // 0 <u x is the same as x != 0.
    if (CCVal == ISD::SETULT) {
      std::swap(LHS, RHS);
      CCVal = ISD::SETNE;
    }
  }

  // x <s -1 ? x : -1 -> x <s 0 ? x : -1
  if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
      RHS == FalseV) {
    RHS = DAG.getConstant(0, DL, VT);
  }

  SDValue TargetCC = DAG.getCondCode(CCVal);

  if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
    // (select (setcc lhs, rhs, CC), constant, falsev)
    // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
    std::swap(TrueV, FalseV);
    TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
  }

  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
  return DAG.getNode(LoongArchISD::SELECT_CC, DL, VT, Ops);
}
1209
1210SDValue LoongArchTargetLowering::lowerBRCOND(SDValue Op,
1211 SelectionDAG &DAG) const {
1212 SDValue CondV = Op.getOperand(1);
1213 SDLoc DL(Op);
1214 MVT GRLenVT = Subtarget.getGRLenVT();
1215
1216 if (CondV.getOpcode() == ISD::SETCC) {
1217 if (CondV.getOperand(0).getValueType() == GRLenVT) {
1218 SDValue LHS = CondV.getOperand(0);
1219 SDValue RHS = CondV.getOperand(1);
1220 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
1221
1222 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
1223
1224 SDValue TargetCC = DAG.getCondCode(CCVal);
1225 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1226 Op.getOperand(0), LHS, RHS, TargetCC,
1227 Op.getOperand(2));
1228 } else if (CondV.getOperand(0).getValueType().isFloatingPoint()) {
1229 return DAG.getNode(LoongArchISD::BRCOND, DL, Op.getValueType(),
1230 Op.getOperand(0), CondV, Op.getOperand(2));
1231 }
1232 }
1233
1234 return DAG.getNode(LoongArchISD::BR_CC, DL, Op.getValueType(),
1235 Op.getOperand(0), CondV, DAG.getConstant(0, DL, GRLenVT),
1236 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
1237}
1238
1239SDValue
1240LoongArchTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
1241 SelectionDAG &DAG) const {
1242 SDLoc DL(Op);
1243 MVT OpVT = Op.getSimpleValueType();
1244
1245 SDValue Vector = DAG.getUNDEF(OpVT);
1246 SDValue Val = Op.getOperand(0);
1247 SDValue Idx = DAG.getConstant(0, DL, Subtarget.getGRLenVT());
1248
1249 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, OpVT, Vector, Val, Idx);
1250}
1251
// Custom-lower vector ISD::BITREVERSE: do the bit reversal on 64-bit chunks
// of the vector, then (for element types wider than i8) swap the elements
// back into place with a shuffle.
SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT ResTy = Op->getValueType(0);
  SDValue Src = Op->getOperand(0);
  SDLoc DL(Op);

  // LoongArchISD::BITREV_8B is not supported on LA32.
  if (!Subtarget.is64Bit() && (ResTy == MVT::v16i8 || ResTy == MVT::v32i8))
    return SDValue();

  // View the source as a vector of i64 so each chunk can be reversed in a
  // GPR-sized operation.
  EVT NewVT = ResTy.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
  unsigned int OrigEltNum = ResTy.getVectorNumElements();
  unsigned int NewEltNum = NewVT.getVectorNumElements();

  SDValue NewSrc = DAG.getNode(ISD::BITCAST, DL, NewVT, Src);

  // Reverse each i64 chunk: byte-granular reversal (BITREV_8B) for i8
  // elements, full bit reversal otherwise.
  for (unsigned int i = 0; i < NewEltNum; i++) {
    SDValue Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, NewSrc,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    unsigned RevOp = (ResTy == MVT::v16i8 || ResTy == MVT::v32i8)
                         ? (unsigned)LoongArchISD::BITREV_8B
                         : (unsigned)ISD::BITREVERSE;
    Ops.push_back(DAG.getNode(RevOp, DL, MVT::i64, Op));
  }
  SDValue Res =
      DAG.getNode(ISD::BITCAST, DL, ResTy, DAG.getBuildVector(NewVT, DL, Ops));

  switch (ResTy.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v16i8:
  case MVT::v32i8:
    // i8 elements: BITREV_8B already kept bytes in place, nothing to fix up.
    return Res;
  case MVT::v8i16:
  case MVT::v16i16:
  case MVT::v4i32:
  case MVT::v8i32: {
    // The full 64-bit reversal also reversed the element order within each
    // chunk; undo that with a per-chunk element-reversing shuffle.
    for (unsigned int i = 0; i < NewEltNum; i++)
      for (int j = OrigEltNum / NewEltNum - 1; j >= 0; j--)
        Mask.push_back(j + (OrigEltNum / NewEltNum) * i);
    return DAG.getVectorShuffle(ResTy, DL, Res, DAG.getUNDEF(ResTy), Mask);
  }
  }
}
1298
1299// Widen element type to get a new mask value (if possible).
1300// For example:
1301// shufflevector <4 x i32> %a, <4 x i32> %b,
1302// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
1303// is equivalent to:
1304// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
1305// can be lowered to:
1306// VPACKOD_D vr0, vr0, vr1
                                 SDValue V1, SDValue V2, SelectionDAG &DAG) {
  unsigned EltBits = VT.getScalarSizeInBits();

  // Widening i64/f64 would exceed the legal element width; i1 is not a
  // meaningful candidate either.
  if (EltBits > 32 || EltBits == 1)
    return SDValue();

  SmallVector<int, 8> NewMask;
  if (widenShuffleMaskElts(Mask, NewMask)) {
    // Build the twice-as-wide vector type with half as many elements, and
    // redo the shuffle there if that type is legal.
    MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(EltBits * 2)
                                        : MVT::getIntegerVT(EltBits * 2);
    MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
    if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
      SDValue NewV1 = DAG.getBitcast(NewVT, V1);
      SDValue NewV2 = DAG.getBitcast(NewVT, V2);
      return DAG.getBitcast(
          VT, DAG.getVectorShuffle(NewVT, DL, NewV1, NewV2, NewMask));
    }
  }

  return SDValue();
}
1329
/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
/// instructions.
// The function matches elements from one of the input vectors shuffled to the
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shift across an entire 128-bit
// lane.
// Mostly copied from X86.
//
// On success, sets Opcode/ShiftVT and returns the shift amount (positive);
// returns -1 when no shift pattern matches.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // All elements "shifted in" at one end of each Scale-sized group must be
  // zeroable for the shuffle to be representable as a shift.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
                                        int Step = 1) {
    for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
      if (!(Mask[i] == -1 || Mask[i] == Low))
        return false;
    return true;
  };

  // Check that the surviving elements of each group are consecutive, then
  // derive the opcode, shift amount and the vector type to shift in.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
        return -1;
    }

    int ShiftEltBits = ScalarSizeInBits * Scale;
    // Groups wider than 64 bits cannot use the element shifts; fall back to
    // the byte shifts across the whole 128-bit lane.
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
                  : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
    // Byte shifts measure the amount in bytes rather than bits.
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // Try every group size and shift amount, in both directions, up to one
  // 128-bit lane.
  unsigned MaxWidth = 128;
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
1399
1400/// Lower VECTOR_SHUFFLE as shift (if possible).
1401///
1402/// For example:
1403/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1404/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
1405/// is lowered to:
1406/// (VBSLL_V $v0, $v0, 4)
1407///
1408/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1409/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
1410/// is lowered to:
1411/// (VSLLI_D $v0, $v0, 32)
                                           MVT VT, SDValue V1, SDValue V2,
                                           SelectionDAG &DAG,
                                           const LoongArchSubtarget &Subtarget,
                                           const APInt &Zeroable) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable);

  // If V1 failed, try to match shuffle against V2 shift.
  // (MaskOffset = Size remaps V2's mask indices into [0, Size).)
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  // Perform the shift in the type chosen by matchShuffleAsShift and cast the
  // result back to the requested type.
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getConstant(ShiftAmt, DL, Subtarget.getGRLenVT()));
  return DAG.getBitcast(VT, V);
}
1445
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
/// Every CheckStride-th element starting at Begin must be either -1 (undef) or
/// equal to the running expected value, which starts at ExpectedIndex and
/// advances by ExpectedIndexStride for each element checked.
template <typename ValType>
static bool
                           unsigned CheckStride,
                           ValType ExpectedIndex, unsigned ExpectedIndexStride) {
  auto &I = Begin;

  while (I != End) {
    if (*I != -1 && *I != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;

    // Incrementing past End is undefined behaviour so we must increment one
    // step at a time and check for End at each step.
    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
      ; // Empty loop body.
  }
  return true;
}
1468
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// KnownUndef gets a bit for every undef (-1) mask entry; KnownZero gets a bit
/// for every entry that selects from an all-zeros build vector.
                                     SDValue V2, APInt &KnownUndef,
                                     APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getZero(Size);

  // Look through bitcasts so an all-zeros vector is recognized regardless of
  // the type it was built in.
  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
  (void)ScalarSizeInBits;

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    // Indices [0, Size) select from V1, [Size, 2*Size) from V2.
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }
  }
}
1501
1502/// Test whether a shuffle mask is equivalent within each sub-lane.
1503///
1504/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
1505/// non-trivial to compute in the face of undef lanes. The representation is
1506/// suitable for use with existing 128-bit shuffles as entries from the second
1507/// vector have been remapped to [LaneSize, 2*LaneSize).
1508static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
1509 ArrayRef<int> Mask,
1510 SmallVectorImpl<int> &RepeatedMask) {
1511 auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
1512 RepeatedMask.assign(LaneSize, -1);
1513 int Size = Mask.size();
1514 for (int i = 0; i < Size; ++i) {
1515 assert(Mask[i] == -1 || Mask[i] >= 0);
1516 if (Mask[i] < 0)
1517 continue;
1518 if ((Mask[i] % Size) / LaneSize != i / LaneSize)
1519 // This entry crosses lanes, so there is no way to model this shuffle.
1520 return false;
1521
1522 // Ok, handle the in-lane shuffles by detecting if and when they repeat.
1523 // Adjust second vector indices to start at LaneSize instead of Size.
1524 int LocalM =
1525 Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
1526 if (RepeatedMask[i % LaneSize] < 0)
1527 // This is the first non-undef entry in this slot of a 128-bit lane.
1528 RepeatedMask[i % LaneSize] = LocalM;
1529 else if (RepeatedMask[i % LaneSize] != LocalM)
1530 // Found a mismatch with the repeated mask.
1531 return false;
1532 }
1533 return true;
1534}
1535
1536/// Attempts to match vector shuffle as byte rotation.
                                    ArrayRef<int> Mask) {

  SDValue Lo, Hi;
  SmallVector<int, 16> RepeatedMask;

  // The rotation has to be identical in every 128-bit lane.
  if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
    return -1;

  int NumElts = RepeatedMask.size();
  int Rotation = 0;
  // Bytes per element within a 128-bit lane.
  int Scale = 16 / NumElts;

  for (int i = 0; i < NumElts; ++i) {
    int M = RepeatedMask[i];
    assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    // A zero rotation is just one of the inputs unchanged; reject it.
    if (StartIdx == 0)
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  // Report the chosen inputs through the by-reference V1/V2 and return the
  // rotation amount in bytes.
  V1 = Lo;
  V2 = Hi;

  return Rotation * Scale;
}
1601
1602/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
1603///
1604/// For example:
1605/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
1606/// <2 x i32> <i32 3, i32 0>
1607/// is lowered to:
1608/// (VBSRL_V $v1, $v1, 8)
1609/// (VBSLL_V $v0, $v0, 8)
1610/// (VOR_V $v0, $V0, $v1)
static SDValue
                               SDValue V1, SDValue V2, SelectionDAG &DAG,
                               const LoongArchSubtarget &Subtarget) {

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  // Do the rotation on byte vectors and cast back afterwards.
  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  // Shift Lo's tail up and Hi's head down, then OR the two halves together
  // (see the example in the header comment).
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;
  MVT GRLenVT = Subtarget.getGRLenVT();

  SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
                                DAG.getConstant(LoByteShift, DL, GRLenVT));
  SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
                                DAG.getConstant(HiByteShift, DL, GRLenVT));
  return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
}
1635
1636/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
1637///
1638/// For example:
1639/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
1640/// <4 x i32> <i32 0, i32 4, i32 1, i32 4>
1641/// %3 = bitcast <4 x i32> %2 to <2 x i64>
1642/// is lowered to:
1643/// (VREPLI $v1, 0)
1644/// (VILVL $v0, $v1, $v0)
                                         ArrayRef<int> Mask, MVT VT,
                                         SDValue V1, SDValue V2,
                                         SelectionDAG &DAG,
                                         const APInt &Zeroable) {
  int Bits = VT.getSizeInBits();
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();

  // Every element is zeroable: just materialize a zero vector.
  if (Zeroable.isAllOnes())
    return DAG.getConstant(0, DL, VT);

  // Define a helper function to check a particular ext-scale and lower to it if
  // valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    for (int i = 0; i < NumElements; i++) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements need to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        // A concrete zero here means the result must be zero-extended rather
        // than merely any-extended.
        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);

        // These offset can't be handled
        if (Offset % (NumElements / Scale))
          return SDValue();
      } else if (InputV != V)
        return SDValue();

      if (M != (Offset + (i / Scale)))
        return SDValue(); // Non-consecutive strided elements.
    }

    // If we fail to find an input, we have a zero-shuffle which should always
    // have already been handled.
    if (!InputV)
      return SDValue();

    // Interleave with zero (or a frozen copy of the input for any-extend),
    // doubling the element width each step, until the target scale is reached.
    do {
      unsigned VilVLoHi = LoongArchISD::VILVL;
      if (Offset >= (NumElements / 2)) {
        VilVLoHi = LoongArchISD::VILVH;
        Offset -= (NumElements / 2);
      }

      MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
      SDValue Ext =
          AnyExt ? DAG.getFreeze(InputV) : DAG.getConstant(0, DL, InputVT);
      InputV = DAG.getBitcast(InputVT, InputV);
      InputV = DAG.getNode(VilVLoHi, DL, InputVT, Ext, InputV);
      Scale /= 2;
      EltBits *= 2;
      NumElements /= 2;
    } while (Scale > 1);
    return DAG.getBitcast(VT, InputV);
  };

  // Each iteration, try extending the elements half as much, but into twice as
  // many elements.
  for (int NumExtElements = Bits / 64; NumExtElements < NumElements;
       NumExtElements *= 2) {
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }
  return SDValue();
}
1727
1728/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
1729///
1730/// VREPLVEI performs vector broadcast based on an element specified by an
1731/// integer immediate, with its mask being similar to:
1732/// <x, x, x, ...>
1733/// where x is any valid index.
1734///
1735/// When undef's appear in the mask they are treated as if they were whatever
1736/// value is necessary in order to fit the above form.
static SDValue
                           SDValue V1, SelectionDAG &DAG,
                           const LoongArchSubtarget &Subtarget) {
  // The first non-undef element fixes the only splat index that could work.
  int SplatIndex = -1;
  for (const auto &M : Mask) {
    if (M != -1) {
      SplatIndex = M;
      break;
    }
  }

  // All-undef mask: any value is acceptable.
  if (SplatIndex == -1)
    return DAG.getUNDEF(VT);

  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
  // Every non-undef element must equal SplatIndex (stride-0 pattern).
  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
                       DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
  }

  return SDValue();
}
1760
1761/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
1762///
1763/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
1764/// elements according to a <4 x i2> constant (encoded as an integer immediate).
1765///
1766/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
1767/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
1768/// When undef's appear they are treated as if they were whatever value is
1769/// necessary in order to fit the above forms.
1770///
1771/// For example:
1772/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
1773/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
1774/// i32 7, i32 6, i32 5, i32 4>
1775/// is lowered to:
1776/// (VSHUF4I_H $v0, $v1, 27)
1777/// where the 27 comes from:
1778/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
static SDValue
                          SDValue V1, SDValue V2, SelectionDAG &DAG,
                          const LoongArchSubtarget &Subtarget) {

  // v2i64/v2f64 use two-element blocks (vshuf4i.d); everything else uses
  // blocks of four elements.
  unsigned SubVecSize = 4;
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    SubVecSize = 2;

  int SubMask[4] = {-1, -1, -1, -1};
  for (unsigned i = 0; i < SubVecSize; ++i) {
    for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
      int M = Mask[j];

      // Convert from vector index to 4-element subvector index
      // If an index refers to an element outside of the subvector then give up
      if (M != -1) {
        M -= 4 * (j / SubVecSize);
        if (M < 0 || M >= 4)
          return SDValue();
      }

      // If the mask has an undef, replace it with the current index.
      // Note that it might still be undef if the current index is also undef
      if (SubMask[i] == -1)
        SubMask[i] = M;
      // Check that non-undef values are the same as in the mask. If they
      // aren't then give up
      else if (M != -1 && M != SubMask[i])
        return SDValue();
    }
  }

  // Calculate the immediate. Replace any remaining undefs with zero
  // (two bits per element, element 0 in the least-significant bits).
  int Imm = 0;
  for (int i = SubVecSize - 1; i >= 0; --i) {
    int M = SubMask[i];

    if (M == -1)
      M = 0;

    Imm <<= 2;
    Imm |= M & 0x3;
  }

  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return vshuf4i.d
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return DAG.getNode(LoongArchISD::VSHUF4I_D, DL, VT, V1, V2,
                       DAG.getConstant(Imm, DL, GRLenVT));

  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
                     DAG.getConstant(Imm, DL, GRLenVT));
}
1834
1835/// Lower VECTOR_SHUFFLE whose result is the reversed source vector.
1836///
1837/// It is possible to do optimization for VECTOR_SHUFFLE performing vector
1838/// reverse whose mask likes:
1839/// <7, 6, 5, 4, 3, 2, 1, 0>
1840///
1841/// When undef's appear in the mask they are treated as if they were whatever
1842/// value is necessary in order to fit the above forms.
static SDValue
                             SDValue V1, SelectionDAG &DAG,
                             const LoongArchSubtarget &Subtarget) {
  // Only vectors with i8/i16 elements which cannot match other patterns
  // directly needs to do this.
  if (VT != MVT::v16i8 && VT != MVT::v8i16 && VT != MVT::v32i8 &&
      VT != MVT::v16i16)
    return SDValue();

  if (!ShuffleVectorInst::isReverseMask(Mask, Mask.size()))
    return SDValue();

  // Reverse in two steps: first reverse groups of four elements using a
  // shuffle on a 4x-wider element type...
  int WidenNumElts = VT.getVectorNumElements() / 4;
  SmallVector<int, 16> WidenMask(WidenNumElts, -1);
  for (int i = 0; i < WidenNumElts; ++i)
    WidenMask[i] = WidenNumElts - 1 - i;

  MVT WidenVT = MVT::getVectorVT(
      VT.getVectorElementType() == MVT::i8 ? MVT::i32 : MVT::i64, WidenNumElts);
  SDValue NewV1 = DAG.getBitcast(WidenVT, V1);
  SDValue WidenRev = DAG.getVectorShuffle(WidenVT, DL, NewV1,
                                          DAG.getUNDEF(WidenVT), WidenMask);

  // ...then reverse the four elements inside each group: immediate 27 encodes
  // the <3, 2, 1, 0> per-block pattern (3 + (2 << 2) + (1 << 4) + (0 << 6)).
  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT,
                     DAG.getBitcast(VT, WidenRev),
                     DAG.getConstant(27, DL, Subtarget.getGRLenVT()));
}
1871
1872/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
1873///
1874/// VPACKEV interleaves the even elements from each vector.
1875///
1876/// It is possible to lower into VPACKEV when the mask consists of two of the
1877/// following forms interleaved:
1878/// <0, 2, 4, ...>
1879/// <n, n+2, n+4, ...>
1880/// where n is the number of elements in the vector.
1881/// For example:
1882/// <0, 0, 2, 2, 4, 4, ...>
1883/// <0, n, 2, n+2, 4, n+4, ...>
1884///
1885/// When undef's appear in the mask they are treated as if they were whatever
1886/// value is necessary in order to fit the above forms.
                                     MVT VT, SDValue V1, SDValue V2,
                                     SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must be the even elements of one of the sources.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  // Odd result positions must likewise be the even elements of one source.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
    V2 = OriV2;
  else
    return SDValue();

  // V1 supplies the even result positions, V2 the odd ones.
  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
}
1911
1912/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
1913///
1914/// VPACKOD interleaves the odd elements from each vector.
1915///
1916/// It is possible to lower into VPACKOD when the mask consists of two of the
1917/// following forms interleaved:
1918/// <1, 3, 5, ...>
1919/// <n+1, n+3, n+5, ...>
1920/// where n is the number of elements in the vector.
1921/// For example:
1922/// <1, 1, 3, 3, 5, 5, ...>
1923/// <1, n+1, 3, n+3, 5, n+5, ...>
1924///
1925/// When undef's appear in the mask they are treated as if they were whatever
1926/// value is necessary in order to fit the above forms.
                                     MVT VT, SDValue V1, SDValue V2,
                                     SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions must be the odd elements of one of the sources.
  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
    V1 = OriV2;
  else
    return SDValue();

  // Odd result positions must likewise be the odd elements of one source.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
    V2 = OriV2;
  else
    return SDValue();

  // V1 supplies the even result positions, V2 the odd ones.
  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
}
1951
1952/// Lower VECTOR_SHUFFLE into VILVH (if possible).
1953///
1954/// VILVH interleaves consecutive elements from the left (highest-indexed) half
1955/// of each vector.
1956///
1957/// It is possible to lower into VILVH when the mask consists of two of the
1958/// following forms interleaved:
1959/// <x, x+1, x+2, ...>
1960/// <n+x, n+x+1, n+x+2, ...>
1961/// where n is the number of elements in the vector and x is half n.
1962/// For example:
1963/// <x, x, x+1, x+1, x+2, x+2, ...>
1964/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
1965///
1966/// When undef's appear in the mask they are treated as if they were whatever
1967/// value is necessary in order to fit the above forms.
                                   MVT VT, SDValue V1, SDValue V2,
                                   SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  unsigned HalfSize = Mask.size() / 2;
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions: consecutive elements starting at the high half of
  // one of the sources.
  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
    V1 = OriV2;
  else
    return SDValue();

  // Odd result positions: the same, from either source.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
                                   1))
    V2 = OriV2;
  else
    return SDValue();

  // V1 supplies the even result positions, V2 the odd ones.
  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
}
1994
1995/// Lower VECTOR_SHUFFLE into VILVL (if possible).
1996///
1997/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
1998/// of each vector.
1999///
2000/// It is possible to lower into VILVL when the mask consists of two of the
2001/// following forms interleaved:
2002/// <0, 1, 2, ...>
2003/// <n, n+1, n+2, ...>
2004/// where n is the number of elements in the vector.
2005/// For example:
2006/// <0, 0, 1, 1, 2, 2, ...>
2007/// <0, n, 1, n+1, 2, n+2, ...>
2008///
2009/// When undef's appear in the mask they are treated as if they were whatever
2010/// value is necessary in order to fit the above forms.
                                   MVT VT, SDValue V1, SDValue V2,
                                   SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // Even result positions: consecutive elements starting at index 0 of one of
  // the sources.
  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
    V1 = OriV2;
  else
    return SDValue();

  // Odd result positions: the same, from either source.
  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
    V2 = OriV2;
  else
    return SDValue();

  // V1 supplies the even result positions, V2 the odd ones.
  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
}
2035
2036/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
2037///
2038/// VPICKEV copies the even elements of each vector into the result vector.
2039///
2040/// It is possible to lower into VPICKEV when the mask consists of two of the
2041/// following forms concatenated:
2042/// <0, 2, 4, ...>
2043/// <n, n+2, n+4, ...>
2044/// where n is the number of elements in the vector.
2045/// For example:
2046/// <0, 2, 4, ..., 0, 2, 4, ...>
2047/// <0, 2, 4, ..., n, n+2, n+4, ...>
2048///
2049/// When undef's appear in the mask they are treated as if they were whatever
2050/// value is necessary in order to fit the above forms.
                                     MVT VT, SDValue V1, SDValue V2,
                                     SelectionDAG &DAG) {

  const auto &Begin = Mask.begin();
  const auto &Mid = Mask.begin() + Mask.size() / 2;
  const auto &End = Mask.end();
  SDValue OriV1 = V1, OriV2 = V2;

  // First half of the result: the even elements of one of the sources.
  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
    V1 = OriV1;
  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
    V1 = OriV2;
  else
    return SDValue();

  // Second half of the result: the even elements of one of the sources.
  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
    V2 = OriV1;
  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
    V2 = OriV2;

  else
    return SDValue();

  // V1 supplies the first half of the result, V2 the second half.
  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
}
2077
2078/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
2079///
2080/// VPICKOD copies the odd elements of each vector into the result vector.
2081///
2082/// It is possible to lower into VPICKOD when the mask consists of two of the
2083/// following forms concatenated:
2084/// <1, 3, 5, ...>
2085/// <n+1, n+3, n+5, ...>
2086/// where n is the number of elements in the vector.
2087/// For example:
2088/// <1, 3, 5, ..., 1, 3, 5, ...>
2089/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2090///
2091/// When undef's appear in the mask they are treated as if they were whatever
2092/// value is necessary in order to fit the above forms.
2094 MVT VT, SDValue V1, SDValue V2,
2095 SelectionDAG &DAG) {
2096
// Split the mask into two halves; each half must independently select the
// odd-indexed elements of one of the two sources.
2097 const auto &Begin = Mask.begin();
2098 const auto &Mid = Mask.begin() + Mask.size() / 2;
2099 const auto &End = Mask.end();
2100 SDValue OriV1 = V1, OriV2 = V2;
2101
// First half must be <1, 3, 5, ...> drawn from either source
// (Mask.size() + 1 addresses element 1 of the second source).
2102 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2103 V1 = OriV1;
2104 else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
2105 V1 = OriV2;
2106 else
2107 return SDValue();
2108
// Second half must likewise be the odd elements of one source.
2109 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2110 V2 = OriV1;
2111 else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
2112 V2 = OriV2;
2113 else
2114 return SDValue();
2115
// Operands swapped: the first mask half fills the low part of the result,
// which VPICKOD reads from its second operand.
2116 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2117 }
2118
2119/// Lower VECTOR_SHUFFLE into VSHUF.
2120///
2121/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
2122/// adding it as an operand to the resulting VSHUF.
2124 MVT VT, SDValue V1, SDValue V2,
2125 SelectionDAG &DAG,
2126 const LoongArchSubtarget &Subtarget) {
2127
// Materialize each mask element as a GRLen-wide constant. getSignedConstant
// is used so that undef entries (-1) are representable.
2129 for (auto M : Mask)
2130 Ops.push_back(DAG.getSignedConstant(M, DL, Subtarget.getGRLenVT()));
2131
// The selector vector uses the integer type matching VT's element count.
2132 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2133 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
2134
2135 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
2136 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
2137 // VSHF concatenates the vectors in a bitwise fashion:
2138 // <0b00, 0b01> + <0b10, 0b11> ->
2139 // 0b0100 + 0b1110 -> 0b01001110
2140 // <0b10, 0b11, 0b00, 0b01>
2141 // We must therefore swap the operands to get the correct result.
2142 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2143 }
2144
2145/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
2146///
2147/// This routine breaks down the specific type of 128-bit shuffle and
2148/// dispatches to the lowering routines accordingly.
2150 SDValue V1, SDValue V2, SelectionDAG &DAG,
2151 const LoongArchSubtarget &Subtarget) {
2152 assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
2153 VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
2154 VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
2155 "Vector type is unsupported for lsx!");
2157 "Two operands have different types!");
2158 assert(VT.getVectorNumElements() == Mask.size() &&
2159 "Unexpected mask size for shuffle!");
2160 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2161
// Compute which lanes are known undef or known zero; the shift/extend
// matchers below may treat those lanes as free.
2162 APInt KnownUndef, KnownZero;
2163 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2164 APInt Zeroable = KnownUndef | KnownZero;
2165
2166 SDValue Result;
2167 // TODO: Add more comparison patterns.
// Single-input patterns are tried first when the second operand is undef.
2168 if (V2.isUndef()) {
2169 if ((Result =
2170 lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2171 return Result;
2172 if ((Result =
2173 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2174 return Result;
2175 if ((Result =
2176 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2177 return Result;
2178
2179 // TODO: This comment may be enabled in the future to better match the
2180 // pattern for instruction selection.
2181 /* V2 = V1; */
2182 }
2183
2184 // It is recommended not to change the pattern comparison order for better
2185 // performance.
2186 if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
2187 return Result;
2188 if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
2189 return Result;
2190 if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
2191 return Result;
2192 if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
2193 return Result;
2194 if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
2195 return Result;
2196 if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
2197 return Result;
2198 if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
2199 (Result =
2200 lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2201 return Result;
2202 if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
2203 Zeroable)))
2204 return Result;
2205 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2206 Zeroable)))
2207 return Result;
2208 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2209 Subtarget)))
2210 return Result;
// Widening the element type may expose a pattern on fewer, wider lanes.
2211 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2212 return NewShuffle;
// VSHUF is the fully general fallback for 128-bit shuffles.
2213 if ((Result =
2214 lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2215 return Result;
2216 return SDValue();
2217 }
2218
2219/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
2220///
2221/// It is a XVREPLVEI when the mask is:
2222/// <x, x, x, ..., x+n, x+n, x+n, ...>
2223/// where the number of x is equal to n and n is half the length of vector.
2224///
2225/// When undef's appear in the mask they are treated as if they were whatever
2226/// value is necessary in order to fit the above form.
2227 static SDValue
2229 SDValue V1, SelectionDAG &DAG,
2230 const LoongArchSubtarget &Subtarget) {
// The first non-undef mask element is the candidate splat lane.
2231 int SplatIndex = -1;
2232 for (const auto &M : Mask) {
2233 if (M != -1) {
2234 SplatIndex = M;
2235 break;
2236 }
2237 }
2238
// All-undef mask: the whole shuffle result is undef.
2239 if (SplatIndex == -1)
2240 return DAG.getUNDEF(VT);
2241
2242 const auto &Begin = Mask.begin();
2243 const auto &End = Mask.end();
2244 int HalfSize = Mask.size() / 2;
2245
// The splat lane must lie in the low half so both 128-bit lanes can
// replicate the same in-lane position.
2246 if (SplatIndex >= HalfSize)
2247 return SDValue();
2248
2249 assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
// Stride 0: every element of the low mask half must equal SplatIndex, and
// every element of the high half must equal SplatIndex + HalfSize.
2250 if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
2251 fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
2252 0)) {
2253 return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
2254 DAG.getConstant(SplatIndex, DL, Subtarget.getGRLenVT()));
2255 }
2256
2257 return SDValue();
2258 }
2259
2260/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
2261 static SDValue
2263 SDValue V1, SDValue V2, SelectionDAG &DAG,
2264 const LoongArchSubtarget &Subtarget) {
2265 // When the size is less than or equal to 4, lower cost instructions may be
2266 // used.
2267 if (Mask.size() <= 4)
2268 return SDValue();
// Otherwise the generic VSHUF4I matcher handles the 256-bit case as well.
2269 return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
2270 }
2271
2272/// Lower VECTOR_SHUFFLE into XVPERMI (if possible).
2273 static SDValue
2275 SDValue V1, SelectionDAG &DAG,
2276 const LoongArchSubtarget &Subtarget) {
2277 // Only consider XVPERMI_D.
2278 if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
2279 return SDValue();
2280
// Pack each 2-bit source index into the xvpermi.d immediate. Undef lanes
// (-1) are skipped and leave their bits at 0 — any lane value is fine there.
2281 unsigned MaskImm = 0;
2282 for (unsigned i = 0; i < Mask.size(); ++i) {
2283 if (Mask[i] == -1)
2284 continue;
2285 MaskImm |= Mask[i] << (i * 2);
2286 }
2287
2288 return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
2289 DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
2290 }
2291
2292/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
2294 MVT VT, SDValue V1, SelectionDAG &DAG,
2295 const LoongArchSubtarget &Subtarget) {
2296 // LoongArch LASX only has XVPERM_W.
2297 if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
2298 return SDValue();
2299
2300 unsigned NumElts = VT.getVectorNumElements();
2301 unsigned HalfSize = NumElts / 2;
// Front*/Back* track whether the low/high 128-bit half of the result reads
// exclusively from the low or high half of the source.
2302 bool FrontLo = true, FrontHi = true;
2303 bool BackLo = true, BackHi = true;
2304
// A mask value is "in range" if it is undef (-1) or within [low, high).
2305 auto inRange = [](int val, int low, int high) {
2306 return (val == -1) || (val >= low && val < high);
2307 };
2308
2309 for (unsigned i = 0; i < HalfSize; ++i) {
2310 int Fronti = Mask[i];
2311 int Backi = Mask[i + HalfSize];
2312
2313 FrontLo &= inRange(Fronti, 0, HalfSize);
2314 FrontHi &= inRange(Fronti, HalfSize, NumElts);
2315 BackLo &= inRange(Backi, 0, HalfSize);
2316 BackHi &= inRange(Backi, HalfSize, NumElts);
2317 }
2318
2319 // If both the lower and upper 128-bit parts access only one half of the
2320 // vector (either lower or upper), avoid using xvperm.w. The latency of
2321 // xvperm.w(3) is higher than using xvshuf(1) and xvori(1).
2322 if ((FrontLo || FrontHi) && (BackLo || BackHi))
2323 return SDValue();
2324
2326 MVT GRLenVT = Subtarget.getGRLenVT();
2327 for (unsigned i = 0; i < NumElts; ++i)
2328 Masks.push_back(Mask[i] == -1 ? DAG.getUNDEF(GRLenVT)
2329 : DAG.getConstant(Mask[i], DL, GRLenVT));
2330 SDValue MaskVec = DAG.getBuildVector(MVT::v8i32, DL, Masks);
2331
2332 return DAG.getNode(LoongArchISD::XVPERM, DL, VT, V1, MaskVec);
2333}
2334
2335/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
2337 MVT VT, SDValue V1, SDValue V2,
2338 SelectionDAG &DAG) {
// The xvpackev mask has the same shape as the 128-bit vpackev mask, so the
// LSX matcher is reused unchanged.
2339 return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
2340 }
2341
2342/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
2344 MVT VT, SDValue V1, SDValue V2,
2345 SelectionDAG &DAG) {
// The xvpackod mask has the same shape as the 128-bit vpackod mask, so the
// LSX matcher is reused unchanged.
2346 return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
2347 }
2348
2349/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
2351 MVT VT, SDValue V1, SDValue V2,
2352 SelectionDAG &DAG) {
2353
2354 const auto &Begin = Mask.begin();
2355 const auto &End = Mask.end();
// HalfSize elements per 128-bit lane; LeftSize elements come from each
// source per lane when interleaving its upper part.
2356 unsigned HalfSize = Mask.size() / 2;
2357 unsigned LeftSize = HalfSize / 2;
2358 SDValue OriV1 = V1, OriV2 = V2;
2359
// Even mask positions must walk the upper quarter of each 128-bit lane of
// one source (stride 2 over the result, stride 1 over the source).
2360 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
2361 1) &&
2362 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
2363 V1 = OriV1;
2364 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
2365 Mask.size() + HalfSize - LeftSize, 1) &&
2366 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2367 Mask.size() + HalfSize + LeftSize, 1))
2368 V1 = OriV2;
2369 else
2370 return SDValue();
2371
// Odd mask positions must walk the same region of the other (or same)
// source.
2372 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
2373 1) &&
2374 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
2375 1))
2376 V2 = OriV1;
2377 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
2378 Mask.size() + HalfSize - LeftSize, 1) &&
2379 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2380 Mask.size() + HalfSize + LeftSize, 1))
2381 V2 = OriV2;
2382 else
2383 return SDValue();
2384
// Operands swapped: even result positions come from VILVH's second operand.
2385 return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
2386 }
2387
2388/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
2390 MVT VT, SDValue V1, SDValue V2,
2391 SelectionDAG &DAG) {
2392
2393 const auto &Begin = Mask.begin();
2394 const auto &End = Mask.end();
// HalfSize is the number of elements per 128-bit lane.
2395 unsigned HalfSize = Mask.size() / 2;
2396 SDValue OriV1 = V1, OriV2 = V2;
2397
// Even mask positions must walk the lower quarter of each 128-bit lane of
// one source.
2398 if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
2399 fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
2400 V1 = OriV1;
2401 else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
2402 fitsRegularPattern<int>(Begin + HalfSize, 2, End,
2403 Mask.size() + HalfSize, 1))
2404 V1 = OriV2;
2405 else
2406 return SDValue();
2407
// Odd mask positions must walk the same region of the other (or same)
// source.
2408 if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
2409 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
2410 V2 = OriV1;
2411 else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
2412 1) &&
2413 fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
2414 Mask.size() + HalfSize, 1))
2415 V2 = OriV2;
2416 else
2417 return SDValue();
2418
// Operands swapped: even result positions come from VILVL's second operand.
2419 return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
2420 }
2421
2422/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
2424 MVT VT, SDValue V1, SDValue V2,
2425 SelectionDAG &DAG) {
2426
// Quarter boundaries: each 128-bit lane of the result is built from the
// even elements of the corresponding lane of the two sources.
2427 const auto &Begin = Mask.begin();
2428 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2429 const auto &Mid = Mask.begin() + Mask.size() / 2;
2430 const auto &RightMid = Mask.end() - Mask.size() / 4;
2431 const auto &End = Mask.end();
2432 unsigned HalfSize = Mask.size() / 2;
2433 SDValue OriV1 = V1, OriV2 = V2;
2434
// First quarter of each lane must be the even elements of one source.
2435 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
2436 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
2437 V1 = OriV1;
2438 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
2439 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
2440 V1 = OriV2;
2441 else
2442 return SDValue();
2443
// Second quarter of each lane must be the even elements of one source.
2444 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
2445 fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
2446 V2 = OriV1;
2447 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
2448 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
2449 V2 = OriV2;
2450
2451 else
2452 return SDValue();
2453
// Operands swapped to match VPICKEV's operand order (see the LSX variant).
2454 return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
2455 }
2456
2457/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
2459 MVT VT, SDValue V1, SDValue V2,
2460 SelectionDAG &DAG) {
2461
// Quarter boundaries: each 128-bit lane of the result is built from the
// odd elements of the corresponding lane of the two sources.
2462 const auto &Begin = Mask.begin();
2463 const auto &LeftMid = Mask.begin() + Mask.size() / 4;
2464 const auto &Mid = Mask.begin() + Mask.size() / 2;
2465 const auto &RightMid = Mask.end() - Mask.size() / 4;
2466 const auto &End = Mask.end();
2467 unsigned HalfSize = Mask.size() / 2;
2468 SDValue OriV1 = V1, OriV2 = V2;
2469
// First quarter of each lane must be the odd elements of one source.
2470 if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
2471 fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
2472 V1 = OriV1;
2473 else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
2474 fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
2475 2))
2476 V1 = OriV2;
2477 else
2478 return SDValue();
2479
// Second quarter of each lane must be the odd elements of one source.
2480 if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
2481 fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
2482 V2 = OriV1;
2483 else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
2484 fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
2485 2))
2486 V2 = OriV2;
2487 else
2488 return SDValue();
2489
// Operands swapped to match VPICKOD's operand order (see the LSX variant).
2490 return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
2491 }
2492
2493/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2494 static SDValue
2496 SDValue V1, SDValue V2, SelectionDAG &DAG,
2497 const LoongArchSubtarget &Subtarget) {
2498 // LoongArch LASX only supports xvinsve0.{w/d}.
2499 if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2500 VT != MVT::v4f64)
2501 return SDValue();
2502
2503 MVT GRLenVT = Subtarget.getGRLenVT();
2504 int MaskSize = Mask.size();
2505 assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2506
2507 // Check if exactly one element of the Mask is replaced by 'Replaced', while
2508 // all other elements are either 'Base + i' or undef (-1). On success, return
2509 // the index of the replaced element. Otherwise, just return -1.
2510 auto checkReplaceOne = [&](int Base, int Replaced) -> int {
2511 int Idx = -1;
2512 for (int i = 0; i < MaskSize; ++i) {
2513 if (Mask[i] == Base + i || Mask[i] == -1)
2514 continue;
2515 if (Mask[i] != Replaced)
2516 return -1;
2517 if (Idx == -1)
2518 Idx = i;
2519 else
// A second replaced element disqualifies the pattern.
2520 return -1;
2521 }
2522 return Idx;
2523 };
2524
2525 // Case 1: the lowest element of V2 replaces one element in V1.
2526 int Idx = checkReplaceOne(0, MaskSize);
2527 if (Idx != -1)
2528 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2529 DAG.getConstant(Idx, DL, GRLenVT));
2530
2531 // Case 2: the lowest element of V1 replaces one element in V2.
2532 Idx = checkReplaceOne(MaskSize, 0);
2533 if (Idx != -1)
2534 return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2535 DAG.getConstant(Idx, DL, GRLenVT));
2536
2537 return SDValue();
2538 }
2539
2540/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
2542 MVT VT, SDValue V1, SDValue V2,
2543 SelectionDAG &DAG) {
2544
2545 int MaskSize = Mask.size();
2546 int HalfSize = Mask.size() / 2;
2547 const auto &Begin = Mask.begin();
2548 const auto &Mid = Mask.begin() + HalfSize;
2549 const auto &End = Mask.end();
2550
2551 // VECTOR_SHUFFLE concatenates the vectors:
2552 // <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
2553 // shuffling ->
2554 // <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
2555 //
2556 // XVSHUF concatenates the vectors:
2557 // <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
2558 // shuffling ->
2559 // <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
2560 SmallVector<SDValue, 8> MaskAlloc;
// Low lane of the result: indices must come from the low halves of the
// concatenated sources; rebase them into xvshuf selector space.
2561 for (auto it = Begin; it < Mid; it++) {
2562 if (*it < 0) // UNDEF
2563 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2564 else if ((*it >= 0 && *it < HalfSize) ||
2565 (*it >= MaskSize && *it < MaskSize + HalfSize)) {
2566 int M = *it < HalfSize ? *it : *it - HalfSize;
2567 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2568 } else
2569 return SDValue();
2570 }
2571 assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
2572
// High lane of the result: indices must come from the high halves of the
// concatenated sources.
2573 for (auto it = Mid; it < End; it++) {
2574 if (*it < 0) // UNDEF
2575 MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
2576 else if ((*it >= HalfSize && *it < MaskSize) ||
2577 (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
2578 int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
2579 MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
2580 } else
2581 return SDValue();
2582 }
2583 assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
2584
2585 EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
2586 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
// Operands swapped, matching VSHUF's operand convention.
2587 return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
2588 }
2589
2590/// Shuffle vectors by lane to generate more optimized instructions.
2591/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
2592///
2593/// Therefore, except for the following four cases, other cases are regarded
2594/// as cross-lane shuffles, where optimization is relatively limited.
2595///
2596/// - Shuffle high, low lanes of two inputs vector
2597/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
2598/// - Shuffle low, high lanes of two inputs vector
2599/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
2600/// - Shuffle low, low lanes of two inputs vector
2601/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
2602/// - Shuffle high, high lanes of two inputs vector
2603/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
2604///
2605/// The first case is the closest to LoongArch instructions and the other
2606/// cases need to be converted to it for processing.
2607///
2608/// This function will return true for the last three cases above and will
2609/// modify V1, V2 and Mask. Otherwise, return false for the first case and
2610/// cross-lane shuffle cases.
2612 const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
2613 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
2614
2615 enum HalfMaskType { HighLaneTy, LowLaneTy, None };
2616
2617 int MaskSize = Mask.size();
2618 int HalfSize = Mask.size() / 2;
2619 MVT GRLenVT = Subtarget.getGRLenVT();
2620
2621 HalfMaskType preMask = None, postMask = None;
2622
// Classify each half of the mask by whether it reads only high-lane or only
// low-lane elements of the two (concatenated) sources; undef fits either.
2623 if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2624 return M < 0 || (M >= 0 && M < HalfSize) ||
2625 (M >= MaskSize && M < MaskSize + HalfSize);
2626 }))
2627 preMask = HighLaneTy;
2628 else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
2629 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2630 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2631 }))
2632 preMask = LowLaneTy;
2633
2634 if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2635 return M < 0 || (M >= HalfSize && M < MaskSize) ||
2636 (M >= MaskSize + HalfSize && M < MaskSize * 2);
2637 }))
2638 postMask = LowLaneTy;
2639 else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2640 return M < 0 || (M >= 0 && M < HalfSize) ||
2641 (M >= MaskSize && M < MaskSize + HalfSize);
2642 }))
2643 postMask = HighLaneTy;
2644
2645 // The pre-half of mask is high lane type, and the post-half of mask
2646 // is low lane type, which is closest to the LoongArch instructions.
2647 //
2648 // Note: In the LoongArch architecture, the high lane of mask corresponds
2649 // to the lower 128-bit of vector register, and the low lane of mask
2650 // corresponds to the higher 128-bit of vector register.
2651 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2652 return false;
2653 }
// high/low halves are swapped: xvpermi.d with 0b01001110 exchanges the two
// 128-bit lanes of each input, then the mask is rebased accordingly.
2654 if (preMask == LowLaneTy && postMask == HighLaneTy) {
2655 V1 = DAG.getBitcast(MVT::v4i64, V1);
2656 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2657 DAG.getConstant(0b01001110, DL, GRLenVT));
2658 V1 = DAG.getBitcast(VT, V1);
2659
2660 if (!V2.isUndef()) {
2661 V2 = DAG.getBitcast(MVT::v4i64, V2);
2662 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2663 DAG.getConstant(0b01001110, DL, GRLenVT));
2664 V2 = DAG.getBitcast(VT, V2);
2665 }
2666
2667 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2668 *it = *it < 0 ? *it : *it - HalfSize;
2669 }
2670 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2671 *it = *it < 0 ? *it : *it + HalfSize;
2672 }
// both halves read the low lane: broadcast the upper 128-bit lane
// (0b11101110) and rebase only the first half of the mask.
2673 } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
2674 V1 = DAG.getBitcast(MVT::v4i64, V1);
2675 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2676 DAG.getConstant(0b11101110, DL, GRLenVT));
2677 V1 = DAG.getBitcast(VT, V1);
2678
2679 if (!V2.isUndef()) {
2680 V2 = DAG.getBitcast(MVT::v4i64, V2);
2681 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2682 DAG.getConstant(0b11101110, DL, GRLenVT));
2683 V2 = DAG.getBitcast(VT, V2);
2684 }
2685
2686 for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
2687 *it = *it < 0 ? *it : *it - HalfSize;
2688 }
// both halves read the high lane: broadcast the lower 128-bit lane
// (0b01000100) and rebase only the second half of the mask.
2689 } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
2690 V1 = DAG.getBitcast(MVT::v4i64, V1);
2691 V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
2692 DAG.getConstant(0b01000100, DL, GRLenVT));
2693 V1 = DAG.getBitcast(VT, V1);
2694
2695 if (!V2.isUndef()) {
2696 V2 = DAG.getBitcast(MVT::v4i64, V2);
2697 V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
2698 DAG.getConstant(0b01000100, DL, GRLenVT));
2699 V2 = DAG.getBitcast(VT, V2);
2700 }
2701
2702 for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
2703 *it = *it < 0 ? *it : *it + HalfSize;
2704 }
2705 } else { // cross-lane
2706 return false;
2707 }
2708
2709 return true;
2710 }
2711
2712/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
2713/// Only for 256-bit vector.
2714///
2715/// For example:
2716/// %2 = shufflevector <4 x i64> %0, <4 x i64> posion,
2717/// <4 x i64> <i32 0, i32 3, i32 2, i32 0>
2718/// is lowerded to:
2719/// (XVPERMI $xr2, $xr0, 78)
2720/// (XVSHUF $xr1, $xr2, $xr0)
2721/// (XVORI $xr0, $xr1, 0)
2723 ArrayRef<int> Mask,
2724 MVT VT, SDValue V1,
2725 SDValue V2,
2726 SelectionDAG &DAG) {
2727 assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
2728 int Size = Mask.size();
2729 int LaneSize = Size / 2;
2730
2731 bool LaneCrossing[2] = {false, false};
2732 for (int i = 0; i < Size; ++i)
2733 if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
2734 LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
2735
2736 // Ensure that all lanes ared involved.
2737 if (!LaneCrossing[0] && !LaneCrossing[1])
2738 return SDValue();
2739
2740 SmallVector<int> InLaneMask;
2741 InLaneMask.assign(Mask.begin(), Mask.end());
2742 for (int i = 0; i < Size; ++i) {
2743 int &M = InLaneMask[i];
2744 if (M < 0)
2745 continue;
2746 if (((M % Size) / LaneSize) != (i / LaneSize))
2747 M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
2748 }
2749
2750 SDValue Flipped = DAG.getBitcast(MVT::v4i64, V1);
2751 Flipped = DAG.getVectorShuffle(MVT::v4i64, DL, Flipped,
2752 DAG.getUNDEF(MVT::v4i64), {2, 3, 0, 1});
2753 Flipped = DAG.getBitcast(VT, Flipped);
2754 return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
2755}
2756
2757/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
2758///
2759/// This routine breaks down the specific type of 256-bit shuffle and
2760/// dispatches to the lowering routines accordingly.
2762 SDValue V1, SDValue V2, SelectionDAG &DAG,
2763 const LoongArchSubtarget &Subtarget) {
2764 assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
2765 VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
2766 VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
2767 "Vector type is unsupported for lasx!");
2769 "Two operands have different types!");
2770 assert(VT.getVectorNumElements() == Mask.size() &&
2771 "Unexpected mask size for shuffle!");
2772 assert(Mask.size() % 2 == 0 && "Expected even mask size.");
2773 assert(Mask.size() >= 4 && "Mask size is less than 4.");
2774
// Compute which lanes are known undef or known zero; the shift matcher
// below may treat those lanes as free.
2775 APInt KnownUndef, KnownZero;
2776 computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
2777 APInt Zeroable = KnownUndef | KnownZero;
2778
2779 SDValue Result;
2780 // TODO: Add more comparison patterns.
// Single-input patterns are tried first when the second operand is undef.
2781 if (V2.isUndef()) {
2782 if ((Result =
2783 lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
2784 return Result;
2785 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
2786 Subtarget)))
2787 return Result;
2788 // Try to widen vectors to gain more optimization opportunities.
2789 if (SDValue NewShuffle = widenShuffleMask(DL, Mask, VT, V1, V2, DAG))
2790 return NewShuffle;
2791 if ((Result =
2792 lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
2793 return Result;
2794 if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
2795 return Result;
2796 if ((Result =
2797 lowerVECTOR_SHUFFLE_IsReverse(DL, Mask, VT, V1, DAG, Subtarget)))
2798 return Result;
2799
2800 // TODO: This comment may be enabled in the future to better match the
2801 // pattern for instruction selection.
2802 /* V2 = V1; */
2803 }
2804
2805 // It is recommended not to change the pattern comparison order for better
2806 // performance.
2807 if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
2808 return Result;
2809 if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
2810 return Result;
2811 if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
2812 return Result;
2813 if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
2814 return Result;
2815 if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
2816 return Result;
2817 if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
2818 return Result;
2819 if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2820 Zeroable)))
2821 return Result;
2822 if ((Result =
2823 lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2824 return Result;
2825 if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
2826 Subtarget)))
2827 return Result;
2828
2829 // canonicalize non cross-lane shuffle vector
// On success the canonicalizer rewrote NewMask/V1/V2 in place; retry the
// whole dispatch with the lane-normalized form.
2830 SmallVector<int> NewMask(Mask);
2831 if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2832 return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2833
2834 // FIXME: Handling the remaining cases earlier can degrade performance
2835 // in some situations. Further analysis is required to enable more
2836 // effective optimizations.
2837 if (V2.isUndef()) {
2838 if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2839 V1, V2, DAG)))
2840 return Result;
2841 }
2842
// XVSHUF is the fully general fallback for 256-bit shuffles.
2843 if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
2844 return NewShuffle;
2845 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
2846 return Result;
2847
2848 return SDValue();
2849 }
2850
2851SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
2852 SelectionDAG &DAG) const {
2853 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
2854 ArrayRef<int> OrigMask = SVOp->getMask();
2855 SDValue V1 = Op.getOperand(0);
2856 SDValue V2 = Op.getOperand(1);
2857 MVT VT = Op.getSimpleValueType();
2858 int NumElements = VT.getVectorNumElements();
2859 SDLoc DL(Op);
2860
2861 bool V1IsUndef = V1.isUndef();
2862 bool V2IsUndef = V2.isUndef();
2863 if (V1IsUndef && V2IsUndef)
2864 return DAG.getUNDEF(VT);
2865
2866 // When we create a shuffle node we put the UNDEF node to second operand,
2867 // but in some cases the first operand may be transformed to UNDEF.
2868 // In this case we should just commute the node.
2869 if (V1IsUndef)
2870 return DAG.getCommutedVectorShuffle(*SVOp);
2871
2872 // Check for non-undef masks pointing at an undef vector and make the masks
2873 // undef as well. This makes it easier to match the shuffle based solely on
2874 // the mask.
2875 if (V2IsUndef &&
2876 any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
2877 SmallVector<int, 8> NewMask(OrigMask);
2878 for (int &M : NewMask)
2879 if (M >= NumElements)
2880 M = -1;
2881 return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
2882 }
2883
2884 // Check for illegal shuffle mask element index values.
2885 int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
2886 (void)MaskUpperLimit;
2887 assert(llvm::all_of(OrigMask,
2888 [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
2889 "Out of bounds shuffle index");
2890
2891 // For each vector width, delegate to a specialized lowering routine.
2892 if (VT.is128BitVector())
2893 return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2894
2895 if (VT.is256BitVector())
2896 return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG, Subtarget);
2897
2898 return SDValue();
2899}
2900
2901SDValue LoongArchTargetLowering::lowerFP_TO_FP16(SDValue Op,
2902 SelectionDAG &DAG) const {
2903 // Custom lower to ensure the libcall return is passed in an FPR on hard
2904 // float ABIs.
2905 SDLoc DL(Op);
2906 MakeLibCallOptions CallOptions;
2907 SDValue Op0 = Op.getOperand(0);
2908 SDValue Chain = SDValue();
2909 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
2910 SDValue Res;
2911 std::tie(Res, Chain) =
2912 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
2913 if (Subtarget.is64Bit())
2914 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2915 return DAG.getBitcast(MVT::i32, Res);
2916}
2917
2918SDValue LoongArchTargetLowering::lowerFP16_TO_FP(SDValue Op,
2919 SelectionDAG &DAG) const {
2920 // Custom lower to ensure the libcall argument is passed in an FPR on hard
2921 // float ABIs.
2922 SDLoc DL(Op);
2923 MakeLibCallOptions CallOptions;
2924 SDValue Op0 = Op.getOperand(0);
2925 SDValue Chain = SDValue();
2926 SDValue Arg = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2927 DL, MVT::f32, Op0)
2928 : DAG.getBitcast(MVT::f32, Op0);
2929 SDValue Res;
2930 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
2931 CallOptions, DL, Chain);
2932 return Res;
2933}
2934
2935SDValue LoongArchTargetLowering::lowerFP_TO_BF16(SDValue Op,
2936 SelectionDAG &DAG) const {
2937 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2938 SDLoc DL(Op);
2939 MakeLibCallOptions CallOptions;
2940 RTLIB::Libcall LC =
2941 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
2942 SDValue Res =
2943 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
2944 if (Subtarget.is64Bit())
2945 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Res);
2946 return DAG.getBitcast(MVT::i32, Res);
2947}
2948
2949SDValue LoongArchTargetLowering::lowerBF16_TO_FP(SDValue Op,
2950 SelectionDAG &DAG) const {
2951 assert(Subtarget.hasBasicF() && "Unexpected custom legalization");
2952 MVT VT = Op.getSimpleValueType();
2953 SDLoc DL(Op);
2954 Op = DAG.getNode(
2955 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
2956 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
2957 SDValue Res = Subtarget.is64Bit() ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64,
2958 DL, MVT::f32, Op)
2959 : DAG.getBitcast(MVT::f32, Op);
2960 if (VT != MVT::f32)
2961 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
2962 return Res;
2963}
2964
// Lower BUILD_VECTOR as broadcast load (if possible).
// For example:
//   %a = load i8, ptr %ptr
//   %b = build_vector %a, %a, %a, %a
// is lowered to :
//   (VLDREPL_B $a0, 0)
                                                const SDLoc &DL,
                                                SelectionDAG &DAG) {
  MVT VT = BVOp->getSimpleValueType(0);
  int NumOps = BVOp->getNumOperands();

  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for broadcast.");

  // Check that every operand is one and the same load node.
  // NOTE: "IsIdeneity" is a historical misspelling of "IsIdentity" kept here
  // to leave the code byte-identical.
  SDValue IdentitySrc;
  bool IsIdeneity = true;

  for (int i = 0; i != NumOps; i++) {
    SDValue Op = BVOp->getOperand(i);
    if (Op.getOpcode() != ISD::LOAD || (IdentitySrc && Op != IdentitySrc)) {
      IsIdeneity = false;
      break;
    }
    IdentitySrc = BVOp->getOperand(0);
  }

  // make sure that this load is valid and only has one user.
  if (!IsIdeneity || !IdentitySrc || !BVOp->isOnlyUserOf(IdentitySrc.getNode()))
    return SDValue();

  auto *LN = cast<LoadSDNode>(IdentitySrc);
  auto ExtType = LN->getExtensionType();

  // Only plain or any-extending loads whose memory element width matches the
  // vector element width can become a VLDREPL.
  if ((ExtType == ISD::EXTLOAD || ExtType == ISD::NON_EXTLOAD) &&
      VT.getScalarSizeInBits() == LN->getMemoryVT().getScalarSizeInBits()) {
    // Indexed loads and stores are not supported on LoongArch.
    assert(LN->isUnindexed() && "Unexpected indexed load.");

    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
    // The offset operand of unindexed load is always undefined, so there is
    // no need to pass it to VLDREPL.
    SDValue Ops[] = {LN->getChain(), LN->getBasePtr()};
    SDValue BCast = DAG.getNode(LoongArchISD::VLDREPL, DL, Tys, Ops);
    // Re-route the original load's chain users to the broadcast-load's chain.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1));
    return BCast;
  }
  return SDValue();
}
3014
// Sequentially insert elements from Ops into Vector, from low to high indices.
// Note: Ops can have fewer elements than Vector.
                       const LoongArchSubtarget &Subtarget, SDValue &Vector,
                       EVT ResTy) {
  // The caller may supply at most one element per destination lane.
  assert(Ops.size() <= ResTy.getVectorNumElements());

  // Lane 0 can use the cheaper SCALAR_TO_VECTOR instead of an insert.
  SDValue Op0 = Ops[0];
  if (!Op0.isUndef())
    Vector = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ResTy, Op0);
  for (unsigned i = 1; i < Ops.size(); ++i) {
    SDValue Opi = Ops[i];
    // Undef elements leave whatever the destination lane already holds.
    if (Opi.isUndef())
      continue;
    Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, Opi,
                         DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
  }
}
3033
// Build a ResTy subvector from Node, taking NumElts elements starting at index
// 'first'.
                                            SelectionDAG &DAG, SDLoc DL,
                                            const LoongArchSubtarget &Subtarget,
                                            EVT ResTy, unsigned first) {
  unsigned NumElts = ResTy.getVectorNumElements();

  // The requested slice must lie entirely inside Node's operand list.
  assert(first + NumElts <= Node->getSimpleValueType(0).getVectorNumElements());

  SmallVector<SDValue, 16> Ops(Node->op_begin() + first,
                               Node->op_begin() + first + NumElts);
  // Start from undef and insert only the defined lanes.
  SDValue Vector = DAG.getUNDEF(ResTy);
  fillVector(Ops, DAG, DL, Subtarget, Vector, ResTy);
  return Vector;
}
3050
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) const {
  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
  MVT VT = Node->getSimpleValueType(0);
  EVT ResTy = Op->getValueType(0);
  unsigned NumElts = ResTy.getVectorNumElements();
  SDLoc DL(Op);
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsConstant = false;
  bool UseSameConstant = true;
  SDValue ConstantValue;
  bool Is128Vec = ResTy.is128BitVector();
  bool Is256Vec = ResTy.is256BitVector();

  // Bail out unless the vector width matches an enabled vector extension
  // (LSX handles 128-bit, LASX handles 256-bit).
  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
      (!Subtarget.hasExtLASX() || !Is256Vec))
    return SDValue();

  // Best case: the whole build_vector is a broadcast of a single load.
  if (SDValue Result = lowerBUILD_VECTORAsBroadCastLoad(Node, DL, DAG))
    return Result;

  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                            /*MinSplatBits=*/8) &&
      SplatBitSize <= 64) {
    // We can only cope with 8, 16, 32, or 64-bit elements.
    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
        SplatBitSize != 64)
      return SDValue();

    if (SplatBitSize == 64 && !Subtarget.is64Bit()) {
      // We can only handle 64-bit elements that are within
      // the signed 10-bit range or match vldi patterns on 32-bit targets.
      // See the BUILD_VECTOR case in LoongArchDAGToDAGISel::Select().
      if (!SplatValue.isSignedIntN(10) &&
          !isImmVLDILegalForMode1(SplatValue, SplatBitSize).first)
        return SDValue();
      if ((Is128Vec && ResTy == MVT::v4i32) ||
          (Is256Vec && ResTy == MVT::v8i32))
        return Op;
    }

    // Pick an integer vector type matching the splat element width; the
    // constant is materialized there and bitcast back at the end.
    EVT ViaVecTy;

    switch (SplatBitSize) {
    default:
      return SDValue();
    case 8:
      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
      break;
    case 16:
      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
      break;
    case 32:
      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
      break;
    case 64:
      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
      break;
    }

    // SelectionDAG::getConstant will promote SplatValue appropriately.
    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);

    // Bitcast to the type we originally wanted.
    if (ViaVecTy != ResTy)
      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);

    return Result;
  }

  // Non-constant splats are matched directly by splat patterns later.
  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
    return Op;

  // Scan the elements: record whether constants appear at all and whether
  // every constant element is the same value.
  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Opi = Node->getOperand(i);
    if (isIntOrFPConstant(Opi)) {
      IsConstant = true;
      if (!ConstantValue.getNode())
        ConstantValue = Opi;
      else if (ConstantValue != Opi)
        UseSameConstant = false;
    }
  }

  // If the type of BUILD_VECTOR is v2f64, custom legalizing it has no benefits.
  if (IsConstant && UseSameConstant && ResTy != MVT::v2f64) {
    // Splat the shared constant, then patch the non-constant lanes in.
    SDValue Result = DAG.getSplatBuildVector(ResTy, DL, ConstantValue);
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue Opi = Node->getOperand(i);
      if (!isIntOrFPConstant(Opi))
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Result, Opi,
                             DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
    }
    return Result;
  }

  if (!IsConstant) {
    // If the BUILD_VECTOR has a repeated pattern, use INSERT_VECTOR_ELT to fill
    // the sub-sequence of the vector and then broadcast the sub-sequence.
    //
    // TODO: If the BUILD_VECTOR contains undef elements, consider falling
    // back to use INSERT_VECTOR_ELT to materialize the vector, because it
    // generates worse code in some cases. This could be further optimized
    // with more consideration.
    BitVector UndefElements;
    if (Node->getRepeatedSequence(Sequence, &UndefElements) &&
        UndefElements.count() == 0) {
      // Using LSX instructions to fill the sub-sequence of 256-bits vector,
      // because the high part can be simply treated as undef.
      SDValue Vector = DAG.getUNDEF(ResTy);
      EVT FillTy = Is256Vec
                       : ResTy;
      SDValue FillVec =
          Is256Vec ? DAG.getExtractSubvector(DL, FillTy, Vector, 0) : Vector;

      fillVector(Sequence, DAG, DL, Subtarget, FillVec, FillTy);

      unsigned SeqLen = Sequence.size();
      unsigned SplatLen = NumElts / SeqLen;
      MVT SplatEltTy = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen);
      MVT SplatTy = MVT::getVectorVT(SplatEltTy, SplatLen);

      // If size of the sub-sequence is half of a 256-bits vector, bitcast the
      // vector to v4i64 type in order to match the pattern of XVREPLVE0Q.
      if (SplatEltTy == MVT::i128)
        SplatTy = MVT::v4i64;

      SDValue SplatVec;
      SDValue SrcVec = DAG.getBitcast(
          SplatTy,
          Is256Vec ? DAG.getInsertSubvector(DL, Vector, FillVec, 0) : FillVec);
      if (Is256Vec) {
        SplatVec =
            DAG.getNode((SplatEltTy == MVT::i128) ? LoongArchISD::XVREPLVE0Q
                                                  : LoongArchISD::XVREPLVE0,
                        DL, SplatTy, SrcVec);
      } else {
        SplatVec = DAG.getNode(LoongArchISD::VREPLVEI, DL, SplatTy, SrcVec,
                               DAG.getConstant(0, DL, Subtarget.getGRLenVT()));
      }

      return DAG.getBitcast(ResTy, SplatVec);
    }

    // Use INSERT_VECTOR_ELT operations rather than expand to stores, because
    // using memory operations is much lower.
    //
    // For 256-bit vectors, normally split into two halves and concatenate.
    // Special case: for v8i32/v8f32/v4i64/v4f64, if the upper half has only
    // one non-undef element, skip spliting to avoid a worse result.
    if (ResTy == MVT::v8i32 || ResTy == MVT::v8f32 || ResTy == MVT::v4i64 ||
        ResTy == MVT::v4f64) {
      unsigned NonUndefCount = 0;
      for (unsigned i = NumElts / 2; i < NumElts; ++i) {
        if (!Node->getOperand(i).isUndef()) {
          ++NonUndefCount;
          if (NonUndefCount > 1)
            break;
        }
      }
      if (NonUndefCount == 1)
        return fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, ResTy, 0);
    }

    // General path: build each 128-bit half with inserts and concatenate.
    EVT VecTy =
        Is256Vec ? ResTy.getHalfNumVectorElementsVT(*DAG.getContext()) : ResTy;
    SDValue Vector =
        fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget, VecTy, 0);

    if (Is128Vec)
      return Vector;

    SDValue VectorHi = fillSubVectorFromBuildVector(Node, DAG, DL, Subtarget,
                                                    VecTy, NumElts / 2);

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResTy, Vector, VectorHi);
  }

  return SDValue();
}
3235
3236SDValue LoongArchTargetLowering::lowerCONCAT_VECTORS(SDValue Op,
3237 SelectionDAG &DAG) const {
3238 SDLoc DL(Op);
3239 MVT ResVT = Op.getSimpleValueType();
3240 assert(ResVT.is256BitVector() && Op.getNumOperands() == 2);
3241
3242 unsigned NumOperands = Op.getNumOperands();
3243 unsigned NumFreezeUndef = 0;
3244 unsigned NumZero = 0;
3245 unsigned NumNonZero = 0;
3246 unsigned NonZeros = 0;
3247 SmallSet<SDValue, 4> Undefs;
3248 for (unsigned i = 0; i != NumOperands; ++i) {
3249 SDValue SubVec = Op.getOperand(i);
3250 if (SubVec.isUndef())
3251 continue;
3252 if (ISD::isFreezeUndef(SubVec.getNode())) {
3253 // If the freeze(undef) has multiple uses then we must fold to zero.
3254 if (SubVec.hasOneUse()) {
3255 ++NumFreezeUndef;
3256 } else {
3257 ++NumZero;
3258 Undefs.insert(SubVec);
3259 }
3260 } else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
3261 ++NumZero;
3262 else {
3263 assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range.
3264 NonZeros |= 1 << i;
3265 ++NumNonZero;
3266 }
3267 }
3268
3269 // If we have more than 2 non-zeros, build each half separately.
3270 if (NumNonZero > 2) {
3271 MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
3272 ArrayRef<SDUse> Ops = Op->ops();
3273 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3274 Ops.slice(0, NumOperands / 2));
3275 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
3276 Ops.slice(NumOperands / 2));
3277 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
3278 }
3279
3280 // Otherwise, build it up through insert_subvectors.
3281 SDValue Vec = NumZero ? DAG.getConstant(0, DL, ResVT)
3282 : (NumFreezeUndef ? DAG.getFreeze(DAG.getUNDEF(ResVT))
3283 : DAG.getUNDEF(ResVT));
3284
3285 // Replace Undef operands with ZeroVector.
3286 for (SDValue U : Undefs)
3287 DAG.ReplaceAllUsesWith(U, DAG.getConstant(0, DL, U.getSimpleValueType()));
3288
3289 MVT SubVT = Op.getOperand(0).getSimpleValueType();
3290 unsigned NumSubElems = SubVT.getVectorNumElements();
3291 for (unsigned i = 0; i != NumOperands; ++i) {
3292 if ((NonZeros & (1 << i)) == 0)
3293 continue;
3294
3295 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResVT, Vec, Op.getOperand(i),
3296 DAG.getVectorIdxConstant(i * NumSubElems, DL));
3297 }
3298
3299 return Vec;
3300}
3301
SDValue
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MVT EltVT = Op.getSimpleValueType();
  SDValue Vec = Op->getOperand(0);
  EVT VecTy = Vec->getValueType(0);
  SDValue Idx = Op->getOperand(1);
  SDLoc DL(Op);
  MVT GRLenVT = Subtarget.getGRLenVT();

  assert(VecTy.is256BitVector() && "Unexpected EXTRACT_VECTOR_ELT vector type");

  // Constant indices are matched directly by the tablegen patterns.
  if (isa<ConstantSDNode>(Idx))
    return Op;

  // Variable index: emit a shuffle/permute that moves the selected lane to
  // lane 0, then extract lane 0 with a constant index.
  switch (VecTy.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected type");
  case MVT::v32i8:
  case MVT::v16i16:
  case MVT::v4i64:
  case MVT::v4f64: {
    // Extract the high half subvector and place it to the low half of a new
    // vector. It doesn't matter what the high half of the new vector is.
    EVT HalfTy = VecTy.getHalfNumVectorElementsVT(*DAG.getContext());
    SDValue VecHi =
        DAG.getExtractSubvector(DL, HalfTy, Vec, HalfTy.getVectorNumElements());
    SDValue TmpVec =
        DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecTy, DAG.getUNDEF(VecTy),
                    VecHi, DAG.getConstant(0, DL, GRLenVT));

    // Shuffle the origin Vec and the TmpVec using MaskVec, the lowest element
    // of MaskVec is Idx, the rest do not matter. ResVec[0] will hold the
    // desired element.
    SDValue IdxCp =
        Subtarget.is64Bit()
            ? DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Idx)
            : DAG.getBitcast(MVT::f32, Idx);
    SDValue IdxVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f32, IdxCp);
    SDValue MaskVec =
        DAG.getBitcast((VecTy == MVT::v4f64) ? MVT::v4i64 : VecTy, IdxVec);
    SDValue ResVec =
        DAG.getNode(LoongArchISD::VSHUF, DL, VecTy, MaskVec, TmpVec, Vec);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ResVec,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  case MVT::v8i32:
  case MVT::v8f32: {
    // XVPERM permutes 32-bit lanes across the full 256-bit register, so a
    // splat of Idx routes the desired element into every lane, incl. lane 0.
    SDValue SplatIdx = DAG.getSplatBuildVector(MVT::v8i32, DL, Idx);
    SDValue SplatValue =
        DAG.getNode(LoongArchISD::XVPERM, DL, VecTy, Vec, SplatIdx);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, SplatValue,
                       DAG.getConstant(0, DL, GRLenVT));
  }
  }
}
3360
3361SDValue
3362LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3363 SelectionDAG &DAG) const {
3364 MVT VT = Op.getSimpleValueType();
3365 MVT EltVT = VT.getVectorElementType();
3366 unsigned NumElts = VT.getVectorNumElements();
3367 unsigned EltSizeInBits = EltVT.getScalarSizeInBits();
3368 SDLoc DL(Op);
3369 SDValue Op0 = Op.getOperand(0);
3370 SDValue Op1 = Op.getOperand(1);
3371 SDValue Op2 = Op.getOperand(2);
3372
3373 if (isa<ConstantSDNode>(Op2))
3374 return Op;
3375
3376 MVT IdxTy = MVT::getIntegerVT(EltSizeInBits);
3377 MVT IdxVTy = MVT::getVectorVT(IdxTy, NumElts);
3378
3379 if (!isTypeLegal(VT) || !isTypeLegal(IdxVTy))
3380 return SDValue();
3381
3382 SDValue SplatElt = DAG.getSplatBuildVector(VT, DL, Op1);
3383 SmallVector<SDValue, 32> RawIndices;
3384 SDValue SplatIdx;
3385 SDValue Indices;
3386
3387 if (!Subtarget.is64Bit() && IdxTy == MVT::i64) {
3388 MVT PairVTy = MVT::getVectorVT(MVT::i32, NumElts * 2);
3389 for (unsigned i = 0; i < NumElts; ++i) {
3390 RawIndices.push_back(Op2);
3391 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3392 }
3393 SplatIdx = DAG.getBuildVector(PairVTy, DL, RawIndices);
3394 SplatIdx = DAG.getBitcast(IdxVTy, SplatIdx);
3395
3396 RawIndices.clear();
3397 for (unsigned i = 0; i < NumElts; ++i) {
3398 RawIndices.push_back(DAG.getConstant(i, DL, MVT::i32));
3399 RawIndices.push_back(DAG.getConstant(0, DL, MVT::i32));
3400 }
3401 Indices = DAG.getBuildVector(PairVTy, DL, RawIndices);
3402 Indices = DAG.getBitcast(IdxVTy, Indices);
3403 } else {
3404 SplatIdx = DAG.getSplatBuildVector(IdxVTy, DL, Op2);
3405
3406 for (unsigned i = 0; i < NumElts; ++i)
3407 RawIndices.push_back(DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
3408 Indices = DAG.getBuildVector(IdxVTy, DL, RawIndices);
3409 }
3410
3411 // insert vec, elt, idx
3412 // =>
3413 // select (splatidx == {0,1,2...}) ? splatelt : vec
3414 SDValue SelectCC =
3415 DAG.getSetCC(DL, IdxVTy, SplatIdx, Indices, ISD::CondCode::SETEQ);
3416 return DAG.getNode(ISD::VSELECT, DL, VT, SelectCC, SplatElt, Op0);
3417}
3418
3419SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3420 SelectionDAG &DAG) const {
3421 SDLoc DL(Op);
3422 SyncScope::ID FenceSSID =
3423 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
3424
3425 // singlethread fences only synchronize with signal handlers on the same
3426 // thread and thus only need to preserve instruction order, not actually
3427 // enforce memory ordering.
3428 if (FenceSSID == SyncScope::SingleThread)
3429 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3430 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3431
3432 return Op;
3433}
3434
3435SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
3436 SelectionDAG &DAG) const {
3437
3438 if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
3439 DAG.getContext()->emitError(
3440 "On LA64, only 64-bit registers can be written.");
3441 return Op.getOperand(0);
3442 }
3443
3444 if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
3445 DAG.getContext()->emitError(
3446 "On LA32, only 32-bit registers can be written.");
3447 return Op.getOperand(0);
3448 }
3449
3450 return Op;
3451}
3452
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The frame-depth argument must be a compile-time constant.
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
                                "be a constant integer");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = Op.getConstantOperandVal(0);
  int GRLenInBytes = Subtarget.getGRLen() / 8;

  // Walk up the chain of saved frame pointers: each parent frame address is
  // loaded from -2*GRLenInBytes relative to the current one.
  while (Depth--) {
    int Offset = -(GRLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getSignedConstant(Offset, DL, VT));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}
3479
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Currently only support lowering return address for current frame.
  if (Op.getConstantOperandVal(0) != 0) {
    DAG.getContext()->emitError(
        "return address can only be determined for the current frame");
    return SDValue();
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MVT GRLenVT = Subtarget.getGRLenVT();

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
                              getRegClassFor(GRLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
}
3499
3500SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
3501 SelectionDAG &DAG) const {
3502 MachineFunction &MF = DAG.getMachineFunction();
3503 auto Size = Subtarget.getGRLen() / 8;
3504 auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
3505 return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3506}
3507
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
3523
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Only reached on LA64 with the F extension but without D.
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  // If the source is provably non-negative when viewed as 32-bit (masked
  // below 0xFFFFFFFF), the unsigned conversion equals the signed one and
  // needs no libcall.
  if (Op0->getOpcode() == ISD::AND) {
    auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
      return Op;
  }

  // Same reasoning for a zero-filling bit extraction starting at bit 0.
  if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
      Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
      Op0.getConstantOperandVal(2) == UINT64_C(0))
    return Op;

  // And for values asserted to be zero-extended from fewer than 32 bits.
  if (Op0.getOpcode() == ISD::AssertZext &&
      dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
    return Op;

  // Otherwise fall back to the soft-float conversion libcall.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3557
SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Only reached on LA64 with the F extension but without D.
  assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
         !Subtarget.hasBasicD() && "unexpected target features");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);

  // If the value is asserted to fit in 32 bits, the native signed conversion
  // handles it and no libcall is needed.
  if ((Op0.getOpcode() == ISD::AssertSext ||
       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
    return Op;

  // Otherwise fall back to the soft-float conversion libcall.
  EVT OpVT = Op0.getValueType();
  EVT RetVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
  SDValue Chain = SDValue();
  std::tie(Result, Chain) =
      makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
  return Result;
}
3582
3583SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
3584 SelectionDAG &DAG) const {
3585
3586 SDLoc DL(Op);
3587 EVT VT = Op.getValueType();
3588 SDValue Op0 = Op.getOperand(0);
3589 EVT Op0VT = Op0.getValueType();
3590
3591 if (Op.getValueType() == MVT::f32 && Op0VT == MVT::i32 &&
3592 Subtarget.is64Bit() && Subtarget.hasBasicF()) {
3593 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3594 return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
3595 }
3596 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit()) {
3597 SDValue Lo, Hi;
3598 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
3599 return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
3600 }
3601 return Op;
3602}
3603
3604SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
3605 SelectionDAG &DAG) const {
3606
3607 SDLoc DL(Op);
3608 SDValue Op0 = Op.getOperand(0);
3609
3610 if (Op0.getValueType() == MVT::f16)
3611 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
3612
3613 if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
3614 !Subtarget.hasBasicD()) {
3615 SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op0);
3616 return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
3617 }
3618
3619 EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
3620 SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op0);
3621 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
3622}
3623
// getTargetNode overloads: wrap an address-bearing SDNode into its Target*
// counterpart carrying the given relocation flags.
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
3645
// Generic address lowering: emit the PseudoLA_* pseudo that materializes the
// address of N under the requested code model, PC-relative for dso-local
// symbols and GOT-indirect otherwise.
template <class NodeTy>
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                         bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
  SDValue Load;

  switch (M) {
  default:
    report_fatal_error("Unsupported code model");

  case CodeModel::Large: {
    assert(Subtarget.is64Bit() && "Large code model requires LA64");

    // This is not actually used, but is necessary for successfully matching
    // the PseudoLA_*_LARGE nodes.
    SDValue Tmp = DAG.getConstant(0, DL, Ty);
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
                                        Tmp, Addr),
                     0);
    } else {
      // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that
      // eventually becomes the desired 5-insn code sequence.
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
          0);
    }
    break;
  }

  case CodeModel::Small:
  case CodeModel::Medium:
    if (IsLocal) {
      // This generates the pattern (PseudoLA_PCREL sym), which expands to
      // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
      Load = SDValue(
          DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
    } else {
      // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
      // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
      Load =
          SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
    }
  }

  if (!IsLocal) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
  }

  return Load;
}
3709
3710SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
3711 SelectionDAG &DAG) const {
3712 return getAddr(cast<BlockAddressSDNode>(Op), DAG,
3713 DAG.getTarget().getCodeModel());
3714}
3715
3716SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
3717 SelectionDAG &DAG) const {
3718 return getAddr(cast<JumpTableSDNode>(Op), DAG,
3719 DAG.getTarget().getCodeModel());
3720}
3721
3722SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
3723 SelectionDAG &DAG) const {
3724 return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
3725 DAG.getTarget().getCodeModel());
3726}
3727
3728SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
3729 SelectionDAG &DAG) const {
3730 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3731 assert(N->getOffset() == 0 && "unexpected offset in global node");
3732 auto CM = DAG.getTarget().getCodeModel();
3733 const GlobalValue *GV = N->getGlobal();
3734
3735 if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
3736 if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
3737 CM = *GCM;
3738 }
3739
3740 return getAddr(N, DAG, CM, GV->isDSOLocal());
3741}
3742
// Lower a TLS address under the initial-exec or local-exec model: resolve the
// thread-local offset via the chosen pseudo and, unless expansion does it,
// add the thread pointer ($tp / R2).
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                                  SelectionDAG &DAG,
                                                  unsigned Opc, bool UseGOT,
                                                  bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  MVT GRLenVT = Subtarget.getGRLenVT();

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);

  // Only IE needs an extra argument for large code model.
  SDValue Offset = Opc == LoongArch::PseudoLA_TLS_IE_LARGE
                       ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // If it is LE for normal/medium code model, the add tp operation will occur
  // during the pseudo-instruction expansion.
  if (Opc == LoongArch::PseudoLA_TLS_LE && !Large)
    return Offset;

  if (UseGOT) {
    // Mark the load instruction as invariant to enable hoisting in MachineLICM.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MemOp = MF.getMachineMemOperand(
        LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
    DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
  }

  // Add the thread pointer.
  return DAG.getNode(ISD::ADD, DL, Ty, Offset,
                     DAG.getRegister(LoongArch::R2, GRLenVT));
}
3781
// Lower a TLS address under the general-/local-dynamic model: materialize the
// GOT slot address and call __tls_get_addr to resolve it at runtime.
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                                   SelectionDAG &DAG,
                                                   unsigned Opc,
                                                   bool Large) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  // __tls_get_addr takes and returns a pointer-sized integer.
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());

  // This is not actually used, but is necessary for successfully matching the
  // PseudoLA_*_LARGE nodes.
  SDValue Tmp = DAG.getConstant(0, DL, Ty);

  // Use a PC-relative addressing mode to access the dynamic GOT address.
  SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
  SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
                       : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  Args.emplace_back(Load, CallTy);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}
3813
3814SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
3815 SelectionDAG &DAG, unsigned Opc,
3816 bool Large) const {
3817 SDLoc DL(N);
3818 EVT Ty = getPointerTy(DAG.getDataLayout());
3819 const GlobalValue *GV = N->getGlobal();
3820
3821 // This is not actually used, but is necessary for successfully matching the
3822 // PseudoLA_*_LARGE nodes.
3823 SDValue Tmp = DAG.getConstant(0, DL, Ty);
3824
3825 // Use a PC-relative addressing mode to access the global dynamic GOT address.
3826 // This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
3827 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3828 return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
3829 : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
3830}
3831
// Dispatch TLS address lowering to the handler for the TLS model selected by
// the target machine (dynamic, initial-exec, local-exec, or TLSDESC).
SDValue
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
    report_fatal_error("In GHC calling convention TLS is not supported");

  bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
  assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");

  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "unexpected offset in global node");

  if (DAG.getTarget().useEmulatedTLS())
    reportFatalUsageError("the emulated TLS is prohibited");

  bool IsDesc = DAG.getTarget().useTLSDESC();

  switch (getTargetMachine().getTLSModel(N->getGlobal())) {
    // In this model, application code calls the dynamic linker function
    // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
    // runtime.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_GD_LARGE
                                     : LoongArch::PseudoLA_TLS_GD,
                               Large);
    break;
    // Same as GeneralDynamic, except for assembly modifiers and relocation
    // records.
    if (!IsDesc)
      return getDynamicTLSAddr(N, DAG,
                               Large ? LoongArch::PseudoLA_TLS_LD_LARGE
                                     : LoongArch::PseudoLA_TLS_LD,
                               Large);
    break;
    // This model uses the GOT to resolve TLS offsets.
    return getStaticTLSAddr(N, DAG,
                            Large ? LoongArch::PseudoLA_TLS_IE_LARGE
                                  : LoongArch::PseudoLA_TLS_IE,
                            /*UseGOT=*/true, Large);
    // This model is used when static linking as the TLS offsets are resolved
    // during program linking.
    //
    // This node doesn't need an extra argument for the large code model.
    return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
                            /*UseGOT=*/false, Large);
  }

  // Dynamic models fall through here when TLSDESC is in use.
  return getTLSDescAddr(N, DAG,
                        Large ? LoongArch::PseudoLA_TLS_DESC_LARGE
                              : LoongArch::PseudoLA_TLS_DESC,
                        Large);
}
3890
3891template <unsigned N>
3893 SelectionDAG &DAG, bool IsSigned = false) {
3894 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
3895 // Check the ImmArg.
3896 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3897 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3898 DAG.getContext()->emitError(Op->getOperationName(0) +
3899 ": argument out of range.");
3900 return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
3901 }
3902 return SDValue();
3903}
3904
// Custom-lower ISD::INTRINSIC_WO_CHAIN. Apart from the thread-pointer
// intrinsic, the only work done here is validating the ImmArg immediate
// operands of the LSX/LASX intrinsics via checkIntrinsicImmArg<N>(Op, ImmOp):
// an out-of-range immediate produces a diagnostic plus an UNDEF result, while
// an in-range immediate yields SDValue() so the intrinsic is selected by the
// normal patterns. Cases are grouped by immediate width N and operand index.
SDValue
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  switch (Op.getConstantOperandVal(0)) {
  default:
    return SDValue(); // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    // The thread pointer is register $tp (R2).
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(LoongArch::R2, PtrVT);
  }
  // uimm1 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vpickve2gr_d:
  case Intrinsic::loongarch_lsx_vpickve2gr_du:
  case Intrinsic::loongarch_lsx_vreplvei_d:
  case Intrinsic::loongarch_lasx_xvrepl128vei_d:
    return checkIntrinsicImmArg<1>(Op, 2, DAG);
  // uimm2 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vreplvei_w:
  case Intrinsic::loongarch_lasx_xvrepl128vei_w:
  case Intrinsic::loongarch_lasx_xvpickve2gr_d:
  case Intrinsic::loongarch_lasx_xvpickve2gr_du:
  case Intrinsic::loongarch_lasx_xvpickve_d:
  case Intrinsic::loongarch_lasx_xvpickve_d_f:
    return checkIntrinsicImmArg<2>(Op, 2, DAG);
  // uimm2 immediate in operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_d:
    return checkIntrinsicImmArg<2>(Op, 3, DAG);
  // uimm3 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vsat_b:
  case Intrinsic::loongarch_lsx_vsat_bu:
  case Intrinsic::loongarch_lsx_vrotri_b:
  case Intrinsic::loongarch_lsx_vsllwil_h_b:
  case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
  case Intrinsic::loongarch_lsx_vsrlri_b:
  case Intrinsic::loongarch_lsx_vsrari_b:
  case Intrinsic::loongarch_lsx_vreplvei_h:
  case Intrinsic::loongarch_lasx_xvsat_b:
  case Intrinsic::loongarch_lasx_xvsat_bu:
  case Intrinsic::loongarch_lasx_xvrotri_b:
  case Intrinsic::loongarch_lasx_xvsllwil_h_b:
  case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
  case Intrinsic::loongarch_lasx_xvsrlri_b:
  case Intrinsic::loongarch_lasx_xvsrari_b:
  case Intrinsic::loongarch_lasx_xvrepl128vei_h:
  case Intrinsic::loongarch_lasx_xvpickve_w:
  case Intrinsic::loongarch_lasx_xvpickve_w_f:
    return checkIntrinsicImmArg<3>(Op, 2, DAG);
  // uimm3 immediate in operand 3.
  case Intrinsic::loongarch_lasx_xvinsve0_w:
    return checkIntrinsicImmArg<3>(Op, 3, DAG);
  // uimm4 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vsat_h:
  case Intrinsic::loongarch_lsx_vsat_hu:
  case Intrinsic::loongarch_lsx_vrotri_h:
  case Intrinsic::loongarch_lsx_vsllwil_w_h:
  case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
  case Intrinsic::loongarch_lsx_vsrlri_h:
  case Intrinsic::loongarch_lsx_vsrari_h:
  case Intrinsic::loongarch_lsx_vreplvei_b:
  case Intrinsic::loongarch_lasx_xvsat_h:
  case Intrinsic::loongarch_lasx_xvsat_hu:
  case Intrinsic::loongarch_lasx_xvrotri_h:
  case Intrinsic::loongarch_lasx_xvsllwil_w_h:
  case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
  case Intrinsic::loongarch_lasx_xvsrlri_h:
  case Intrinsic::loongarch_lasx_xvsrari_h:
  case Intrinsic::loongarch_lasx_xvrepl128vei_b:
    return checkIntrinsicImmArg<4>(Op, 2, DAG);
  // uimm4 immediate in operand 3 (narrowing shifts: b from h).
  case Intrinsic::loongarch_lsx_vsrlni_b_h:
  case Intrinsic::loongarch_lsx_vsrani_b_h:
  case Intrinsic::loongarch_lsx_vsrlrni_b_h:
  case Intrinsic::loongarch_lsx_vsrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_b_h:
  case Intrinsic::loongarch_lsx_vssrani_b_h:
  case Intrinsic::loongarch_lsx_vssrlni_bu_h:
  case Intrinsic::loongarch_lsx_vssrani_bu_h:
  case Intrinsic::loongarch_lsx_vssrlrni_b_h:
  case Intrinsic::loongarch_lsx_vssrarni_b_h:
  case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
  case Intrinsic::loongarch_lsx_vssrarni_bu_h:
  case Intrinsic::loongarch_lasx_xvsrlni_b_h:
  case Intrinsic::loongarch_lasx_xvsrani_b_h:
  case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvsrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_b_h:
  case Intrinsic::loongarch_lasx_xvssrani_b_h:
  case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrani_bu_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
  case Intrinsic::loongarch_lasx_xvssrarni_b_h:
  case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
  case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
    return checkIntrinsicImmArg<4>(Op, 3, DAG);
  // uimm5 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vsat_w:
  case Intrinsic::loongarch_lsx_vsat_wu:
  case Intrinsic::loongarch_lsx_vrotri_w:
  case Intrinsic::loongarch_lsx_vsllwil_d_w:
  case Intrinsic::loongarch_lsx_vsllwil_du_wu:
  case Intrinsic::loongarch_lsx_vsrlri_w:
  case Intrinsic::loongarch_lsx_vsrari_w:
  case Intrinsic::loongarch_lsx_vslei_bu:
  case Intrinsic::loongarch_lsx_vslei_hu:
  case Intrinsic::loongarch_lsx_vslei_wu:
  case Intrinsic::loongarch_lsx_vslei_du:
  case Intrinsic::loongarch_lsx_vslti_bu:
  case Intrinsic::loongarch_lsx_vslti_hu:
  case Intrinsic::loongarch_lsx_vslti_wu:
  case Intrinsic::loongarch_lsx_vslti_du:
  case Intrinsic::loongarch_lsx_vbsll_v:
  case Intrinsic::loongarch_lsx_vbsrl_v:
  case Intrinsic::loongarch_lasx_xvsat_w:
  case Intrinsic::loongarch_lasx_xvsat_wu:
  case Intrinsic::loongarch_lasx_xvrotri_w:
  case Intrinsic::loongarch_lasx_xvsllwil_d_w:
  case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
  case Intrinsic::loongarch_lasx_xvsrlri_w:
  case Intrinsic::loongarch_lasx_xvsrari_w:
  case Intrinsic::loongarch_lasx_xvslei_bu:
  case Intrinsic::loongarch_lasx_xvslei_hu:
  case Intrinsic::loongarch_lasx_xvslei_wu:
  case Intrinsic::loongarch_lasx_xvslei_du:
  case Intrinsic::loongarch_lasx_xvslti_bu:
  case Intrinsic::loongarch_lasx_xvslti_hu:
  case Intrinsic::loongarch_lasx_xvslti_wu:
  case Intrinsic::loongarch_lasx_xvslti_du:
  case Intrinsic::loongarch_lasx_xvbsll_v:
  case Intrinsic::loongarch_lasx_xvbsrl_v:
    return checkIntrinsicImmArg<5>(Op, 2, DAG);
  // simm5 immediate in operand 2 (signed compares).
  case Intrinsic::loongarch_lsx_vseqi_b:
  case Intrinsic::loongarch_lsx_vseqi_h:
  case Intrinsic::loongarch_lsx_vseqi_w:
  case Intrinsic::loongarch_lsx_vseqi_d:
  case Intrinsic::loongarch_lsx_vslei_b:
  case Intrinsic::loongarch_lsx_vslei_h:
  case Intrinsic::loongarch_lsx_vslei_w:
  case Intrinsic::loongarch_lsx_vslei_d:
  case Intrinsic::loongarch_lsx_vslti_b:
  case Intrinsic::loongarch_lsx_vslti_h:
  case Intrinsic::loongarch_lsx_vslti_w:
  case Intrinsic::loongarch_lsx_vslti_d:
  case Intrinsic::loongarch_lasx_xvseqi_b:
  case Intrinsic::loongarch_lasx_xvseqi_h:
  case Intrinsic::loongarch_lasx_xvseqi_w:
  case Intrinsic::loongarch_lasx_xvseqi_d:
  case Intrinsic::loongarch_lasx_xvslei_b:
  case Intrinsic::loongarch_lasx_xvslei_h:
  case Intrinsic::loongarch_lasx_xvslei_w:
  case Intrinsic::loongarch_lasx_xvslei_d:
  case Intrinsic::loongarch_lasx_xvslti_b:
  case Intrinsic::loongarch_lasx_xvslti_h:
  case Intrinsic::loongarch_lasx_xvslti_w:
  case Intrinsic::loongarch_lasx_xvslti_d:
    return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
  // uimm5 immediate in operand 3.
  case Intrinsic::loongarch_lsx_vsrlni_h_w:
  case Intrinsic::loongarch_lsx_vsrani_h_w:
  case Intrinsic::loongarch_lsx_vsrlrni_h_w:
  case Intrinsic::loongarch_lsx_vsrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_h_w:
  case Intrinsic::loongarch_lsx_vssrani_h_w:
  case Intrinsic::loongarch_lsx_vssrlni_hu_w:
  case Intrinsic::loongarch_lsx_vssrani_hu_w:
  case Intrinsic::loongarch_lsx_vssrlrni_h_w:
  case Intrinsic::loongarch_lsx_vssrarni_h_w:
  case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
  case Intrinsic::loongarch_lsx_vssrarni_hu_w:
  case Intrinsic::loongarch_lsx_vfrstpi_b:
  case Intrinsic::loongarch_lsx_vfrstpi_h:
  case Intrinsic::loongarch_lasx_xvsrlni_h_w:
  case Intrinsic::loongarch_lasx_xvsrani_h_w:
  case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvsrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_h_w:
  case Intrinsic::loongarch_lasx_xvssrani_h_w:
  case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrani_hu_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
  case Intrinsic::loongarch_lasx_xvssrarni_h_w:
  case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
  case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
  case Intrinsic::loongarch_lasx_xvfrstpi_b:
  case Intrinsic::loongarch_lasx_xvfrstpi_h:
    return checkIntrinsicImmArg<5>(Op, 3, DAG);
  // uimm6 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vsat_d:
  case Intrinsic::loongarch_lsx_vsat_du:
  case Intrinsic::loongarch_lsx_vrotri_d:
  case Intrinsic::loongarch_lsx_vsrlri_d:
  case Intrinsic::loongarch_lsx_vsrari_d:
  case Intrinsic::loongarch_lasx_xvsat_d:
  case Intrinsic::loongarch_lasx_xvsat_du:
  case Intrinsic::loongarch_lasx_xvrotri_d:
  case Intrinsic::loongarch_lasx_xvsrlri_d:
  case Intrinsic::loongarch_lasx_xvsrari_d:
    return checkIntrinsicImmArg<6>(Op, 2, DAG);
  // uimm6 immediate in operand 3 (narrowing shifts: w from d).
  case Intrinsic::loongarch_lsx_vsrlni_w_d:
  case Intrinsic::loongarch_lsx_vsrani_w_d:
  case Intrinsic::loongarch_lsx_vsrlrni_w_d:
  case Intrinsic::loongarch_lsx_vsrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_w_d:
  case Intrinsic::loongarch_lsx_vssrani_w_d:
  case Intrinsic::loongarch_lsx_vssrlni_wu_d:
  case Intrinsic::loongarch_lsx_vssrani_wu_d:
  case Intrinsic::loongarch_lsx_vssrlrni_w_d:
  case Intrinsic::loongarch_lsx_vssrarni_w_d:
  case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
  case Intrinsic::loongarch_lsx_vssrarni_wu_d:
  case Intrinsic::loongarch_lasx_xvsrlni_w_d:
  case Intrinsic::loongarch_lasx_xvsrani_w_d:
  case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvsrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_w_d:
  case Intrinsic::loongarch_lasx_xvssrani_w_d:
  case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrani_wu_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
  case Intrinsic::loongarch_lasx_xvssrarni_w_d:
  case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
  case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
    return checkIntrinsicImmArg<6>(Op, 3, DAG);
  // uimm7 immediate in operand 3 (narrowing shifts: d from q).
  case Intrinsic::loongarch_lsx_vsrlni_d_q:
  case Intrinsic::loongarch_lsx_vsrani_d_q:
  case Intrinsic::loongarch_lsx_vsrlrni_d_q:
  case Intrinsic::loongarch_lsx_vsrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_d_q:
  case Intrinsic::loongarch_lsx_vssrani_d_q:
  case Intrinsic::loongarch_lsx_vssrlni_du_q:
  case Intrinsic::loongarch_lsx_vssrani_du_q:
  case Intrinsic::loongarch_lsx_vssrlrni_d_q:
  case Intrinsic::loongarch_lsx_vssrarni_d_q:
  case Intrinsic::loongarch_lsx_vssrlrni_du_q:
  case Intrinsic::loongarch_lsx_vssrarni_du_q:
  case Intrinsic::loongarch_lasx_xvsrlni_d_q:
  case Intrinsic::loongarch_lasx_xvsrani_d_q:
  case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvsrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_d_q:
  case Intrinsic::loongarch_lasx_xvssrani_d_q:
  case Intrinsic::loongarch_lasx_xvssrlni_du_q:
  case Intrinsic::loongarch_lasx_xvssrani_du_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
  case Intrinsic::loongarch_lasx_xvssrarni_d_q:
  case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
  case Intrinsic::loongarch_lasx_xvssrarni_du_q:
    return checkIntrinsicImmArg<7>(Op, 3, DAG);
  // uimm8 immediate in operand 2.
  case Intrinsic::loongarch_lsx_vnori_b:
  case Intrinsic::loongarch_lsx_vshuf4i_b:
  case Intrinsic::loongarch_lsx_vshuf4i_h:
  case Intrinsic::loongarch_lsx_vshuf4i_w:
  case Intrinsic::loongarch_lasx_xvnori_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_b:
  case Intrinsic::loongarch_lasx_xvshuf4i_h:
  case Intrinsic::loongarch_lasx_xvshuf4i_w:
  case Intrinsic::loongarch_lasx_xvpermi_d:
    return checkIntrinsicImmArg<8>(Op, 2, DAG);
  // uimm8 immediate in operand 3.
  case Intrinsic::loongarch_lsx_vshuf4i_d:
  case Intrinsic::loongarch_lsx_vpermi_w:
  case Intrinsic::loongarch_lsx_vbitseli_b:
  case Intrinsic::loongarch_lsx_vextrins_b:
  case Intrinsic::loongarch_lsx_vextrins_h:
  case Intrinsic::loongarch_lsx_vextrins_w:
  case Intrinsic::loongarch_lsx_vextrins_d:
  case Intrinsic::loongarch_lasx_xvshuf4i_d:
  case Intrinsic::loongarch_lasx_xvpermi_w:
  case Intrinsic::loongarch_lasx_xvpermi_q:
  case Intrinsic::loongarch_lasx_xvbitseli_b:
  case Intrinsic::loongarch_lasx_xvextrins_b:
  case Intrinsic::loongarch_lasx_xvextrins_h:
  case Intrinsic::loongarch_lasx_xvextrins_w:
  case Intrinsic::loongarch_lasx_xvextrins_d:
    return checkIntrinsicImmArg<8>(Op, 3, DAG);
  // simm10 immediate in operand 1 (element replication).
  case Intrinsic::loongarch_lsx_vrepli_b:
  case Intrinsic::loongarch_lsx_vrepli_h:
  case Intrinsic::loongarch_lsx_vrepli_w:
  case Intrinsic::loongarch_lsx_vrepli_d:
  case Intrinsic::loongarch_lasx_xvrepli_b:
  case Intrinsic::loongarch_lasx_xvrepli_h:
  case Intrinsic::loongarch_lasx_xvrepli_w:
  case Intrinsic::loongarch_lasx_xvrepli_d:
    return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
  // simm13 immediate in operand 1 (load immediate).
  case Intrinsic::loongarch_lsx_vldi:
  case Intrinsic::loongarch_lasx_xvldi:
    return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
  }
}
4182
4183// Helper function that emits error message for intrinsics with chain and return
4184// merge values of a UNDEF and the chain.
4186 StringRef ErrorMsg,
4187 SelectionDAG &DAG) {
4188 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4189 return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
4190 SDLoc(Op));
4191}
4192
4193SDValue
4194LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4195 SelectionDAG &DAG) const {
4196 SDLoc DL(Op);
4197 MVT GRLenVT = Subtarget.getGRLenVT();
4198 EVT VT = Op.getValueType();
4199 SDValue Chain = Op.getOperand(0);
4200 const StringRef ErrorMsgOOR = "argument out of range";
4201 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4202 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4203
4204 switch (Op.getConstantOperandVal(1)) {
4205 default:
4206 return Op;
4207 case Intrinsic::loongarch_crc_w_b_w:
4208 case Intrinsic::loongarch_crc_w_h_w:
4209 case Intrinsic::loongarch_crc_w_w_w:
4210 case Intrinsic::loongarch_crc_w_d_w:
4211 case Intrinsic::loongarch_crcc_w_b_w:
4212 case Intrinsic::loongarch_crcc_w_h_w:
4213 case Intrinsic::loongarch_crcc_w_w_w:
4214 case Intrinsic::loongarch_crcc_w_d_w:
4215 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
4216 case Intrinsic::loongarch_csrrd_w:
4217 case Intrinsic::loongarch_csrrd_d: {
4218 unsigned Imm = Op.getConstantOperandVal(2);
4219 return !isUInt<14>(Imm)
4220 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4221 : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
4222 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4223 }
4224 case Intrinsic::loongarch_csrwr_w:
4225 case Intrinsic::loongarch_csrwr_d: {
4226 unsigned Imm = Op.getConstantOperandVal(3);
4227 return !isUInt<14>(Imm)
4228 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4229 : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
4230 {Chain, Op.getOperand(2),
4231 DAG.getConstant(Imm, DL, GRLenVT)});
4232 }
4233 case Intrinsic::loongarch_csrxchg_w:
4234 case Intrinsic::loongarch_csrxchg_d: {
4235 unsigned Imm = Op.getConstantOperandVal(4);
4236 return !isUInt<14>(Imm)
4237 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4238 : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
4239 {Chain, Op.getOperand(2), Op.getOperand(3),
4240 DAG.getConstant(Imm, DL, GRLenVT)});
4241 }
4242 case Intrinsic::loongarch_iocsrrd_d: {
4243 return DAG.getNode(
4244 LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
4245 {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
4246 }
4247#define IOCSRRD_CASE(NAME, NODE) \
4248 case Intrinsic::loongarch_##NAME: { \
4249 return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
4250 {Chain, Op.getOperand(2)}); \
4251 }
4252 IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
4253 IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
4254 IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
4255#undef IOCSRRD_CASE
4256 case Intrinsic::loongarch_cpucfg: {
4257 return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
4258 {Chain, Op.getOperand(2)});
4259 }
4260 case Intrinsic::loongarch_lddir_d: {
4261 unsigned Imm = Op.getConstantOperandVal(3);
4262 return !isUInt<8>(Imm)
4263 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4264 : Op;
4265 }
4266 case Intrinsic::loongarch_movfcsr2gr: {
4267 if (!Subtarget.hasBasicF())
4268 return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
4269 unsigned Imm = Op.getConstantOperandVal(2);
4270 return !isUInt<2>(Imm)
4271 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4272 : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
4273 {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
4274 }
4275 case Intrinsic::loongarch_lsx_vld:
4276 case Intrinsic::loongarch_lsx_vldrepl_b:
4277 case Intrinsic::loongarch_lasx_xvld:
4278 case Intrinsic::loongarch_lasx_xvldrepl_b:
4279 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4280 ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
4281 : SDValue();
4282 case Intrinsic::loongarch_lsx_vldrepl_h:
4283 case Intrinsic::loongarch_lasx_xvldrepl_h:
4284 return !isShiftedInt<11, 1>(
4285 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4287 Op, "argument out of range or not a multiple of 2", DAG)
4288 : SDValue();
4289 case Intrinsic::loongarch_lsx_vldrepl_w:
4290 case Intrinsic::loongarch_lasx_xvldrepl_w:
4291 return !isShiftedInt<10, 2>(
4292 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4294 Op, "argument out of range or not a multiple of 4", DAG)
4295 : SDValue();
4296 case Intrinsic::loongarch_lsx_vldrepl_d:
4297 case Intrinsic::loongarch_lasx_xvldrepl_d:
4298 return !isShiftedInt<9, 3>(
4299 cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
4301 Op, "argument out of range or not a multiple of 8", DAG)
4302 : SDValue();
4303 }
4304}
4305
4306// Helper function that emits error message for intrinsics with void return
4307// value and return the chain.
4309 SelectionDAG &DAG) {
4310
4311 DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
4312 return Op.getOperand(0);
4313}
4314
4315SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
4316 SelectionDAG &DAG) const {
4317 SDLoc DL(Op);
4318 MVT GRLenVT = Subtarget.getGRLenVT();
4319 SDValue Chain = Op.getOperand(0);
4320 uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
4321 SDValue Op2 = Op.getOperand(2);
4322 const StringRef ErrorMsgOOR = "argument out of range";
4323 const StringRef ErrorMsgReqLA64 = "requires loongarch64";
4324 const StringRef ErrorMsgReqLA32 = "requires loongarch32";
4325 const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
4326
4327 switch (IntrinsicEnum) {
4328 default:
4329 // TODO: Add more Intrinsics.
4330 return SDValue();
4331 case Intrinsic::loongarch_cacop_d:
4332 case Intrinsic::loongarch_cacop_w: {
4333 if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
4334 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
4335 if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
4336 return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
4337 // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
4338 unsigned Imm1 = Op2->getAsZExtVal();
4339 int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
4340 if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
4341 return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
4342 return Op;
4343 }
4344 case Intrinsic::loongarch_dbar: {
4345 unsigned Imm = Op2->getAsZExtVal();
4346 return !isUInt<15>(Imm)
4347 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4348 : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
4349 DAG.getConstant(Imm, DL, GRLenVT));
4350 }
4351 case Intrinsic::loongarch_ibar: {
4352 unsigned Imm = Op2->getAsZExtVal();
4353 return !isUInt<15>(Imm)
4354 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4355 : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
4356 DAG.getConstant(Imm, DL, GRLenVT));
4357 }
4358 case Intrinsic::loongarch_break: {
4359 unsigned Imm = Op2->getAsZExtVal();
4360 return !isUInt<15>(Imm)
4361 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4362 : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
4363 DAG.getConstant(Imm, DL, GRLenVT));
4364 }
4365 case Intrinsic::loongarch_movgr2fcsr: {
4366 if (!Subtarget.hasBasicF())
4367 return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
4368 unsigned Imm = Op2->getAsZExtVal();
4369 return !isUInt<2>(Imm)
4370 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4371 : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
4372 DAG.getConstant(Imm, DL, GRLenVT),
4373 DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
4374 Op.getOperand(3)));
4375 }
4376 case Intrinsic::loongarch_syscall: {
4377 unsigned Imm = Op2->getAsZExtVal();
4378 return !isUInt<15>(Imm)
4379 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4380 : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
4381 DAG.getConstant(Imm, DL, GRLenVT));
4382 }
4383#define IOCSRWR_CASE(NAME, NODE) \
4384 case Intrinsic::loongarch_##NAME: { \
4385 SDValue Op3 = Op.getOperand(3); \
4386 return Subtarget.is64Bit() \
4387 ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
4388 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
4389 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
4390 : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
4391 Op3); \
4392 }
4393 IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
4394 IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
4395 IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
4396#undef IOCSRWR_CASE
4397 case Intrinsic::loongarch_iocsrwr_d: {
4398 return !Subtarget.is64Bit()
4399 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4400 : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
4401 Op2,
4402 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
4403 Op.getOperand(3)));
4404 }
4405#define ASRT_LE_GT_CASE(NAME) \
4406 case Intrinsic::loongarch_##NAME: { \
4407 return !Subtarget.is64Bit() \
4408 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
4409 : Op; \
4410 }
4411 ASRT_LE_GT_CASE(asrtle_d)
4412 ASRT_LE_GT_CASE(asrtgt_d)
4413#undef ASRT_LE_GT_CASE
4414 case Intrinsic::loongarch_ldpte_d: {
4415 unsigned Imm = Op.getConstantOperandVal(3);
4416 return !Subtarget.is64Bit()
4417 ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
4418 : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4419 : Op;
4420 }
4421 case Intrinsic::loongarch_lsx_vst:
4422 case Intrinsic::loongarch_lasx_xvst:
4423 return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
4424 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4425 : SDValue();
4426 case Intrinsic::loongarch_lasx_xvstelm_b:
4427 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4428 !isUInt<5>(Op.getConstantOperandVal(5)))
4429 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4430 : SDValue();
4431 case Intrinsic::loongarch_lsx_vstelm_b:
4432 return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4433 !isUInt<4>(Op.getConstantOperandVal(5)))
4434 ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
4435 : SDValue();
4436 case Intrinsic::loongarch_lasx_xvstelm_h:
4437 return (!isShiftedInt<8, 1>(
4438 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4439 !isUInt<4>(Op.getConstantOperandVal(5)))
4441 Op, "argument out of range or not a multiple of 2", DAG)
4442 : SDValue();
4443 case Intrinsic::loongarch_lsx_vstelm_h:
4444 return (!isShiftedInt<8, 1>(
4445 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4446 !isUInt<3>(Op.getConstantOperandVal(5)))
4448 Op, "argument out of range or not a multiple of 2", DAG)
4449 : SDValue();
4450 case Intrinsic::loongarch_lasx_xvstelm_w:
4451 return (!isShiftedInt<8, 2>(
4452 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4453 !isUInt<3>(Op.getConstantOperandVal(5)))
4455 Op, "argument out of range or not a multiple of 4", DAG)
4456 : SDValue();
4457 case Intrinsic::loongarch_lsx_vstelm_w:
4458 return (!isShiftedInt<8, 2>(
4459 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4460 !isUInt<2>(Op.getConstantOperandVal(5)))
4462 Op, "argument out of range or not a multiple of 4", DAG)
4463 : SDValue();
4464 case Intrinsic::loongarch_lasx_xvstelm_d:
4465 return (!isShiftedInt<8, 3>(
4466 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4467 !isUInt<2>(Op.getConstantOperandVal(5)))
4469 Op, "argument out of range or not a multiple of 8", DAG)
4470 : SDValue();
4471 case Intrinsic::loongarch_lsx_vstelm_d:
4472 return (!isShiftedInt<8, 3>(
4473 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
4474 !isUInt<1>(Op.getConstantOperandVal(5)))
4476 Op, "argument out of range or not a multiple of 8", DAG)
4477 : SDValue();
4478 }
4479}
4480
4481SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
4482 SelectionDAG &DAG) const {
4483 SDLoc DL(Op);
4484 SDValue Lo = Op.getOperand(0);
4485 SDValue Hi = Op.getOperand(1);
4486 SDValue Shamt = Op.getOperand(2);
4487 EVT VT = Lo.getValueType();
4488
4489 // if Shamt-GRLen < 0: // Shamt < GRLen
4490 // Lo = Lo << Shamt
4491 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
4492 // else:
4493 // Lo = 0
4494 // Hi = Lo << (Shamt-GRLen)
4495
4496 SDValue Zero = DAG.getConstant(0, DL, VT);
4497 SDValue One = DAG.getConstant(1, DL, VT);
4498 SDValue MinusGRLen =
4499 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4500 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4501 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4502 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4503
4504 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
4505 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
4506 SDValue ShiftRightLo =
4507 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
4508 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
4509 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
4510 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
4511
4512 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4513
4514 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
4515 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4516
4517 SDValue Parts[2] = {Lo, Hi};
4518 return DAG.getMergeValues(Parts, DL);
4519}
4520
4521SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
4522 SelectionDAG &DAG,
4523 bool IsSRA) const {
4524 SDLoc DL(Op);
4525 SDValue Lo = Op.getOperand(0);
4526 SDValue Hi = Op.getOperand(1);
4527 SDValue Shamt = Op.getOperand(2);
4528 EVT VT = Lo.getValueType();
4529
4530 // SRA expansion:
4531 // if Shamt-GRLen < 0: // Shamt < GRLen
4532 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4533 // Hi = Hi >>s Shamt
4534 // else:
4535 // Lo = Hi >>s (Shamt-GRLen);
4536 // Hi = Hi >>s (GRLen-1)
4537 //
4538 // SRL expansion:
4539 // if Shamt-GRLen < 0: // Shamt < GRLen
4540 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
4541 // Hi = Hi >>u Shamt
4542 // else:
4543 // Lo = Hi >>u (Shamt-GRLen);
4544 // Hi = 0;
4545
4546 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4547
4548 SDValue Zero = DAG.getConstant(0, DL, VT);
4549 SDValue One = DAG.getConstant(1, DL, VT);
4550 SDValue MinusGRLen =
4551 DAG.getSignedConstant(-(int)Subtarget.getGRLen(), DL, VT);
4552 SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
4553 SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
4554 SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
4555
4556 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4557 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4558 SDValue ShiftLeftHi =
4559 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
4560 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4561 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4562 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
4563 SDValue HiFalse =
4564 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
4565
4566 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
4567
4568 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4569 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4570
4571 SDValue Parts[2] = {Lo, Hi};
4572 return DAG.getMergeValues(Parts, DL);
4573}
4574
4575// Returns the opcode of the target-specific SDNode that implements the 32-bit
4576// form of the given Opcode.
4577static unsigned getLoongArchWOpcode(unsigned Opcode) {
4578 switch (Opcode) {
4579 default:
4580 llvm_unreachable("Unexpected opcode");
4581 case ISD::SDIV:
4582 return LoongArchISD::DIV_W;
4583 case ISD::UDIV:
4584 return LoongArchISD::DIV_WU;
4585 case ISD::SREM:
4586 return LoongArchISD::MOD_W;
4587 case ISD::UREM:
4588 return LoongArchISD::MOD_WU;
4589 case ISD::SHL:
4590 return LoongArchISD::SLL_W;
4591 case ISD::SRA:
4592 return LoongArchISD::SRA_W;
4593 case ISD::SRL:
4594 return LoongArchISD::SRL_W;
4595 case ISD::ROTL:
4596 case ISD::ROTR:
4597 return LoongArchISD::ROTR_W;
4598 case ISD::CTTZ:
4599 return LoongArchISD::CTZ_W;
4600 case ISD::CTLZ:
4601 return LoongArchISD::CLZ_W;
4602 }
4603}
4604
4605// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
4606// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
4607// otherwise be promoted to i64, making it difficult to select the
4608// SLL_W/.../*W later one because the fact the operation was originally of
4609// type i8/i16/i32 is lost.
4611 unsigned ExtOpc = ISD::ANY_EXTEND) {
4612 SDLoc DL(N);
4613 unsigned WOpcode = getLoongArchWOpcode(N->getOpcode());
4614 SDValue NewOp0, NewRes;
4615
4616 switch (NumOp) {
4617 default:
4618 llvm_unreachable("Unexpected NumOp");
4619 case 1: {
4620 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4621 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
4622 break;
4623 }
4624 case 2: {
4625 NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
4626 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
4627 if (N->getOpcode() == ISD::ROTL) {
4628 SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
4629 NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
4630 }
4631 NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
4632 break;
4633 }
4634 // TODO:Handle more NumOp.
4635 }
4636
4637 // ReplaceNodeResults requires we maintain the same type for the return
4638 // value.
4639 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
4640}
4641
4642// Converts the given 32-bit operation to a i64 operation with signed extension
4643// semantic to reduce the signed extension instructions.
4645 SDLoc DL(N);
4646 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
4647 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
4648 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
4649 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
4650 DAG.getValueType(MVT::i32));
4651 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
4652}
4653
4654// Helper function that emits error message for intrinsics with/without chain
4655// and return a UNDEF or and the chain as the results.
4658 StringRef ErrorMsg, bool WithChain = true) {
4659 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
4660 Results.push_back(DAG.getUNDEF(N->getValueType(0)));
4661 if (!WithChain)
4662 return;
4663 Results.push_back(N->getOperand(0));
4664}
4665
4666template <unsigned N>
4667static void
4669 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
4670 unsigned ResOp) {
4671 const StringRef ErrorMsgOOR = "argument out of range";
4672 unsigned Imm = Node->getConstantOperandVal(2);
4673 if (!isUInt<N>(Imm)) {
4675 /*WithChain=*/false);
4676 return;
4677 }
4678 SDLoc DL(Node);
4679 SDValue Vec = Node->getOperand(1);
4680
4681 SDValue PickElt =
4682 DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
4683 DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
4685 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
4686 PickElt.getValue(0)));
4687}
4688
4691 SelectionDAG &DAG,
4692 const LoongArchSubtarget &Subtarget,
4693 unsigned ResOp) {
4694 SDLoc DL(N);
4695 SDValue Vec = N->getOperand(1);
4696
4697 SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
4698 Results.push_back(
4699 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
4700}
4701
// Custom type-legalization for chainless LSX/LASX intrinsics whose result
// type is illegal on this subtarget. Dispatches on the intrinsic ID (operand
// 0) to the pickve2gr / branch-condition helpers above.
static void
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                 SelectionDAG &DAG,
                                 const LoongArchSubtarget &Subtarget) {
  switch (N->getConstantOperandVal(0)) {
  default:
    llvm_unreachable("Unexpected Intrinsic.");
  // Sign-extending element extracts. The template argument is the width of
  // the valid lane-index immediate (16 byte lanes -> 4 bits, etc.).
  case Intrinsic::loongarch_lsx_vpickve2gr_b:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_h:
  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_w:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_SEXT_ELT);
    break;
  // Zero-extending element extracts.
  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
                                LoongArchISD::VPICK_ZEXT_ELT);
    break;
  // "Branch if all elements zero" tests.
  case Intrinsic::loongarch_lsx_bz_b:
  case Intrinsic::loongarch_lsx_bz_h:
  case Intrinsic::loongarch_lsx_bz_w:
  case Intrinsic::loongarch_lsx_bz_d:
  case Intrinsic::loongarch_lasx_xbz_b:
  case Intrinsic::loongarch_lasx_xbz_h:
  case Intrinsic::loongarch_lasx_xbz_w:
  case Intrinsic::loongarch_lasx_xbz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_ZERO);
    break;
  // "Branch if any element zero" tests.
  case Intrinsic::loongarch_lsx_bz_v:
  case Intrinsic::loongarch_lasx_xbz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_ZERO);
    break;
  // "Branch if all elements non-zero" tests.
  case Intrinsic::loongarch_lsx_bnz_b:
  case Intrinsic::loongarch_lsx_bnz_h:
  case Intrinsic::loongarch_lsx_bnz_w:
  case Intrinsic::loongarch_lsx_bnz_d:
  case Intrinsic::loongarch_lasx_xbnz_b:
  case Intrinsic::loongarch_lasx_xbnz_h:
  case Intrinsic::loongarch_lasx_xbnz_w:
  case Intrinsic::loongarch_lasx_xbnz_d:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VALL_NONZERO);
    break;
  // "Branch if any element non-zero" tests.
  case Intrinsic::loongarch_lsx_bnz_v:
  case Intrinsic::loongarch_lasx_xbnz_v:
    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
                                LoongArchISD::VANY_NONZERO);
    break;
  }
}
4769
4772 SelectionDAG &DAG) {
4773 assert(N->getValueType(0) == MVT::i128 &&
4774 "AtomicCmpSwap on types less than 128 should be legal");
4775 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4776
4777 unsigned Opcode;
4778 switch (MemOp->getMergedOrdering()) {
4782 Opcode = LoongArch::PseudoCmpXchg128Acquire;
4783 break;
4786 Opcode = LoongArch::PseudoCmpXchg128;
4787 break;
4788 default:
4789 llvm_unreachable("Unexpected ordering!");
4790 }
4791
4792 SDLoc DL(N);
4793 auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
4794 auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
4795 SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second,
4796 NewVal.first, NewVal.second, N->getOperand(0)};
4797
4798 SDNode *CmpSwap = DAG.getMachineNode(
4799 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other),
4800 Ops);
4801 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4802 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
4803 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
4804 Results.push_back(SDValue(CmpSwap, 3));
4805}
4806
// Custom type-legalization hook: replace the results of a node whose result
// type is illegal (mostly i32 on LA64, plus vector truncates and 128-bit
// cmpxchg) with legal-typed equivalents.
void LoongArchTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to legalize this operation");
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // With div32 the hardware only reads the low 32 bits, so an any-extend
    // suffices; otherwise the operands must be sign-extended.
    Results.push_back(customLegalizeToWOp(N, DAG, 2,
                                          Subtarget.hasDiv32() && VT == MVT::i32
                                              ? ISD::ANY_EXTEND
                                              : ISD::SIGN_EXTEND));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    // Constant shift amounts are left for pattern-based selection.
    if (N->getOperand(1).getOpcode() != ISD::Constant) {
      Results.push_back(customLegalizeToWOp(N, DAG, 2));
      break;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 2));
    break;
  case ISD::FP_TO_SINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Src = N->getOperand(0);
    EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
    if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
        TargetLowering::TypeSoftenFloat) {
      if (!isTypeLegal(Src.getValueType()))
        return;
      if (Src.getValueType() == MVT::f16)
        Src = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
      // FTINT converts in the FP domain; bitcast moves the integer result out.
      SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
      Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
      return;
    }
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'.
    RTLIB::Libcall LC;
    LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
    MakeLibCallOptions CallOptions;
    EVT OpVT = Src.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, VT);
    SDValue Chain = SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
    Results.push_back(Result);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = N->getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
        Subtarget.hasBasicF()) {
      // f32 -> i32 on LA64: move via FPR-to-GPR, then truncate i64 -> i32.
      SDValue Dst =
          DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
    } else if (VT == MVT::i64 && SrcVT == MVT::f64 && !Subtarget.is64Bit()) {
      // f64 -> i64 on LA32: split the double into two i32 halves and rebuild
      // the i64 pair.
      SDValue NewReg = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
                                   DAG.getVTList(MVT::i32, MVT::i32), Src);
      SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
                                   NewReg.getValue(0), NewReg.getValue(1));
      Results.push_back(RetReg);
    }
    break;
  }
  case ISD::FP_TO_UINT: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    auto &TLI = DAG.getTargetLoweringInfo();
    SDValue Tmp1, Tmp2;
    TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i16 || VT == MVT::i32) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 16:
      Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
      break;
    case 32:
      // Only LA64 will get to here due to the size mismatch between VT and
      // GRLenVT, LA32 lowering is directly defined in LoongArchInstrInfo.
      Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = N->getOperand(0);
    assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
           "Unexpected custom legalization");
    MVT GRLenVT = Subtarget.getGRLenVT();
    SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
    SDValue Tmp;
    switch (VT.getSizeInBits()) {
    default:
      llvm_unreachable("Unexpected operand width");
    case 8:
      Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
      break;
    case 32:
      Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
      break;
    }
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
    break;
  }
  case ISD::CTLZ:
  case ISD::CTTZ: {
    assert(VT == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG, 1));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SDValue Chain = N->getOperand(0);
    SDValue Op2 = N->getOperand(2);
    MVT GRLenVT = Subtarget.getGRLenVT();
    const StringRef ErrorMsgOOR = "argument out of range";
    const StringRef ErrorMsgReqLA64 = "requires loongarch64";
    const StringRef ErrorMsgReqF = "requires basic 'f' target feature";

    switch (N->getConstantOperandVal(1)) {
    default:
      llvm_unreachable("Unexpected Intrinsic.");
    case Intrinsic::loongarch_movfcsr2gr: {
      if (!Subtarget.hasBasicF()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
        return;
      }
      unsigned Imm = Op2->getAsZExtVal();
      // FCSR index is a 2-bit immediate.
      if (!isUInt<2>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue MOVFCSR2GRResults = DAG.getNode(
          LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
          {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
      Results.push_back(MOVFCSR2GRResults.getValue(1));
      break;
    }
// CRC intrinsics with two GPR operands: both are any-extended to i64.
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
    CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
    CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
#undef CRC_CASE_EXT_BINARYOP

// CRC intrinsics whose first data operand is already i64: only the second
// operand needs extension.
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue NODE = DAG.getNode(                                                \
        LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
        {Chain, Op2,                                                           \
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
    Results.push_back(NODE.getValue(1));                                       \
    break;                                                                     \
  }
    CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
    CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
#undef CRC_CASE_EXT_UNARYOP
// 64-bit CSR/IOCSR intrinsics: only valid on LA64, diagnose otherwise.
#define CSR_CASE(ID)                                                           \
  case Intrinsic::loongarch_##ID: {                                            \
    if (!Subtarget.is64Bit())                                                  \
      emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
    break;                                                                     \
  }
    CSR_CASE(csrrd_d);
    CSR_CASE(csrwr_d);
    CSR_CASE(csrxchg_d);
    CSR_CASE(iocsrrd_d);
#undef CSR_CASE
    case Intrinsic::loongarch_csrrd_w: {
      unsigned Imm = Op2->getAsZExtVal();
      // CSR number is a 14-bit immediate.
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRRDResults =
          DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
      Results.push_back(CSRRDResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrwr_w: {
      unsigned Imm = N->getConstantOperandVal(3);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRWRResults =
          DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
                       DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
      Results.push_back(CSRWRResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_csrxchg_w: {
      unsigned Imm = N->getConstantOperandVal(4);
      if (!isUInt<14>(Imm)) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
        return;
      }
      SDValue CSRXCHGResults = DAG.getNode(
          LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
          {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
           DAG.getConstant(Imm, DL, GRLenVT)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
      Results.push_back(CSRXCHGResults.getValue(1));
      break;
    }
// IOCSR reads narrower than 64 bits: any-extend the address operand.
#define IOCSRRD_CASE(NAME, NODE)                                               \
  case Intrinsic::loongarch_##NAME: {                                          \
    SDValue IOCSRRDResults =                                                   \
        DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
                    {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
    Results.push_back(                                                         \
        DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
    Results.push_back(IOCSRRDResults.getValue(1));                             \
    break;                                                                     \
  }
    IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
    IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
    IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
    case Intrinsic::loongarch_cpucfg: {
      SDValue CPUCFGResults =
          DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
                      {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
      Results.push_back(
          DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
      Results.push_back(CPUCFGResults.getValue(1));
      break;
    }
    case Intrinsic::loongarch_lddir_d: {
      if (!Subtarget.is64Bit()) {
        emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
        return;
      }
      break;
    }
    }
    break;
  }
  case ISD::READ_REGISTER: {
    // Reaching here means the register width doesn't match GRLen; emit a
    // diagnostic but still produce replacement values to keep legalization
    // going.
    if (Subtarget.is64Bit())
      DAG.getContext()->emitError(
          "On LA64, only 64-bit registers can be read.");
    else
      DAG.getContext()->emitError(
          "On LA32, only 32-bit registers can be read.");
    Results.push_back(DAG.getUNDEF(VT));
    Results.push_back(N->getOperand(0));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
    break;
  }
  case ISD::LROUND: {
    SDValue Op0 = N->getOperand(0);
    EVT OpVT = Op0.getValueType();
    RTLIB::Libcall LC =
        OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
    MakeLibCallOptions CallOptions;
    CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
    SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
    Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
    Results.push_back(Result);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP: {
    replaceCMP_XCHG_128Results(N, Results, DAG);
    break;
  }
  case ISD::TRUNCATE: {
    MVT VT = N->getSimpleValueType(0);
    if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector)
      return;

    MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
    SDValue In = N->getOperand(0);
    EVT InVT = In.getValueType();
    EVT InEltVT = InVT.getVectorElementType();
    EVT EltVT = VT.getVectorElementType();
    unsigned MinElts = VT.getVectorNumElements();
    unsigned WidenNumElts = WidenVT.getVectorNumElements();
    unsigned InBits = InVT.getSizeInBits();

    if ((128 % InBits) == 0 && WidenVT.is128BitVector()) {
      if ((InEltVT.getSizeInBits() % EltVT.getSizeInBits()) == 0) {
        int Scale = InEltVT.getSizeInBits() / EltVT.getSizeInBits();
        // Select every Scale-th lane of the widened input; remaining lanes
        // are undef (-1).
        SmallVector<int, 16> TruncMask(WidenNumElts, -1);
        for (unsigned I = 0; I < MinElts; ++I)
          TruncMask[I] = Scale * I;

        // NOTE: these inner declarations deliberately shadow the outer
        // WidenNumElts/VT; they describe the 128-bit container for the input.
        unsigned WidenNumElts = 128 / In.getScalarValueSizeInBits();
        MVT SVT = In.getSimpleValueType().getScalarType();
        MVT VT = MVT::getVectorVT(SVT, WidenNumElts);
        SDValue WidenIn =
            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), In,
                        DAG.getVectorIdxConstant(0, DL));
        assert(isTypeLegal(WidenVT) && isTypeLegal(WidenIn.getValueType()) &&
               "Illegal vector type in truncation");
        WidenIn = DAG.getBitcast(WidenVT, WidenIn);
        Results.push_back(
            DAG.getVectorShuffle(WidenVT, DL, WidenIn, WidenIn, TruncMask));
        return;
      }
    }

    break;
  }
  }
}
5171
5172/// Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
5174 SelectionDAG &DAG) {
5175 assert(N->getOpcode() == ISD::AND && "Unexpected opcode combine into ANDN");
5176
5177 MVT VT = N->getSimpleValueType(0);
5178 if (!VT.is128BitVector() && !VT.is256BitVector())
5179 return SDValue();
5180
5181 SDValue X, Y;
5182 SDValue N0 = N->getOperand(0);
5183 SDValue N1 = N->getOperand(1);
5184
5185 if (SDValue Not = isNOT(N0, DAG)) {
5186 X = Not;
5187 Y = N1;
5188 } else if (SDValue Not = isNOT(N1, DAG)) {
5189 X = Not;
5190 Y = N0;
5191 } else
5192 return SDValue();
5193
5194 X = DAG.getBitcast(VT, X);
5195 Y = DAG.getBitcast(VT, Y);
5196 return DAG.getNode(LoongArchISD::VANDN, DL, VT, X, Y);
5197}
5198
5201 const LoongArchSubtarget &Subtarget) {
5202 if (DCI.isBeforeLegalizeOps())
5203 return SDValue();
5204
5205 SDValue FirstOperand = N->getOperand(0);
5206 SDValue SecondOperand = N->getOperand(1);
5207 unsigned FirstOperandOpc = FirstOperand.getOpcode();
5208 EVT ValTy = N->getValueType(0);
5209 SDLoc DL(N);
5210 uint64_t lsb, msb;
5211 unsigned SMIdx, SMLen;
5212 ConstantSDNode *CN;
5213 SDValue NewOperand;
5214 MVT GRLenVT = Subtarget.getGRLenVT();
5215
5216 if (SDValue R = combineAndNotIntoVANDN(N, DL, DAG))
5217 return R;
5218
5219 // BSTRPICK requires the 32S feature.
5220 if (!Subtarget.has32S())
5221 return SDValue();
5222
5223 // Op's second operand must be a shifted mask.
5224 if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
5225 !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
5226 return SDValue();
5227
5228 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
5229 // Pattern match BSTRPICK.
5230 // $dst = and ((sra or srl) $src , lsb), (2**len - 1)
5231 // => BSTRPICK $dst, $src, msb, lsb
5232 // where msb = lsb + len - 1
5233
5234 // The second operand of the shift must be an immediate.
5235 if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
5236 return SDValue();
5237
5238 lsb = CN->getZExtValue();
5239
5240 // Return if the shifted mask does not start at bit 0 or the sum of its
5241 // length and lsb exceeds the word's size.
5242 if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
5243 return SDValue();
5244
5245 NewOperand = FirstOperand.getOperand(0);
5246 } else {
5247 // Pattern match BSTRPICK.
5248 // $dst = and $src, (2**len- 1) , if len > 12
5249 // => BSTRPICK $dst, $src, msb, lsb
5250 // where lsb = 0 and msb = len - 1
5251
5252 // If the mask is <= 0xfff, andi can be used instead.
5253 if (CN->getZExtValue() <= 0xfff)
5254 return SDValue();
5255
5256 // Return if the MSB exceeds.
5257 if (SMIdx + SMLen > ValTy.getSizeInBits())
5258 return SDValue();
5259
5260 if (SMIdx > 0) {
5261 // Omit if the constant has more than 2 uses. This a conservative
5262 // decision. Whether it is a win depends on the HW microarchitecture.
5263 // However it should always be better for 1 and 2 uses.
5264 if (CN->use_size() > 2)
5265 return SDValue();
5266 // Return if the constant can be composed by a single LU12I.W.
5267 if ((CN->getZExtValue() & 0xfff) == 0)
5268 return SDValue();
5269 // Return if the constand can be composed by a single ADDI with
5270 // the zero register.
5271 if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
5272 return SDValue();
5273 }
5274
5275 lsb = SMIdx;
5276 NewOperand = FirstOperand;
5277 }
5278
5279 msb = lsb + SMLen - 1;
5280 SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
5281 DAG.getConstant(msb, DL, GRLenVT),
5282 DAG.getConstant(lsb, DL, GRLenVT));
5283 if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
5284 return NR0;
5285 // Try to optimize to
5286 // bstrpick $Rd, $Rs, msb, lsb
5287 // slli $Rd, $Rd, lsb
5288 return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
5289 DAG.getConstant(lsb, DL, GRLenVT));
5290}
5291
5294 const LoongArchSubtarget &Subtarget) {
5295 // BSTRPICK requires the 32S feature.
5296 if (!Subtarget.has32S())
5297 return SDValue();
5298
5299 if (DCI.isBeforeLegalizeOps())
5300 return SDValue();
5301
5302 // $dst = srl (and $src, Mask), Shamt
5303 // =>
5304 // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
5305 // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
5306 //
5307
5308 SDValue FirstOperand = N->getOperand(0);
5309 ConstantSDNode *CN;
5310 EVT ValTy = N->getValueType(0);
5311 SDLoc DL(N);
5312 MVT GRLenVT = Subtarget.getGRLenVT();
5313 unsigned MaskIdx, MaskLen;
5314 uint64_t Shamt;
5315
5316 // The first operand must be an AND and the second operand of the AND must be
5317 // a shifted mask.
5318 if (FirstOperand.getOpcode() != ISD::AND ||
5319 !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
5320 !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
5321 return SDValue();
5322
5323 // The second operand (shift amount) must be an immediate.
5324 if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
5325 return SDValue();
5326
5327 Shamt = CN->getZExtValue();
5328 if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
5329 return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
5330 FirstOperand->getOperand(0),
5331 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5332 DAG.getConstant(Shamt, DL, GRLenVT));
5333
5334 return SDValue();
5335}
5336
// Helper to peek through bitops/trunc/setcc to determine size of source
// vector. Allows BITCASTCombine to determine what size vector generated a
// <X x i1>.
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
                                      unsigned Depth) {
  // Limit recursion.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;
  switch (Src.getOpcode()) {
  case ISD::SETCC:
  case ISD::TRUNCATE:
    return Src.getOperand(0).getValueSizeInBits() == Size;
  case ISD::FREEZE:
    return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1);
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // Bitops: both inputs must trace back to a Size-bit source.
    return checkBitcastSrcVectorSize(Src.getOperand(0), Size, Depth + 1) &&
           checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1);
  case ISD::SELECT:
  case ISD::VSELECT:
    // Only i1 conditions are looked through; both select values must match.
    return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
           checkBitcastSrcVectorSize(Src.getOperand(1), Size, Depth + 1) &&
           checkBitcastSrcVectorSize(Src.getOperand(2), Size, Depth + 1);
  case ISD::BUILD_VECTOR:
    // All-zeros/all-ones constants can be rematerialized at any size.
    return ISD::isBuildVectorAllZeros(Src.getNode()) ||
           ISD::isBuildVectorAllOnes(Src.getNode());
  }
  return false;
}
5366
5367// Helper to push sign extension of vXi1 SETCC result through bitops.
5369 SDValue Src, const SDLoc &DL) {
5370 switch (Src.getOpcode()) {
5371 case ISD::SETCC:
5372 case ISD::FREEZE:
5373 case ISD::TRUNCATE:
5374 case ISD::BUILD_VECTOR:
5375 return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5376 case ISD::AND:
5377 case ISD::XOR:
5378 case ISD::OR:
5379 return DAG.getNode(
5380 Src.getOpcode(), DL, SExtVT,
5381 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
5382 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
5383 case ISD::SELECT:
5384 case ISD::VSELECT:
5385 return DAG.getSelect(
5386 DL, SExtVT, Src.getOperand(0),
5387 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL),
5388 signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(2), DL));
5389 }
5390 llvm_unreachable("Unexpected node type for vXi1 sign extension");
5391}
5392
// Combine (bitcast (setcc vec, splat, cc)) into a [X]VMSK* mask-extraction
// node when the comparison is against an all-zeros/all-ones splat.
static SDValue
performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
    return SDValue();

  bool UseLASX;
  unsigned Opc = ISD::DELETED_NODE; // Sentinel: no matching VMSK form found.
  EVT CmpVT = Src.getOperand(0).getValueType();
  EVT EltVT = CmpVT.getVectorElementType();

  // Pick LSX (128-bit) or LASX (256-bit) based on the compared vector width.
  if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
    UseLASX = false;
  else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
           CmpVT.getSizeInBits() == 256)
    UseLASX = true;
  else
    return SDValue();

  SDValue SrcN1 = Src.getOperand(1);
  switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
  default:
    break;
  case ISD::SETEQ:
    // x == 0 => not (vmsknez.b x)
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
    break;
  case ISD::SETGT:
    // x > -1 => vmskgez.b x
    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETGE:
    // x >= 0 => vmskgez.b x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETLT:
    // x < 0 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETLE:
    // x <= -1 => vmskltz.{b,h,w,d} x
    if (ISD::isBuildVectorAllOnes(SrcN1.getNode()) &&
        (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
         EltVT == MVT::i64))
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETNE:
    // x != 0 => vmsknez.b x
    if (ISD::isBuildVectorAllZeros(SrcN1.getNode()) && EltVT == MVT::i8)
      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
    break;
  }

  if (Opc == ISD::DELETED_NODE)
    return SDValue();

  // The mask node yields one bit per lane in a GRLen register; narrow to an
  // integer of exactly NumElts bits before bitcasting to the original type.
  SDValue V = DAG.getNode(Opc, DL, Subtarget.getGRLenVT(), Src.getOperand(0));
  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
  V = DAG.getZExtOrTrunc(V, DL, T);
  return DAG.getBitcast(VT, V);
}
5466
5469 const LoongArchSubtarget &Subtarget) {
5470 SDLoc DL(N);
5471 EVT VT = N->getValueType(0);
5472 SDValue Src = N->getOperand(0);
5473 EVT SrcVT = Src.getValueType();
5474 MVT GRLenVT = Subtarget.getGRLenVT();
5475
5476 if (!DCI.isBeforeLegalizeOps())
5477 return SDValue();
5478
5479 if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
5480 return SDValue();
5481
5482 // Combine SETCC and BITCAST into [X]VMSK{LT,GE,NE} when possible
5483 SDValue Res = performSETCC_BITCASTCombine(N, DAG, DCI, Subtarget);
5484 if (Res)
5485 return Res;
5486
5487 // Generate vXi1 using [X]VMSKLTZ
5488 MVT SExtVT;
5489 unsigned Opc;
5490 bool UseLASX = false;
5491 bool PropagateSExt = false;
5492
5493 if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse()) {
5494 EVT CmpVT = Src.getOperand(0).getValueType();
5495 if (CmpVT.getSizeInBits() > 256)
5496 return SDValue();
5497 }
5498
5499 switch (SrcVT.getSimpleVT().SimpleTy) {
5500 default:
5501 return SDValue();
5502 case MVT::v2i1:
5503 SExtVT = MVT::v2i64;
5504 break;
5505 case MVT::v4i1:
5506 SExtVT = MVT::v4i32;
5507 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5508 SExtVT = MVT::v4i64;
5509 UseLASX = true;
5510 PropagateSExt = true;
5511 }
5512 break;
5513 case MVT::v8i1:
5514 SExtVT = MVT::v8i16;
5515 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5516 SExtVT = MVT::v8i32;
5517 UseLASX = true;
5518 PropagateSExt = true;
5519 }
5520 break;
5521 case MVT::v16i1:
5522 SExtVT = MVT::v16i8;
5523 if (Subtarget.hasExtLASX() && checkBitcastSrcVectorSize(Src, 256, 0)) {
5524 SExtVT = MVT::v16i16;
5525 UseLASX = true;
5526 PropagateSExt = true;
5527 }
5528 break;
5529 case MVT::v32i1:
5530 SExtVT = MVT::v32i8;
5531 UseLASX = true;
5532 break;
5533 };
5534 Src = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
5535 : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);
5536
5537 SDValue V;
5538 if (!Subtarget.has32S() || !Subtarget.hasExtLASX()) {
5539 if (Src.getSimpleValueType() == MVT::v32i8) {
5540 SDValue Lo, Hi;
5541 std::tie(Lo, Hi) = DAG.SplitVector(Src, DL);
5542 Lo = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Lo);
5543 Hi = DAG.getNode(LoongArchISD::VMSKLTZ, DL, GRLenVT, Hi);
5544 Hi = DAG.getNode(ISD::SHL, DL, GRLenVT, Hi,
5545 DAG.getConstant(16, DL, MVT::i8));
5546 V = DAG.getNode(ISD::OR, DL, GRLenVT, Lo, Hi);
5547 } else if (UseLASX) {
5548 return SDValue();
5549 }
5550 }
5551
5552 if (!V) {
5553 Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
5554 V = DAG.getNode(Opc, DL, GRLenVT, Src);
5555 }
5556
5558 V = DAG.getZExtOrTrunc(V, DL, T);
5559 return DAG.getBitcast(VT, V);
5560}
5561
5564 const LoongArchSubtarget &Subtarget) {
  // Combine (or (and ...), ...) into a single LoongArch BSTRINS (bit-string
  // insert) node. Eight shifted-mask patterns are tried in turn; because OR
  // is commutative, the operands are swapped once and the patterns re-tried
  // via the Retry/Retry2 labels below.
5565 MVT GRLenVT = Subtarget.getGRLenVT();
5566 EVT ValTy = N->getValueType(0);
5567 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5568 ConstantSDNode *CN0, *CN1;
5569 SDLoc DL(N);
5570 unsigned ValBits = ValTy.getSizeInBits();
5571 unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
5572 unsigned Shamt;
5573 bool SwapAndRetried = false;
5574
5575 // BSTRPICK requires the 32S feature.
5576 if (!Subtarget.has32S())
5577 return SDValue();
5578
5579 if (DCI.isBeforeLegalizeOps())
5580 return SDValue();
5581
  // Only 32- and 64-bit values map onto BSTRINS.W/BSTRINS.D.
5582 if (ValBits != 32 && ValBits != 64)
5583 return SDValue();
5584
  // On the second pass through this label, N0 and N1 have been swapped.
5585Retry:
5586 // 1st pattern to match BSTRINS:
5587 // R = or (and X, mask0), (and (shl Y, lsb), mask1)
5588 // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
5589 // =>
5590 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5591 if (N0.getOpcode() == ISD::AND &&
5592 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5593 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5594 N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
5595 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5596 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5597 MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
5598 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5599 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5600 (MaskIdx0 + MaskLen0 <= ValBits)) {
5601 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
5602 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5603 N1.getOperand(0).getOperand(0),
5604 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5605 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5606 }
5607
5608 // 2nd pattern to match BSTRINS:
5609 // R = or (and X, mask0), (shl (and Y, mask1), lsb)
5610 // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
5611 // =>
5612 // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
5613 if (N0.getOpcode() == ISD::AND &&
5614 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5615 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5616 N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5617 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5618 (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
5619 (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5620 isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
5621 MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
5622 (MaskIdx0 + MaskLen0 <= ValBits)) {
5623 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
5624 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5625 N1.getOperand(0).getOperand(0),
5626 DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
5627 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5628 }
5629
5630 // 3rd pattern to match BSTRINS:
5631 // R = or (and X, mask0), (and Y, mask1)
5632 // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
5633 // =>
5634 // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
5635 // where msb = lsb + size - 1
5636 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5637 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5638 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5639 (MaskIdx0 + MaskLen0 <= 64) &&
5640 (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
5641 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5642 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
  // The (MaskLen0 & 31) below keeps the msb operand in range when a 32-bit
  // mask covers the whole value (MaskIdx0 + MaskLen0 == 32).
5643 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5644 DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
5645 DAG.getConstant(MaskIdx0, DL, GRLenVT)),
5646 DAG.getConstant(ValBits == 32
5647 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5648 : (MaskIdx0 + MaskLen0 - 1),
5649 DL, GRLenVT),
5650 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5651 }
5652
5653 // 4th pattern to match BSTRINS:
5654 // R = or (and X, mask), (shl Y, shamt)
5655 // where mask = (2**shamt - 1)
5656 // =>
5657 // R = BSTRINS X, Y, ValBits - 1, shamt
5658 // where ValBits = 32 or 64
5659 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
5660 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5661 isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
5662 MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5663 (Shamt = CN1->getZExtValue()) == MaskLen0 &&
5664 (MaskIdx0 + MaskLen0 <= ValBits)) {
5665 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
5666 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5667 N1.getOperand(0),
5668 DAG.getConstant((ValBits - 1), DL, GRLenVT),
5669 DAG.getConstant(Shamt, DL, GRLenVT));
5670 }
5671
5672 // 5th pattern to match BSTRINS:
5673 // R = or (and X, mask), const
5674 // where ~mask = (2**size - 1) << lsb, mask & const = 0
5675 // =>
5676 // R = BSTRINS X, (const >> lsb), msb, lsb
5677 // where msb = lsb + size - 1
5678 if (N0.getOpcode() == ISD::AND &&
5679 (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
5680 isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
5681 (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
5682 (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
5683 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
5684 return DAG.getNode(
5685 LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
5686 DAG.getSignedConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
5687 DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
5688 : (MaskIdx0 + MaskLen0 - 1),
5689 DL, GRLenVT),
5690 DAG.getConstant(MaskIdx0, DL, GRLenVT));
5691 }
5692
5693 // 6th pattern.
5694 // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
5695 // by the incoming bits are known to be zero.
5696 // =>
5697 // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
5698 //
5699 // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
5700 // pattern is more common than the 1st. So we put the 1st before the 6th in
5701 // order to match as many nodes as possible.
5702 ConstantSDNode *CNMask, *CNShamt;
5703 unsigned MaskIdx, MaskLen;
5704 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
5705 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5706 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5707 MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5708 CNShamt->getZExtValue() + MaskLen <= ValBits) {
5709 Shamt = CNShamt->getZExtValue();
5710 APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
  // Known-bits query proves b's bits in the insertion window are zero.
5711 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5712 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
5713 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5714 N1.getOperand(0).getOperand(0),
5715 DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
5716 DAG.getConstant(Shamt, DL, GRLenVT));
5717 }
5718 }
5719
5720 // 7th pattern.
5721 // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
5722 // overwritten by the incoming bits are known to be zero.
5723 // =>
5724 // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
5725 //
5726 // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
5727 // before the 7th in order to match as many nodes as possible.
5728 if (N1.getOpcode() == ISD::AND &&
5729 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5730 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
5731 N1.getOperand(0).getOpcode() == ISD::SHL &&
5732 (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
5733 CNShamt->getZExtValue() == MaskIdx) {
5734 APInt ShMask(ValBits, CNMask->getZExtValue());
5735 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5736 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
5737 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5738 N1.getOperand(0).getOperand(0),
5739 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5740 DAG.getConstant(MaskIdx, DL, GRLenVT));
5741 }
5742 }
5743
5744 // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
5745 if (!SwapAndRetried) {
5746 std::swap(N0, N1);
5747 SwapAndRetried = true;
5748 goto Retry;
5749 }
5750
  // Patterns 1-7 failed with both operand orders; now try pattern 8, again
  // with both operand orders.
5751 SwapAndRetried = false;
5752Retry2:
5753 // 8th pattern.
5754 // a = b | (c & shifted_mask), where all positions in b to be overwritten by
5755 // the incoming bits are known to be zero.
5756 // =>
5757 // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
5758 //
5759 // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
5760 // we put it here in order to match as many nodes as possible or generate less
5761 // instructions.
5762 if (N1.getOpcode() == ISD::AND &&
5763 (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
5764 isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
5765 APInt ShMask(ValBits, CNMask->getZExtValue());
5766 if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
5767 LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
5768 return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
5769 DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
5770 N1->getOperand(0),
5771 DAG.getConstant(MaskIdx, DL, GRLenVT)),
5772 DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
5773 DAG.getConstant(MaskIdx, DL, GRLenVT));
5774 }
5775 }
5776 // Swap N0/N1 and retry.
5777 if (!SwapAndRetried) {
5778 std::swap(N0, N1);
5779 SwapAndRetried = true;
5780 goto Retry2;
5781 }
5782
5783 return SDValue();
5784}
5785
5786static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
5787 ExtType = ISD::NON_EXTLOAD;
5788
5789 switch (V.getNode()->getOpcode()) {
5790 case ISD::LOAD: {
5791 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
5792 if ((LoadNode->getMemoryVT() == MVT::i8) ||
5793 (LoadNode->getMemoryVT() == MVT::i16)) {
5794 ExtType = LoadNode->getExtensionType();
5795 return true;
5796 }
5797 return false;
5798 }
5799 case ISD::AssertSext: {
5800 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5801 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5802 ExtType = ISD::SEXTLOAD;
5803 return true;
5804 }
5805 return false;
5806 }
5807 case ISD::AssertZext: {
5808 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
5809 if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
5810 ExtType = ISD::ZEXTLOAD;
5811 return true;
5812 }
5813 return false;
5814 }
5815 default:
5816 return false;
5817 }
5818
5819 return false;
5820}
5821
5822// Eliminate redundant truncation and zero-extension nodes.
5823// * Case 1:
5824// +------------+ +------------+ +------------+
5825// | Input1 | | Input2 | | CC |
5826// +------------+ +------------+ +------------+
5827// | | |
5828// V V +----+
5829// +------------+ +------------+ |
5830// | TRUNCATE | | TRUNCATE | |
5831// +------------+ +------------+ |
5832// | | |
5833// V V |
5834// +------------+ +------------+ |
5835// | ZERO_EXT | | ZERO_EXT | |
5836// +------------+ +------------+ |
5837// | | |
5838// | +-------------+ |
5839// V V | |
5840// +----------------+ | |
5841// | AND | | |
5842// +----------------+ | |
5843// | | |
5844// +---------------+ | |
5845// | | |
5846// V V V
5847// +-------------+
5848// | CMP |
5849// +-------------+
5850// * Case 2:
5851// +------------+ +------------+ +-------------+ +------------+ +------------+
5852// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
5853// +------------+ +------------+ +-------------+ +------------+ +------------+
5854// | | | | |
5855// V | | | |
5856// +------------+ | | | |
5857// | XOR |<---------------------+ | |
5858// +------------+ | | |
5859// | | | |
5860// V V +---------------+ |
5861// +------------+ +------------+ | |
5862// | TRUNCATE | | TRUNCATE | | +-------------------------+
5863// +------------+ +------------+ | |
5864// | | | |
5865// V V | |
5866// +------------+ +------------+ | |
5867// | ZERO_EXT | | ZERO_EXT | | |
5868// +------------+ +------------+ | |
5869// | | | |
5870// V V | |
5871// +----------------+ | |
5872// | AND | | |
5873// +----------------+ | |
5874// | | |
5875// +---------------+ | |
5876// | | |
5877// V V V
5878// +-------------+
5879// | CMP |
5880// +-------------+
5883 const LoongArchSubtarget &Subtarget) {
  // See the Case 1 / Case 2 diagrams above: when both AND inputs come from
  // (zero_ext (truncate X)) chains whose sources are already narrow
  // (i8/i16) loads or Assert[SZ]ext values, the truncate/zext pairs are
  // redundant and the AND/CMP can operate on the sources directly.
5884 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
5885
5886 SDNode *AndNode = N->getOperand(0).getNode();
5887 if (AndNode->getOpcode() != ISD::AND)
5888 return SDValue();
5889
5890 SDValue AndInputValue2 = AndNode->getOperand(1);
5891 if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
5892 return SDValue();
5893
5894 SDValue CmpInputValue = N->getOperand(1);
5895 SDValue AndInputValue1 = AndNode->getOperand(0);
  // Case 2: the first AND input is (xor X, -1) and the comparison is an
  // equality test against zero.
5896 if (AndInputValue1.getOpcode() == ISD::XOR) {
5897 if (CC != ISD::SETEQ && CC != ISD::SETNE)
5898 return SDValue();
5899 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
5900 if (!CN || CN->getSExtValue() != -1)
5901 return SDValue();
5902 CN = dyn_cast<ConstantSDNode>(CmpInputValue);
5903 if (!CN || CN->getSExtValue() != 0)
5904 return SDValue();
5905 AndInputValue1 = AndInputValue1.getOperand(0);
5906 if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
5907 return SDValue();
  // Case 1: both AND inputs are zero-extends and the second one is also
  // the value being compared against.
5908 } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
5909 if (AndInputValue2 != CmpInputValue)
5910 return SDValue();
5911 } else {
5912 return SDValue();
5913 }
5914
5915 SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
5916 if (TruncValue1.getOpcode() != ISD::TRUNCATE)
5917 return SDValue();
5918
5919 SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
5920 if (TruncValue2.getOpcode() != ISD::TRUNCATE)
5921 return SDValue();
5922
5923 SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
5924 SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
5925 ISD::LoadExtType ExtType1;
5926 ISD::LoadExtType ExtType2;
5927
  // Both truncate sources must be narrow (i8/i16) loads or Assert[SZ]ext
  // values; checkValueWidth reports how each was extended.
5928 if (!checkValueWidth(TruncInputValue1, ExtType1) ||
5929 !checkValueWidth(TruncInputValue2, ExtType2))
5930 return SDValue();
5931
  // The sources must share one type, matching the AND's result type.
5932 if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
5933 AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
5934 return SDValue();
5935
5936 if ((ExtType2 != ISD::ZEXTLOAD) &&
5937 ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
5938 return SDValue();
5939
5940 // These truncation and zero-extension nodes are not necessary, remove them.
5941 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
5942 TruncInputValue1, TruncInputValue2);
5943 SDValue NewSetCC =
5944 DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
5945 DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
5946 return SDValue(N, 0);
5947}
5948
5949 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
// NOTE(review): the function declaration line(s) (original lines 5950-5951)
// appear to have been dropped by the source extraction here — verify against
// upstream LLVM (expected: static SDValue performBITREV_WCombine(...)).
5952 const LoongArchSubtarget &Subtarget) {
  // Only run after op legalization.
5953 if (DCI.isBeforeLegalizeOps())
5954 return SDValue();
5955
5956 SDValue Src = N->getOperand(0);
5957 if (Src.getOpcode() != LoongArchISD::REVB_2W)
5958 return SDValue();
5959
  // bitrev.w of revb.2w is exactly a per-byte bit reversal (bitrev.4b).
5960 return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
5961 Src.getOperand(0));
5962}
5963
5964 // Perform common combines for BR_CC and SELECT_CC conditions.
// Returns true when any of LHS, RHS or CC was rewritten in place; the caller
// then rebuilds its node from the updated operands.
5965 static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
5966 SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
5967 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
5968
5969 // As far as arithmetic right shift always saves the sign,
5970 // shift can be omitted.
5971 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
5972 // setge (sra X, N), 0 -> setge X, 0
5973 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
5974 LHS.getOpcode() == ISD::SRA) {
5975 LHS = LHS.getOperand(0);
5976 return true;
5977 }
5978
  // The remaining folds only apply to integer equality comparisons.
5979 if (!ISD::isIntEqualitySetCC(CCVal))
5980 return false;
5981
5982 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
5983 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
5984 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
5985 LHS.getOperand(0).getValueType() == Subtarget.getGRLenVT()) {
5986 // If we're looking for eq 0 instead of ne 0, we need to invert the
5987 // condition.
5988 bool Invert = CCVal == ISD::SETEQ;
5989 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
5990 if (Invert)
5991 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
5992
5993 RHS = LHS.getOperand(1);
5994 LHS = LHS.getOperand(0);
  // Canonicalize the operands/condition for a branch form.
5995 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
5996
5997 CC = DAG.getCondCode(CCVal);
5998 return true;
5999 }
6000
6001 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, GRLen-1-C), 0, ge/lt)
6002 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
6003 LHS.getOperand(1).getOpcode() == ISD::Constant) {
6004 SDValue LHS0 = LHS.getOperand(0);
6005 if (LHS0.getOpcode() == ISD::AND &&
6006 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
6007 uint64_t Mask = LHS0.getConstantOperandVal(1);
6008 uint64_t ShAmt = LHS.getConstantOperandVal(1);
  // Requires the AND mask to be exactly the single bit being shifted out.
6009 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
6010 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
6011 CC = DAG.getCondCode(CCVal);
6012
  // Move the tested bit into the sign-bit position.
6013 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
6014 LHS = LHS0.getOperand(0);
6015 if (ShAmt != 0)
6016 LHS =
6017 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
6018 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
6019 return true;
6020 }
6021 }
6022 }
6023
6024 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
6025 // This can occur when legalizing some floating point comparisons.
6026 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
6027 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
6028 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
6029 CC = DAG.getCondCode(CCVal);
6030 RHS = DAG.getConstant(0, DL, LHS.getValueType());
6031 return true;
6032 }
6033
6034 return false;
6035}
6036
// NOTE(review): the function declaration line(s) (original lines 6037-6038)
// appear to have been dropped by the source extraction here — verify against
// upstream LLVM (expected: static SDValue performBR_CCCombine(...)).
6039 const LoongArchSubtarget &Subtarget) {
  // BR_CC operands 1-3 are the condition (LHS, RHS, condcode).
6040 SDValue LHS = N->getOperand(1);
6041 SDValue RHS = N->getOperand(2);
6042 SDValue CC = N->getOperand(3);
6043 SDLoc DL(N);
6044
  // Rebuild the BR_CC with the simplified condition, keeping operands 0 and
  // 4 unchanged.
6045 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6046 return DAG.getNode(LoongArchISD::BR_CC, DL, N->getValueType(0),
6047 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
6048
6049 return SDValue();
6050}
6051
// NOTE(review): the function declaration line(s) (original lines 6052-6053)
// appear to have been dropped by the source extraction here — verify against
// upstream LLVM (expected: static SDValue performSELECT_CCCombine(...)).
6054 const LoongArchSubtarget &Subtarget) {
6055 // Transform
6056 SDValue LHS = N->getOperand(0);
6057 SDValue RHS = N->getOperand(1);
6058 SDValue CC = N->getOperand(2);
6059 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
6060 SDValue TrueV = N->getOperand(3);
6061 SDValue FalseV = N->getOperand(4);
6062 SDLoc DL(N);
6063 EVT VT = N->getValueType(0);
6064
6065 // If the True and False values are the same, we don't need a select_cc.
6066 if (TrueV == FalseV)
6067 return TrueV;
6068
6069 // (select (x < 0), y, z) -> x >> (GRLEN - 1) & (y - z) + z
6070 // (select (x >= 0), y, z) -> x >> (GRLEN - 1) & (z - y) + y
6071 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
// NOTE(review): a condition line (original line 6072) appears to have been
// dropped by the source extraction here — verify against upstream LLVM
// (likely an isNullConstant(RHS) check).
6073 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
  // Normalize to the SETLT form; the swap is undone below if the fold fails.
6074 if (CCVal == ISD::CondCode::SETGE)
6075 std::swap(TrueV, FalseV);
6076
6077 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
6078 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
6079 // Only handle simm12, if it is not in this range, it can be considered as
6080 // register.
6081 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
6082 isInt<12>(TrueSImm - FalseSImm)) {
  // x >> (GRLen-1) is all-ones when x < 0 and all-zeros otherwise, so the
  // AND selects between (TrueSImm - FalseSImm) and 0 before adding FalseV.
6083 SDValue SRA =
6084 DAG.getNode(ISD::SRA, DL, VT, LHS,
6085 DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT));
6086 SDValue AND =
6087 DAG.getNode(ISD::AND, DL, VT, SRA,
6088 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
6089 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
6090 }
6091
6092 if (CCVal == ISD::CondCode::SETGE)
6093 std::swap(TrueV, FalseV);
6094 }
6095
  // Fall back to the shared BR_CC/SELECT_CC condition simplifications.
6096 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
6097 return DAG.getNode(LoongArchISD::SELECT_CC, DL, N->getValueType(0),
6098 {LHS, RHS, CC, TrueV, FalseV});
6099
6100 return SDValue();
6101}
6102
6103 template <unsigned N>
// NOTE(review): the declaration line carrying the function name (original
// line 6104) appears to have been dropped by the source extraction — verify
// against upstream LLVM (expected: static SDValue legalizeIntrinsicImmArg(
// SDNode *Node, unsigned ImmOp,).
6105 SelectionDAG &DAG,
6106 const LoongArchSubtarget &Subtarget,
6107 bool IsSigned = false) {
  // Validate that intrinsic operand ImmOp fits in an N-bit (signed or
  // unsigned, per IsSigned) immediate and return it as a GRLen constant;
  // emit a diagnostic and return UNDEF otherwise.
6108 SDLoc DL(Node);
6109 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6110 // Check the ImmArg.
6111 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6112 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6113 DAG.getContext()->emitError(Node->getOperationName(0) +
6114 ": argument out of range.");
6115 return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
6116 }
6117 return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
6118}
6119
6120 template <unsigned N>
// Build a vector splat of intrinsic operand ImmOp after range-checking it
// against an N-bit (signed or unsigned, per IsSigned) immediate; emits a
// diagnostic and returns UNDEF when out of range.
6121 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
6122 SelectionDAG &DAG, bool IsSigned = false) {
6123 SDLoc DL(Node);
6124 EVT ResTy = Node->getValueType(0);
6125 auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
6126
6127 // Check the ImmArg.
6128 if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
6129 (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
6130 DAG.getContext()->emitError(Node->getOperationName(0) +
6131 ": argument out of range.");
6132 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6133 }
6134 return DAG.getConstant(
// NOTE(review): a source line (original line 6135, presumably the
// APInt(ResTy.getScalarType().getSizeInBits(), ...) constructor opening)
// appears to have been dropped by the source extraction here — verify
// against upstream LLVM.
6136 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
6137 DL, ResTy);
6138}
6139
6141 SDLoc DL(Node);
6142 EVT ResTy = Node->getValueType(0);
6143 SDValue Vec = Node->getOperand(2);
6144 SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
6145 return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
6146}
6147
6149 SDLoc DL(Node);
6150 EVT ResTy = Node->getValueType(0);
6151 SDValue One = DAG.getConstant(1, DL, ResTy);
6152 SDValue Bit =
6153 DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
6154
6155 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
6156 DAG.getNOT(DL, Bit, ResTy));
6157}
6158
6159template <unsigned N>
6161 SDLoc DL(Node);
6162 EVT ResTy = Node->getValueType(0);
6163 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6164 // Check the unsigned ImmArg.
6165 if (!isUInt<N>(CImm->getZExtValue())) {
6166 DAG.getContext()->emitError(Node->getOperationName(0) +
6167 ": argument out of range.");
6168 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6169 }
6170
6171 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6172 SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
6173
6174 return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
6175}
6176
6177template <unsigned N>
6179 SDLoc DL(Node);
6180 EVT ResTy = Node->getValueType(0);
6181 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6182 // Check the unsigned ImmArg.
6183 if (!isUInt<N>(CImm->getZExtValue())) {
6184 DAG.getContext()->emitError(Node->getOperationName(0) +
6185 ": argument out of range.");
6186 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6187 }
6188
6189 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6190 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6191 return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
6192}
6193
6194template <unsigned N>
6196 SDLoc DL(Node);
6197 EVT ResTy = Node->getValueType(0);
6198 auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
6199 // Check the unsigned ImmArg.
6200 if (!isUInt<N>(CImm->getZExtValue())) {
6201 DAG.getContext()->emitError(Node->getOperationName(0) +
6202 ": argument out of range.");
6203 return DAG.getNode(ISD::UNDEF, DL, ResTy);
6204 }
6205
6206 APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
6207 SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
6208 return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
6209}
6210
6211template <unsigned W>
6213 unsigned ResOp) {
6214 unsigned Imm = N->getConstantOperandVal(2);
6215 if (!isUInt<W>(Imm)) {
6216 const StringRef ErrorMsg = "argument out of range";
6217 DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
6218 return DAG.getUNDEF(N->getValueType(0));
6219 }
6220 SDLoc DL(N);
6221 SDValue Vec = N->getOperand(1);
6222 SDValue Idx = DAG.getConstant(Imm, DL, MVT::i32);
6224 return DAG.getNode(ResOp, DL, N->getValueType(0), Vec, Idx, EltVT);
6225}
6226
6227static SDValue
6230 const LoongArchSubtarget &Subtarget) {
6231 SDLoc DL(N);
6232 switch (N->getConstantOperandVal(0)) {
6233 default:
6234 break;
6235 case Intrinsic::loongarch_lsx_vadd_b:
6236 case Intrinsic::loongarch_lsx_vadd_h:
6237 case Intrinsic::loongarch_lsx_vadd_w:
6238 case Intrinsic::loongarch_lsx_vadd_d:
6239 case Intrinsic::loongarch_lasx_xvadd_b:
6240 case Intrinsic::loongarch_lasx_xvadd_h:
6241 case Intrinsic::loongarch_lasx_xvadd_w:
6242 case Intrinsic::loongarch_lasx_xvadd_d:
6243 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6244 N->getOperand(2));
6245 case Intrinsic::loongarch_lsx_vaddi_bu:
6246 case Intrinsic::loongarch_lsx_vaddi_hu:
6247 case Intrinsic::loongarch_lsx_vaddi_wu:
6248 case Intrinsic::loongarch_lsx_vaddi_du:
6249 case Intrinsic::loongarch_lasx_xvaddi_bu:
6250 case Intrinsic::loongarch_lasx_xvaddi_hu:
6251 case Intrinsic::loongarch_lasx_xvaddi_wu:
6252 case Intrinsic::loongarch_lasx_xvaddi_du:
6253 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
6254 lowerVectorSplatImm<5>(N, 2, DAG));
6255 case Intrinsic::loongarch_lsx_vsub_b:
6256 case Intrinsic::loongarch_lsx_vsub_h:
6257 case Intrinsic::loongarch_lsx_vsub_w:
6258 case Intrinsic::loongarch_lsx_vsub_d:
6259 case Intrinsic::loongarch_lasx_xvsub_b:
6260 case Intrinsic::loongarch_lasx_xvsub_h:
6261 case Intrinsic::loongarch_lasx_xvsub_w:
6262 case Intrinsic::loongarch_lasx_xvsub_d:
6263 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6264 N->getOperand(2));
6265 case Intrinsic::loongarch_lsx_vsubi_bu:
6266 case Intrinsic::loongarch_lsx_vsubi_hu:
6267 case Intrinsic::loongarch_lsx_vsubi_wu:
6268 case Intrinsic::loongarch_lsx_vsubi_du:
6269 case Intrinsic::loongarch_lasx_xvsubi_bu:
6270 case Intrinsic::loongarch_lasx_xvsubi_hu:
6271 case Intrinsic::loongarch_lasx_xvsubi_wu:
6272 case Intrinsic::loongarch_lasx_xvsubi_du:
6273 return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
6274 lowerVectorSplatImm<5>(N, 2, DAG));
6275 case Intrinsic::loongarch_lsx_vneg_b:
6276 case Intrinsic::loongarch_lsx_vneg_h:
6277 case Intrinsic::loongarch_lsx_vneg_w:
6278 case Intrinsic::loongarch_lsx_vneg_d:
6279 case Intrinsic::loongarch_lasx_xvneg_b:
6280 case Intrinsic::loongarch_lasx_xvneg_h:
6281 case Intrinsic::loongarch_lasx_xvneg_w:
6282 case Intrinsic::loongarch_lasx_xvneg_d:
6283 return DAG.getNode(
6284 ISD::SUB, DL, N->getValueType(0),
6285 DAG.getConstant(
6286 APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
6287 /*isSigned=*/true),
6288 SDLoc(N), N->getValueType(0)),
6289 N->getOperand(1));
6290 case Intrinsic::loongarch_lsx_vmax_b:
6291 case Intrinsic::loongarch_lsx_vmax_h:
6292 case Intrinsic::loongarch_lsx_vmax_w:
6293 case Intrinsic::loongarch_lsx_vmax_d:
6294 case Intrinsic::loongarch_lasx_xvmax_b:
6295 case Intrinsic::loongarch_lasx_xvmax_h:
6296 case Intrinsic::loongarch_lasx_xvmax_w:
6297 case Intrinsic::loongarch_lasx_xvmax_d:
6298 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6299 N->getOperand(2));
6300 case Intrinsic::loongarch_lsx_vmax_bu:
6301 case Intrinsic::loongarch_lsx_vmax_hu:
6302 case Intrinsic::loongarch_lsx_vmax_wu:
6303 case Intrinsic::loongarch_lsx_vmax_du:
6304 case Intrinsic::loongarch_lasx_xvmax_bu:
6305 case Intrinsic::loongarch_lasx_xvmax_hu:
6306 case Intrinsic::loongarch_lasx_xvmax_wu:
6307 case Intrinsic::loongarch_lasx_xvmax_du:
6308 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6309 N->getOperand(2));
6310 case Intrinsic::loongarch_lsx_vmaxi_b:
6311 case Intrinsic::loongarch_lsx_vmaxi_h:
6312 case Intrinsic::loongarch_lsx_vmaxi_w:
6313 case Intrinsic::loongarch_lsx_vmaxi_d:
6314 case Intrinsic::loongarch_lasx_xvmaxi_b:
6315 case Intrinsic::loongarch_lasx_xvmaxi_h:
6316 case Intrinsic::loongarch_lasx_xvmaxi_w:
6317 case Intrinsic::loongarch_lasx_xvmaxi_d:
6318 return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
6319 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6320 case Intrinsic::loongarch_lsx_vmaxi_bu:
6321 case Intrinsic::loongarch_lsx_vmaxi_hu:
6322 case Intrinsic::loongarch_lsx_vmaxi_wu:
6323 case Intrinsic::loongarch_lsx_vmaxi_du:
6324 case Intrinsic::loongarch_lasx_xvmaxi_bu:
6325 case Intrinsic::loongarch_lasx_xvmaxi_hu:
6326 case Intrinsic::loongarch_lasx_xvmaxi_wu:
6327 case Intrinsic::loongarch_lasx_xvmaxi_du:
6328 return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
6329 lowerVectorSplatImm<5>(N, 2, DAG));
6330 case Intrinsic::loongarch_lsx_vmin_b:
6331 case Intrinsic::loongarch_lsx_vmin_h:
6332 case Intrinsic::loongarch_lsx_vmin_w:
6333 case Intrinsic::loongarch_lsx_vmin_d:
6334 case Intrinsic::loongarch_lasx_xvmin_b:
6335 case Intrinsic::loongarch_lasx_xvmin_h:
6336 case Intrinsic::loongarch_lasx_xvmin_w:
6337 case Intrinsic::loongarch_lasx_xvmin_d:
6338 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6339 N->getOperand(2));
6340 case Intrinsic::loongarch_lsx_vmin_bu:
6341 case Intrinsic::loongarch_lsx_vmin_hu:
6342 case Intrinsic::loongarch_lsx_vmin_wu:
6343 case Intrinsic::loongarch_lsx_vmin_du:
6344 case Intrinsic::loongarch_lasx_xvmin_bu:
6345 case Intrinsic::loongarch_lasx_xvmin_hu:
6346 case Intrinsic::loongarch_lasx_xvmin_wu:
6347 case Intrinsic::loongarch_lasx_xvmin_du:
6348 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6349 N->getOperand(2));
6350 case Intrinsic::loongarch_lsx_vmini_b:
6351 case Intrinsic::loongarch_lsx_vmini_h:
6352 case Intrinsic::loongarch_lsx_vmini_w:
6353 case Intrinsic::loongarch_lsx_vmini_d:
6354 case Intrinsic::loongarch_lasx_xvmini_b:
6355 case Intrinsic::loongarch_lasx_xvmini_h:
6356 case Intrinsic::loongarch_lasx_xvmini_w:
6357 case Intrinsic::loongarch_lasx_xvmini_d:
6358 return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
6359 lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
6360 case Intrinsic::loongarch_lsx_vmini_bu:
6361 case Intrinsic::loongarch_lsx_vmini_hu:
6362 case Intrinsic::loongarch_lsx_vmini_wu:
6363 case Intrinsic::loongarch_lsx_vmini_du:
6364 case Intrinsic::loongarch_lasx_xvmini_bu:
6365 case Intrinsic::loongarch_lasx_xvmini_hu:
6366 case Intrinsic::loongarch_lasx_xvmini_wu:
6367 case Intrinsic::loongarch_lasx_xvmini_du:
6368 return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
6369 lowerVectorSplatImm<5>(N, 2, DAG));
6370 case Intrinsic::loongarch_lsx_vmul_b:
6371 case Intrinsic::loongarch_lsx_vmul_h:
6372 case Intrinsic::loongarch_lsx_vmul_w:
6373 case Intrinsic::loongarch_lsx_vmul_d:
6374 case Intrinsic::loongarch_lasx_xvmul_b:
6375 case Intrinsic::loongarch_lasx_xvmul_h:
6376 case Intrinsic::loongarch_lasx_xvmul_w:
6377 case Intrinsic::loongarch_lasx_xvmul_d:
6378 return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
6379 N->getOperand(2));
6380 case Intrinsic::loongarch_lsx_vmadd_b:
6381 case Intrinsic::loongarch_lsx_vmadd_h:
6382 case Intrinsic::loongarch_lsx_vmadd_w:
6383 case Intrinsic::loongarch_lsx_vmadd_d:
6384 case Intrinsic::loongarch_lasx_xvmadd_b:
6385 case Intrinsic::loongarch_lasx_xvmadd_h:
6386 case Intrinsic::loongarch_lasx_xvmadd_w:
6387 case Intrinsic::loongarch_lasx_xvmadd_d: {
6388 EVT ResTy = N->getValueType(0);
6389 return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
6390 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6391 N->getOperand(3)));
6392 }
6393 case Intrinsic::loongarch_lsx_vmsub_b:
6394 case Intrinsic::loongarch_lsx_vmsub_h:
6395 case Intrinsic::loongarch_lsx_vmsub_w:
6396 case Intrinsic::loongarch_lsx_vmsub_d:
6397 case Intrinsic::loongarch_lasx_xvmsub_b:
6398 case Intrinsic::loongarch_lasx_xvmsub_h:
6399 case Intrinsic::loongarch_lasx_xvmsub_w:
6400 case Intrinsic::loongarch_lasx_xvmsub_d: {
6401 EVT ResTy = N->getValueType(0);
6402 return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
6403 DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
6404 N->getOperand(3)));
6405 }
6406 case Intrinsic::loongarch_lsx_vdiv_b:
6407 case Intrinsic::loongarch_lsx_vdiv_h:
6408 case Intrinsic::loongarch_lsx_vdiv_w:
6409 case Intrinsic::loongarch_lsx_vdiv_d:
6410 case Intrinsic::loongarch_lasx_xvdiv_b:
6411 case Intrinsic::loongarch_lasx_xvdiv_h:
6412 case Intrinsic::loongarch_lasx_xvdiv_w:
6413 case Intrinsic::loongarch_lasx_xvdiv_d:
6414 return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
6415 N->getOperand(2));
6416 case Intrinsic::loongarch_lsx_vdiv_bu:
6417 case Intrinsic::loongarch_lsx_vdiv_hu:
6418 case Intrinsic::loongarch_lsx_vdiv_wu:
6419 case Intrinsic::loongarch_lsx_vdiv_du:
6420 case Intrinsic::loongarch_lasx_xvdiv_bu:
6421 case Intrinsic::loongarch_lasx_xvdiv_hu:
6422 case Intrinsic::loongarch_lasx_xvdiv_wu:
6423 case Intrinsic::loongarch_lasx_xvdiv_du:
6424 return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
6425 N->getOperand(2));
6426 case Intrinsic::loongarch_lsx_vmod_b:
6427 case Intrinsic::loongarch_lsx_vmod_h:
6428 case Intrinsic::loongarch_lsx_vmod_w:
6429 case Intrinsic::loongarch_lsx_vmod_d:
6430 case Intrinsic::loongarch_lasx_xvmod_b:
6431 case Intrinsic::loongarch_lasx_xvmod_h:
6432 case Intrinsic::loongarch_lasx_xvmod_w:
6433 case Intrinsic::loongarch_lasx_xvmod_d:
6434 return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
6435 N->getOperand(2));
6436 case Intrinsic::loongarch_lsx_vmod_bu:
6437 case Intrinsic::loongarch_lsx_vmod_hu:
6438 case Intrinsic::loongarch_lsx_vmod_wu:
6439 case Intrinsic::loongarch_lsx_vmod_du:
6440 case Intrinsic::loongarch_lasx_xvmod_bu:
6441 case Intrinsic::loongarch_lasx_xvmod_hu:
6442 case Intrinsic::loongarch_lasx_xvmod_wu:
6443 case Intrinsic::loongarch_lasx_xvmod_du:
6444 return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
6445 N->getOperand(2));
6446 case Intrinsic::loongarch_lsx_vand_v:
6447 case Intrinsic::loongarch_lasx_xvand_v:
6448 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6449 N->getOperand(2));
6450 case Intrinsic::loongarch_lsx_vor_v:
6451 case Intrinsic::loongarch_lasx_xvor_v:
6452 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6453 N->getOperand(2));
6454 case Intrinsic::loongarch_lsx_vxor_v:
6455 case Intrinsic::loongarch_lasx_xvxor_v:
6456 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6457 N->getOperand(2));
6458 case Intrinsic::loongarch_lsx_vnor_v:
6459 case Intrinsic::loongarch_lasx_xvnor_v: {
6460 SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6461 N->getOperand(2));
6462 return DAG.getNOT(DL, Res, Res->getValueType(0));
6463 }
6464 case Intrinsic::loongarch_lsx_vandi_b:
6465 case Intrinsic::loongarch_lasx_xvandi_b:
6466 return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
6467 lowerVectorSplatImm<8>(N, 2, DAG));
6468 case Intrinsic::loongarch_lsx_vori_b:
6469 case Intrinsic::loongarch_lasx_xvori_b:
6470 return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
6471 lowerVectorSplatImm<8>(N, 2, DAG));
6472 case Intrinsic::loongarch_lsx_vxori_b:
6473 case Intrinsic::loongarch_lasx_xvxori_b:
6474 return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
6475 lowerVectorSplatImm<8>(N, 2, DAG));
6476 case Intrinsic::loongarch_lsx_vsll_b:
6477 case Intrinsic::loongarch_lsx_vsll_h:
6478 case Intrinsic::loongarch_lsx_vsll_w:
6479 case Intrinsic::loongarch_lsx_vsll_d:
6480 case Intrinsic::loongarch_lasx_xvsll_b:
6481 case Intrinsic::loongarch_lasx_xvsll_h:
6482 case Intrinsic::loongarch_lasx_xvsll_w:
6483 case Intrinsic::loongarch_lasx_xvsll_d:
6484 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6485 truncateVecElts(N, DAG));
6486 case Intrinsic::loongarch_lsx_vslli_b:
6487 case Intrinsic::loongarch_lasx_xvslli_b:
6488 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6489 lowerVectorSplatImm<3>(N, 2, DAG));
6490 case Intrinsic::loongarch_lsx_vslli_h:
6491 case Intrinsic::loongarch_lasx_xvslli_h:
6492 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6493 lowerVectorSplatImm<4>(N, 2, DAG));
6494 case Intrinsic::loongarch_lsx_vslli_w:
6495 case Intrinsic::loongarch_lasx_xvslli_w:
6496 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6497 lowerVectorSplatImm<5>(N, 2, DAG));
6498 case Intrinsic::loongarch_lsx_vslli_d:
6499 case Intrinsic::loongarch_lasx_xvslli_d:
6500 return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
6501 lowerVectorSplatImm<6>(N, 2, DAG));
6502 case Intrinsic::loongarch_lsx_vsrl_b:
6503 case Intrinsic::loongarch_lsx_vsrl_h:
6504 case Intrinsic::loongarch_lsx_vsrl_w:
6505 case Intrinsic::loongarch_lsx_vsrl_d:
6506 case Intrinsic::loongarch_lasx_xvsrl_b:
6507 case Intrinsic::loongarch_lasx_xvsrl_h:
6508 case Intrinsic::loongarch_lasx_xvsrl_w:
6509 case Intrinsic::loongarch_lasx_xvsrl_d:
6510 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6511 truncateVecElts(N, DAG));
6512 case Intrinsic::loongarch_lsx_vsrli_b:
6513 case Intrinsic::loongarch_lasx_xvsrli_b:
6514 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6515 lowerVectorSplatImm<3>(N, 2, DAG));
6516 case Intrinsic::loongarch_lsx_vsrli_h:
6517 case Intrinsic::loongarch_lasx_xvsrli_h:
6518 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6519 lowerVectorSplatImm<4>(N, 2, DAG));
6520 case Intrinsic::loongarch_lsx_vsrli_w:
6521 case Intrinsic::loongarch_lasx_xvsrli_w:
6522 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6523 lowerVectorSplatImm<5>(N, 2, DAG));
6524 case Intrinsic::loongarch_lsx_vsrli_d:
6525 case Intrinsic::loongarch_lasx_xvsrli_d:
6526 return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
6527 lowerVectorSplatImm<6>(N, 2, DAG));
6528 case Intrinsic::loongarch_lsx_vsra_b:
6529 case Intrinsic::loongarch_lsx_vsra_h:
6530 case Intrinsic::loongarch_lsx_vsra_w:
6531 case Intrinsic::loongarch_lsx_vsra_d:
6532 case Intrinsic::loongarch_lasx_xvsra_b:
6533 case Intrinsic::loongarch_lasx_xvsra_h:
6534 case Intrinsic::loongarch_lasx_xvsra_w:
6535 case Intrinsic::loongarch_lasx_xvsra_d:
6536 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6537 truncateVecElts(N, DAG));
6538 case Intrinsic::loongarch_lsx_vsrai_b:
6539 case Intrinsic::loongarch_lasx_xvsrai_b:
6540 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6541 lowerVectorSplatImm<3>(N, 2, DAG));
6542 case Intrinsic::loongarch_lsx_vsrai_h:
6543 case Intrinsic::loongarch_lasx_xvsrai_h:
6544 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6545 lowerVectorSplatImm<4>(N, 2, DAG));
6546 case Intrinsic::loongarch_lsx_vsrai_w:
6547 case Intrinsic::loongarch_lasx_xvsrai_w:
6548 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6549 lowerVectorSplatImm<5>(N, 2, DAG));
6550 case Intrinsic::loongarch_lsx_vsrai_d:
6551 case Intrinsic::loongarch_lasx_xvsrai_d:
6552 return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
6553 lowerVectorSplatImm<6>(N, 2, DAG));
6554 case Intrinsic::loongarch_lsx_vclz_b:
6555 case Intrinsic::loongarch_lsx_vclz_h:
6556 case Intrinsic::loongarch_lsx_vclz_w:
6557 case Intrinsic::loongarch_lsx_vclz_d:
6558 case Intrinsic::loongarch_lasx_xvclz_b:
6559 case Intrinsic::loongarch_lasx_xvclz_h:
6560 case Intrinsic::loongarch_lasx_xvclz_w:
6561 case Intrinsic::loongarch_lasx_xvclz_d:
6562 return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
6563 case Intrinsic::loongarch_lsx_vpcnt_b:
6564 case Intrinsic::loongarch_lsx_vpcnt_h:
6565 case Intrinsic::loongarch_lsx_vpcnt_w:
6566 case Intrinsic::loongarch_lsx_vpcnt_d:
6567 case Intrinsic::loongarch_lasx_xvpcnt_b:
6568 case Intrinsic::loongarch_lasx_xvpcnt_h:
6569 case Intrinsic::loongarch_lasx_xvpcnt_w:
6570 case Intrinsic::loongarch_lasx_xvpcnt_d:
6571 return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
6572 case Intrinsic::loongarch_lsx_vbitclr_b:
6573 case Intrinsic::loongarch_lsx_vbitclr_h:
6574 case Intrinsic::loongarch_lsx_vbitclr_w:
6575 case Intrinsic::loongarch_lsx_vbitclr_d:
6576 case Intrinsic::loongarch_lasx_xvbitclr_b:
6577 case Intrinsic::loongarch_lasx_xvbitclr_h:
6578 case Intrinsic::loongarch_lasx_xvbitclr_w:
6579 case Intrinsic::loongarch_lasx_xvbitclr_d:
6580 return lowerVectorBitClear(N, DAG);
6581 case Intrinsic::loongarch_lsx_vbitclri_b:
6582 case Intrinsic::loongarch_lasx_xvbitclri_b:
6583 return lowerVectorBitClearImm<3>(N, DAG);
6584 case Intrinsic::loongarch_lsx_vbitclri_h:
6585 case Intrinsic::loongarch_lasx_xvbitclri_h:
6586 return lowerVectorBitClearImm<4>(N, DAG);
6587 case Intrinsic::loongarch_lsx_vbitclri_w:
6588 case Intrinsic::loongarch_lasx_xvbitclri_w:
6589 return lowerVectorBitClearImm<5>(N, DAG);
6590 case Intrinsic::loongarch_lsx_vbitclri_d:
6591 case Intrinsic::loongarch_lasx_xvbitclri_d:
6592 return lowerVectorBitClearImm<6>(N, DAG);
6593 case Intrinsic::loongarch_lsx_vbitset_b:
6594 case Intrinsic::loongarch_lsx_vbitset_h:
6595 case Intrinsic::loongarch_lsx_vbitset_w:
6596 case Intrinsic::loongarch_lsx_vbitset_d:
6597 case Intrinsic::loongarch_lasx_xvbitset_b:
6598 case Intrinsic::loongarch_lasx_xvbitset_h:
6599 case Intrinsic::loongarch_lasx_xvbitset_w:
6600 case Intrinsic::loongarch_lasx_xvbitset_d: {
6601 EVT VecTy = N->getValueType(0);
6602 SDValue One = DAG.getConstant(1, DL, VecTy);
6603 return DAG.getNode(
6604 ISD::OR, DL, VecTy, N->getOperand(1),
6605 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6606 }
6607 case Intrinsic::loongarch_lsx_vbitseti_b:
6608 case Intrinsic::loongarch_lasx_xvbitseti_b:
6609 return lowerVectorBitSetImm<3>(N, DAG);
6610 case Intrinsic::loongarch_lsx_vbitseti_h:
6611 case Intrinsic::loongarch_lasx_xvbitseti_h:
6612 return lowerVectorBitSetImm<4>(N, DAG);
6613 case Intrinsic::loongarch_lsx_vbitseti_w:
6614 case Intrinsic::loongarch_lasx_xvbitseti_w:
6615 return lowerVectorBitSetImm<5>(N, DAG);
6616 case Intrinsic::loongarch_lsx_vbitseti_d:
6617 case Intrinsic::loongarch_lasx_xvbitseti_d:
6618 return lowerVectorBitSetImm<6>(N, DAG);
6619 case Intrinsic::loongarch_lsx_vbitrev_b:
6620 case Intrinsic::loongarch_lsx_vbitrev_h:
6621 case Intrinsic::loongarch_lsx_vbitrev_w:
6622 case Intrinsic::loongarch_lsx_vbitrev_d:
6623 case Intrinsic::loongarch_lasx_xvbitrev_b:
6624 case Intrinsic::loongarch_lasx_xvbitrev_h:
6625 case Intrinsic::loongarch_lasx_xvbitrev_w:
6626 case Intrinsic::loongarch_lasx_xvbitrev_d: {
6627 EVT VecTy = N->getValueType(0);
6628 SDValue One = DAG.getConstant(1, DL, VecTy);
6629 return DAG.getNode(
6630 ISD::XOR, DL, VecTy, N->getOperand(1),
6631 DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
6632 }
6633 case Intrinsic::loongarch_lsx_vbitrevi_b:
6634 case Intrinsic::loongarch_lasx_xvbitrevi_b:
6635 return lowerVectorBitRevImm<3>(N, DAG);
6636 case Intrinsic::loongarch_lsx_vbitrevi_h:
6637 case Intrinsic::loongarch_lasx_xvbitrevi_h:
6638 return lowerVectorBitRevImm<4>(N, DAG);
6639 case Intrinsic::loongarch_lsx_vbitrevi_w:
6640 case Intrinsic::loongarch_lasx_xvbitrevi_w:
6641 return lowerVectorBitRevImm<5>(N, DAG);
6642 case Intrinsic::loongarch_lsx_vbitrevi_d:
6643 case Intrinsic::loongarch_lasx_xvbitrevi_d:
6644 return lowerVectorBitRevImm<6>(N, DAG);
6645 case Intrinsic::loongarch_lsx_vfadd_s:
6646 case Intrinsic::loongarch_lsx_vfadd_d:
6647 case Intrinsic::loongarch_lasx_xvfadd_s:
6648 case Intrinsic::loongarch_lasx_xvfadd_d:
6649 return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
6650 N->getOperand(2));
6651 case Intrinsic::loongarch_lsx_vfsub_s:
6652 case Intrinsic::loongarch_lsx_vfsub_d:
6653 case Intrinsic::loongarch_lasx_xvfsub_s:
6654 case Intrinsic::loongarch_lasx_xvfsub_d:
6655 return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
6656 N->getOperand(2));
6657 case Intrinsic::loongarch_lsx_vfmul_s:
6658 case Intrinsic::loongarch_lsx_vfmul_d:
6659 case Intrinsic::loongarch_lasx_xvfmul_s:
6660 case Intrinsic::loongarch_lasx_xvfmul_d:
6661 return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
6662 N->getOperand(2));
6663 case Intrinsic::loongarch_lsx_vfdiv_s:
6664 case Intrinsic::loongarch_lsx_vfdiv_d:
6665 case Intrinsic::loongarch_lasx_xvfdiv_s:
6666 case Intrinsic::loongarch_lasx_xvfdiv_d:
6667 return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
6668 N->getOperand(2));
6669 case Intrinsic::loongarch_lsx_vfmadd_s:
6670 case Intrinsic::loongarch_lsx_vfmadd_d:
6671 case Intrinsic::loongarch_lasx_xvfmadd_s:
6672 case Intrinsic::loongarch_lasx_xvfmadd_d:
6673 return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
6674 N->getOperand(2), N->getOperand(3));
6675 case Intrinsic::loongarch_lsx_vinsgr2vr_b:
6676 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6677 N->getOperand(1), N->getOperand(2),
6678 legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
6679 case Intrinsic::loongarch_lsx_vinsgr2vr_h:
6680 case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
6681 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6682 N->getOperand(1), N->getOperand(2),
6683 legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
6684 case Intrinsic::loongarch_lsx_vinsgr2vr_w:
6685 case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
6686 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6687 N->getOperand(1), N->getOperand(2),
6688 legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
6689 case Intrinsic::loongarch_lsx_vinsgr2vr_d:
6690 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
6691 N->getOperand(1), N->getOperand(2),
6692 legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
6693 case Intrinsic::loongarch_lsx_vreplgr2vr_b:
6694 case Intrinsic::loongarch_lsx_vreplgr2vr_h:
6695 case Intrinsic::loongarch_lsx_vreplgr2vr_w:
6696 case Intrinsic::loongarch_lsx_vreplgr2vr_d:
6697 case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
6698 case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
6699 case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
6700 case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
6701 return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
6702 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6703 N->getOperand(1)));
6704 case Intrinsic::loongarch_lsx_vreplve_b:
6705 case Intrinsic::loongarch_lsx_vreplve_h:
6706 case Intrinsic::loongarch_lsx_vreplve_w:
6707 case Intrinsic::loongarch_lsx_vreplve_d:
6708 case Intrinsic::loongarch_lasx_xvreplve_b:
6709 case Intrinsic::loongarch_lasx_xvreplve_h:
6710 case Intrinsic::loongarch_lasx_xvreplve_w:
6711 case Intrinsic::loongarch_lasx_xvreplve_d:
6712 return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
6713 N->getOperand(1),
6714 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
6715 N->getOperand(2)));
6716 case Intrinsic::loongarch_lsx_vpickve2gr_b:
6717 if (!Subtarget.is64Bit())
6718 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6719 break;
6720 case Intrinsic::loongarch_lsx_vpickve2gr_h:
6721 case Intrinsic::loongarch_lasx_xvpickve2gr_w:
6722 if (!Subtarget.is64Bit())
6723 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6724 break;
6725 case Intrinsic::loongarch_lsx_vpickve2gr_w:
6726 if (!Subtarget.is64Bit())
6727 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_SEXT_ELT);
6728 break;
6729 case Intrinsic::loongarch_lsx_vpickve2gr_bu:
6730 if (!Subtarget.is64Bit())
6731 return lowerVectorPickVE2GR<4>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6732 break;
6733 case Intrinsic::loongarch_lsx_vpickve2gr_hu:
6734 case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
6735 if (!Subtarget.is64Bit())
6736 return lowerVectorPickVE2GR<3>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6737 break;
6738 case Intrinsic::loongarch_lsx_vpickve2gr_wu:
6739 if (!Subtarget.is64Bit())
6740 return lowerVectorPickVE2GR<2>(N, DAG, LoongArchISD::VPICK_ZEXT_ELT);
6741 break;
6742 case Intrinsic::loongarch_lsx_bz_b:
6743 case Intrinsic::loongarch_lsx_bz_h:
6744 case Intrinsic::loongarch_lsx_bz_w:
6745 case Intrinsic::loongarch_lsx_bz_d:
6746 case Intrinsic::loongarch_lasx_xbz_b:
6747 case Intrinsic::loongarch_lasx_xbz_h:
6748 case Intrinsic::loongarch_lasx_xbz_w:
6749 case Intrinsic::loongarch_lasx_xbz_d:
6750 if (!Subtarget.is64Bit())
6751 return DAG.getNode(LoongArchISD::VALL_ZERO, DL, N->getValueType(0),
6752 N->getOperand(1));
6753 break;
6754 case Intrinsic::loongarch_lsx_bz_v:
6755 case Intrinsic::loongarch_lasx_xbz_v:
6756 if (!Subtarget.is64Bit())
6757 return DAG.getNode(LoongArchISD::VANY_ZERO, DL, N->getValueType(0),
6758 N->getOperand(1));
6759 break;
6760 case Intrinsic::loongarch_lsx_bnz_b:
6761 case Intrinsic::loongarch_lsx_bnz_h:
6762 case Intrinsic::loongarch_lsx_bnz_w:
6763 case Intrinsic::loongarch_lsx_bnz_d:
6764 case Intrinsic::loongarch_lasx_xbnz_b:
6765 case Intrinsic::loongarch_lasx_xbnz_h:
6766 case Intrinsic::loongarch_lasx_xbnz_w:
6767 case Intrinsic::loongarch_lasx_xbnz_d:
6768 if (!Subtarget.is64Bit())
6769 return DAG.getNode(LoongArchISD::VALL_NONZERO, DL, N->getValueType(0),
6770 N->getOperand(1));
6771 break;
6772 case Intrinsic::loongarch_lsx_bnz_v:
6773 case Intrinsic::loongarch_lasx_xbnz_v:
6774 if (!Subtarget.is64Bit())
6775 return DAG.getNode(LoongArchISD::VANY_NONZERO, DL, N->getValueType(0),
6776 N->getOperand(1));
6777 break;
6778 case Intrinsic::loongarch_lasx_concat_128_s:
6779 case Intrinsic::loongarch_lasx_concat_128_d:
6780 case Intrinsic::loongarch_lasx_concat_128:
6781 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
6782 N->getOperand(1), N->getOperand(2));
6783 }
6784 return SDValue();
6785}
6786
6789 const LoongArchSubtarget &Subtarget) {
6790 // If the input to MOVGR2FR_W_LA64 is just MOVFR2GR_S_LA64 then the
6791 // conversion is unnecessary and can be replaced with the
6792 // MOVFR2GR_S_LA64 operand (the FPR->GPR->FPR round trip is a no-op).
6793 SDValue Op0 = N->getOperand(0);
6794 if (Op0.getOpcode() == LoongArchISD::MOVFR2GR_S_LA64)
6795 return Op0.getOperand(0);
// No fold applies; leave the node for other combines.
6796 return SDValue();
6797 }
6798
6801 const LoongArchSubtarget &Subtarget) {
6802 // If the input to MOVFR2GR_S_LA64 is just MOVGR2FR_W_LA64 then the
6803 // conversion is unnecessary and can be replaced with the MOVGR2FR_W_LA64
6804 // operand.
6805 SDValue Op0 = N->getOperand(0);
6806 if (Op0->getOpcode() == LoongArchISD::MOVGR2FR_W_LA64) {
// The value originally moved into the FPR must already carry this node's
// result type, otherwise the round-trip fold would change types.
6807 assert(Op0.getOperand(0).getValueType() == N->getSimpleValueType(0) &&
6808 "Unexpected value type!");
6809 return Op0.getOperand(0);
6810 }
// No fold applies; leave the node for other combines.
6811 return SDValue();
6812 }
6813
6816 const LoongArchSubtarget &Subtarget) {
6817 MVT VT = N->getSimpleValueType(0);
6818 unsigned NumBits = VT.getScalarSizeInBits();
6819
6820 // Simplify the inputs.
6821 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Demand all result bits of the (X)VMSKLTZ node; SimplifyDemandedBits may
// still be able to simplify the node's operands based on what they feed.
6822 APInt DemandedMask(APInt::getAllOnes(NumBits));
6823 if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
// Returning the node itself tells DAGCombiner it was updated in place.
6824 return SDValue(N, 0);
6825
6826 return SDValue();
6827 }
6828
6829 static SDValue
6832 const LoongArchSubtarget &Subtarget) {
6833 SDValue Op0 = N->getOperand(0);
6834 SDLoc DL(N);
6835
6836 // If the input to SplitPairF64 is just BuildPairF64 then the operation is
6837 // redundant. Instead, use BuildPairF64's operands directly.
6838 if (Op0->getOpcode() == LoongArchISD::BUILD_PAIR_F64)
6839 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
6840
// Splitting an undef f64 pair simply yields two undef i32 halves.
6841 if (Op0->isUndef()) {
6842 SDValue Lo = DAG.getUNDEF(MVT::i32);
6843 SDValue Hi = DAG.getUNDEF(MVT::i32);
6844 return DCI.CombineTo(N, Lo, Hi);
6845 }
6846
6847 // It's cheaper to materialise two 32-bit integers than to load a double
6848 // from the constant pool and transfer it to integer registers through the
6849 // stack.
// C is the constant-FP input; Lo/Hi are the low/high 32 bits of its raw
// bit pattern.
6851 APInt V = C->getValueAPF().bitcastToAPInt();
6852 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
6853 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
6854 return DCI.CombineTo(N, Lo, Hi);
6855 }
6856
6857 return SDValue();
6858 }
6859
6860 static SDValue
6863 const LoongArchSubtarget &Subtarget) {
// Only run before legalization; the extend/truncate forms removed below may
// be (re)introduced by legalization itself.
6864 if (!DCI.isBeforeLegalize())
6865 return SDValue();
6866
6867 MVT EltVT = N->getSimpleValueType(0);
6868 SDValue Vec = N->getOperand(0);
6869 EVT VecTy = Vec->getValueType(0);
6870 SDValue Idx = N->getOperand(1);
6871 unsigned IdxOp = Idx.getOpcode();
6872 SDLoc DL(N);
6873
// Only variable indices into 256-bit (LASX) vectors are handled here;
// constant indices need no rewrite.
6874 if (!VecTy.is256BitVector() || isa<ConstantSDNode>(Idx))
6875 return SDValue();
6876
6877 // Combine:
6878 // t2 = truncate t1
6879 // t3 = {zero/sign/any}_extend t2
6880 // t4 = extract_vector_elt t0, t3
6881 // to:
6882 // t4 = extract_vector_elt t0, t1
6883 if (IdxOp == ISD::ZERO_EXTEND || IdxOp == ISD::SIGN_EXTEND ||
6884 IdxOp == ISD::ANY_EXTEND) {
6885 SDValue IdxOrig = Idx.getOperand(0);
6886 if (!(IdxOrig.getOpcode() == ISD::TRUNCATE))
6887 return SDValue();
6888
// Use the pre-truncation index value directly.
6889 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
6890 IdxOrig.getOperand(0));
6891 }
6892
6893 return SDValue();
6894 }
6895
6896 /// Do target-specific dag combines on LoongArchISD::VANDN nodes.
6899 const LoongArchSubtarget &Subtarget) {
// VANDN computes AND(NOT(N0), N1); the folds below exploit that identity.
6900 SDValue N0 = N->getOperand(0);
6901 SDValue N1 = N->getOperand(1);
6902 MVT VT = N->getSimpleValueType(0);
6903 SDLoc DL(N);
6904
6905 // VANDN(undef, x) -> 0
6906 // VANDN(x, undef) -> 0
6907 if (N0.isUndef() || N1.isUndef())
6908 return DAG.getConstant(0, DL, VT);
6909
6910 // VANDN(0, x) -> x
6912 return N1;
6913
6914 // VANDN(x, 0) -> 0
6916 return DAG.getConstant(0, DL, VT);
6917
6918 // VANDN(x, -1) -> NOT(x) -> XOR(x, -1)
6920 return DAG.getNOT(DL, N0, VT);
6921
6922 // Turn VANDN back to AND if input is inverted.
6923 if (SDValue Not = isNOT(N0, DAG))
6924 return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
6925
6926 // Folds for better commutativity:
6927 if (N1->hasOneUse()) {
6928 // VANDN(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
6929 if (SDValue Not = isNOT(N1, DAG))
6930 return DAG.getNOT(
6931 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
6932
6933 // VANDN(x, SplatVector(Imm)) -> AND(NOT(x), NOT(SplatVector(~Imm)))
6934 // -> NOT(OR(x, SplatVector(-Imm))
6935 // Combination is performed only when VT is v16i8/v32i8, using `vnori.b` to
6936 // gain benefits.
6937 if (!DCI.isBeforeLegalizeOps() && (VT == MVT::v16i8 || VT == MVT::v32i8) &&
6938 N1.getOpcode() == ISD::BUILD_VECTOR) {
6939 if (SDValue SplatValue =
6940 cast<BuildVectorSDNode>(N1.getNode())->getSplatValue()) {
// NOTE(review): the splat scalar must have no users outside this
// BUILD_VECTOR — presumably so the rewrite is a strict win; confirm.
6941 if (!N1->isOnlyUserOf(SplatValue.getNode()))
6942 return SDValue();
6943
6944 if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
// ~Imm truncated to 8 bits, matching the i8 element width.
6945 uint8_t NCVal = static_cast<uint8_t>(~(C->getSExtValue()));
6946 SDValue Not =
6947 DAG.getSplat(VT, DL, DAG.getTargetConstant(NCVal, DL, MVT::i8));
6948 return DAG.getNOT(
6949 DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)),
6950 VT);
6951 }
6952 }
6953 }
6954 }
6955
6956 return SDValue();
6957 }
6958
6960 DAGCombinerInfo &DCI) const {
6961 SelectionDAG &DAG = DCI.DAG;
// Dispatch to the per-opcode combine helper; each helper returns an empty
// SDValue when no fold applies, in which case we fall through below.
6962 switch (N->getOpcode()) {
6963 default:
6964 break;
6965 case ISD::AND:
6966 return performANDCombine(N, DAG, DCI, Subtarget);
6967 case ISD::OR:
6968 return performORCombine(N, DAG, DCI, Subtarget);
6969 case ISD::SETCC:
6970 return performSETCCCombine(N, DAG, DCI, Subtarget);
6971 case ISD::SRL:
6972 return performSRLCombine(N, DAG, DCI, Subtarget);
6973 case ISD::BITCAST:
6974 return performBITCASTCombine(N, DAG, DCI, Subtarget);
6975 case LoongArchISD::BITREV_W:
6976 return performBITREV_WCombine(N, DAG, DCI, Subtarget);
6977 case LoongArchISD::BR_CC:
6978 return performBR_CCCombine(N, DAG, DCI, Subtarget);
6979 case LoongArchISD::SELECT_CC:
6980 return performSELECT_CCCombine(N, DAG, DCI, Subtarget);
6982 return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
6983 case LoongArchISD::MOVGR2FR_W_LA64:
6984 return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
6985 case LoongArchISD::MOVFR2GR_S_LA64:
6986 return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
6987 case LoongArchISD::VMSKLTZ:
6988 case LoongArchISD::XVMSKLTZ:
6989 return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
6990 case LoongArchISD::SPLIT_PAIR_F64:
6991 return performSPLIT_PAIR_F64Combine(N, DAG, DCI, Subtarget);
6993 return performEXTRACT_VECTOR_ELTCombine(N, DAG, DCI, Subtarget);
6994 case LoongArchISD::VANDN:
6995 return performVANDNCombine(N, DAG, DCI, Subtarget);
6996 }
// No combine matched this opcode.
6997 return SDValue();
6998 }
6999
// Only instrument division when the -loongarch-check-zero-division option
// (ZeroDivCheck cl::opt) is enabled; otherwise leave MBB untouched.
7002 if (!ZeroDivCheck)
7003 return MBB;
7004
7005 // Build instructions:
7006 // MBB:
7007 // div(or mod) $dst, $dividend, $divisor
7008 // bne $divisor, $zero, SinkMBB
7009 // BreakMBB:
7010 // break 7 // BRK_DIVZERO
7011 // SinkMBB:
7012 // fallthrough
7013 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
7014 MachineFunction::iterator It = ++MBB->getIterator();
7015 MachineFunction *MF = MBB->getParent();
7016 auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7017 auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
7018 MF->insert(It, BreakMBB);
7019 MF->insert(It, SinkMBB);
7020
7021 // Transfer the remainder of MBB and its successor edges to SinkMBB.
7022 SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
7023 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
7024
7025 const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
7026 DebugLoc DL = MI.getDebugLoc();
7027 MachineOperand &Divisor = MI.getOperand(2);
7028 Register DivisorReg = Divisor.getReg();
7029
7030 // MBB:
// Skip the trap block when the divisor is non-zero.
7031 BuildMI(MBB, DL, TII.get(LoongArch::BNE))
7032 .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
7033 .addReg(LoongArch::R0)
7034 .addMBB(SinkMBB);
7035 MBB->addSuccessor(BreakMBB);
7036 MBB->addSuccessor(SinkMBB);
7037
7038 // BreakMBB:
7039 // See linux header file arch/loongarch/include/uapi/asm/break.h for the
7040 // definition of BRK_DIVZERO.
7041 BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
7042 BreakMBB->addSuccessor(SinkMBB);
7043
7044 // Clear Divisor's kill flag.
// The divisor is now also read by the BNE above, so the original kill on
// the div/mod instruction would be wrong.
7045 Divisor.setIsKill(false);
7046
7047 return SinkMBB;
7048 }
7049
7050 static MachineBasicBlock *
7052 const LoongArchSubtarget &Subtarget) {
// Map each vector branch pseudo (VBZ/VBNZ and LASX XVBZ/XVBNZ variants) to
// the LSX/LASX set-condition instruction that writes a condition-flag (CFR)
// register, which we then branch on.
7053 unsigned CondOpc;
7054 switch (MI.getOpcode()) {
7055 default:
7056 llvm_unreachable("Unexpected opcode");
7057 case LoongArch::PseudoVBZ:
7058 CondOpc = LoongArch::VSETEQZ_V;
7059 break;
7060 case LoongArch::PseudoVBZ_B:
7061 CondOpc = LoongArch::VSETANYEQZ_B;
7062 break;
7063 case LoongArch::PseudoVBZ_H:
7064 CondOpc = LoongArch::VSETANYEQZ_H;
7065 break;
7066 case LoongArch::PseudoVBZ_W:
7067 CondOpc = LoongArch::VSETANYEQZ_W;
7068 break;
7069 case LoongArch::PseudoVBZ_D:
7070 CondOpc = LoongArch::VSETANYEQZ_D;
7071 break;
7072 case LoongArch::PseudoVBNZ:
7073 CondOpc = LoongArch::VSETNEZ_V;
7074 break;
7075 case LoongArch::PseudoVBNZ_B:
7076 CondOpc = LoongArch::VSETALLNEZ_B;
7077 break;
7078 case LoongArch::PseudoVBNZ_H:
7079 CondOpc = LoongArch::VSETALLNEZ_H;
7080 break;
7081 case LoongArch::PseudoVBNZ_W:
7082 CondOpc = LoongArch::VSETALLNEZ_W;
7083 break;
7084 case LoongArch::PseudoVBNZ_D:
7085 CondOpc = LoongArch::VSETALLNEZ_D;
7086 break;
7087 case LoongArch::PseudoXVBZ:
7088 CondOpc = LoongArch::XVSETEQZ_V;
7089 break;
7090 case LoongArch::PseudoXVBZ_B:
7091 CondOpc = LoongArch::XVSETANYEQZ_B;
7092 break;
7093 case LoongArch::PseudoXVBZ_H:
7094 CondOpc = LoongArch::XVSETANYEQZ_H;
7095 break;
7096 case LoongArch::PseudoXVBZ_W:
7097 CondOpc = LoongArch::XVSETANYEQZ_W;
7098 break;
7099 case LoongArch::PseudoXVBZ_D:
7100 CondOpc = LoongArch::XVSETANYEQZ_D;
7101 break;
7102 case LoongArch::PseudoXVBNZ:
7103 CondOpc = LoongArch::XVSETNEZ_V;
7104 break;
7105 case LoongArch::PseudoXVBNZ_B:
7106 CondOpc = LoongArch::XVSETALLNEZ_B;
7107 break;
7108 case LoongArch::PseudoXVBNZ_H:
7109 CondOpc = LoongArch::XVSETALLNEZ_H;
7110 break;
7111 case LoongArch::PseudoXVBNZ_W:
7112 CondOpc = LoongArch::XVSETALLNEZ_W;
7113 break;
7114 case LoongArch::PseudoXVBNZ_D:
7115 CondOpc = LoongArch::XVSETALLNEZ_D;
7116 break;
7117 }
7118
7119 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7120 const BasicBlock *LLVM_BB = BB->getBasicBlock();
7121 DebugLoc DL = MI.getDebugLoc();
7124
// Build a diamond: BB branches to TrueBB (condition set) or falls through
// to FalseBB; both join in SinkBB where a PHI selects the 1/0 result.
7125 MachineFunction *F = BB->getParent();
7126 MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
7127 MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
7128 MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
7129
7130 F->insert(It, FalseBB);
7131 F->insert(It, TrueBB);
7132 F->insert(It, SinkBB);
7133
7134 // Transfer the remainder of MBB and its successor edges to Sink.
7135 SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
7137
7138 // Insert the real instruction to BB.
7139 Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
7140 BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
7141
7142 // Insert branch.
7143 BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
7144 BB->addSuccessor(FalseBB);
7145 BB->addSuccessor(TrueBB);
7146
7147 // FalseBB.
// Condition flag was zero: materialize 0 and jump to the join block.
7148 Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7149 BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
7150 .addReg(LoongArch::R0)
7151 .addImm(0);
7152 BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
7153 FalseBB->addSuccessor(SinkBB);
7154
7155 // TrueBB.
// Condition flag was non-zero: materialize 1 and fall through to SinkBB.
7156 Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
7157 BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
7158 .addReg(LoongArch::R0)
7159 .addImm(1);
7160 TrueBB->addSuccessor(SinkBB);
7161
7162 // SinkBB: merge the results.
7163 BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
7164 MI.getOperand(0).getReg())
7165 .addReg(RD1)
7166 .addMBB(FalseBB)
7167 .addReg(RD2)
7168 .addMBB(TrueBB);
7169
7170 // The pseudo instruction is gone now.
7171 MI.eraseFromParent();
7172 return SinkBB;
7173 }
7174
7175 static MachineBasicBlock *
7177 const LoongArchSubtarget &Subtarget) {
// Lower the 256-bit (LASX) byte/halfword element-insert pseudos. HalfSize
// is the number of elements in one 128-bit half of the vector.
7178 unsigned InsOp;
7179 unsigned BroadcastOp;
7180 unsigned HalfSize;
7181 switch (MI.getOpcode()) {
7182 default:
7183 llvm_unreachable("Unexpected opcode");
7184 case LoongArch::PseudoXVINSGR2VR_B:
7185 HalfSize = 16;
7186 BroadcastOp = LoongArch::XVREPLGR2VR_B;
7187 InsOp = LoongArch::XVEXTRINS_B;
7188 break;
7189 case LoongArch::PseudoXVINSGR2VR_H:
7190 HalfSize = 8;
7191 BroadcastOp = LoongArch::XVREPLGR2VR_H;
7192 InsOp = LoongArch::XVEXTRINS_H;
7193 break;
7194 }
7195 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7196 const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
7197 const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
7198 DebugLoc DL = MI.getDebugLoc();
7200 // XDst = vector_insert XSrc, Elt, Idx
7201 Register XDst = MI.getOperand(0).getReg();
7202 Register XSrc = MI.getOperand(1).getReg();
7203 Register Elt = MI.getOperand(2).getReg();
7204 unsigned Idx = MI.getOperand(3).getImm();
7205
// Fast path: the source is IMPLICIT_DEF (its lanes carry no value) and the
// index lies in the low 128-bit half, so a 128-bit VINSGR2VR on the
// low subregister followed by SUBREG_TO_REG suffices.
7206 if (XSrc.isVirtual() && MRI.getVRegDef(XSrc)->isImplicitDef() &&
7207 Idx < HalfSize) {
7208 Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
7209 Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
7210
7211 BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
7212 .addReg(XSrc, 0, LoongArch::sub_128);
7213 BuildMI(*BB, MI, DL,
7214 TII->get(HalfSize == 8 ? LoongArch::VINSGR2VR_H
7215 : LoongArch::VINSGR2VR_B),
7216 ScratchSubReg2)
7217 .addReg(ScratchSubReg1)
7218 .addReg(Elt)
7219 .addImm(Idx);
7220
7221 BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), XDst)
7222 .addImm(0)
7223 .addReg(ScratchSubReg2)
7224 .addImm(LoongArch::sub_128);
7225 } else {
// General path: broadcast the scalar to all lanes, line it up with the
// half of XSrc containing Idx, then insert the single element.
7226 Register ScratchReg1 = MRI.createVirtualRegister(RC);
7227 Register ScratchReg2 = MRI.createVirtualRegister(RC);
7228
7229 BuildMI(*BB, MI, DL, TII->get(BroadcastOp), ScratchReg1).addReg(Elt);
7230
// NOTE(review): XVPERMI_Q imms 48 and 18 select which 128-bit halves of
// the two sources are combined (placing the broadcast in the half that
// contains Idx) — confirm against the LoongArch ISA manual encoding.
7231 BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg2)
7232 .addReg(ScratchReg1)
7233 .addReg(XSrc)
7234 .addImm(Idx >= HalfSize ? 48 : 18);
7235
// XVEXTRINS imm = idx * 17 = (idx << 4) | idx, i.e. the same in-half
// index in both the destination and source nibble of the immediate.
7236 BuildMI(*BB, MI, DL, TII->get(InsOp), XDst)
7237 .addReg(XSrc)
7238 .addReg(ScratchReg2)
7239 .addImm((Idx >= HalfSize ? Idx - HalfSize : Idx) * 17);
7240 }
7241
7242 MI.eraseFromParent();
7243 return BB;
7244 }
7245
                                            const LoongArchSubtarget &Subtarget) {
  // Expands PseudoCTPOP: there is no scalar population-count instruction,
  // so the GPR value is bounced through an LSX vector lane, counted with
  // VPCNT, and moved back to a GPR.
  assert(Subtarget.hasExtLSX());
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  DebugLoc DL = MI.getDebugLoc();
  // Operand 0 is the result GPR, operand 1 the source GPR.
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register ScratchReg1 = MRI.createVirtualRegister(RC);
  Register ScratchReg2 = MRI.createVirtualRegister(RC);
  Register ScratchReg3 = MRI.createVirtualRegister(RC);

  // VLDI 0: materialize an all-zero vector so the unused lanes are defined.
  BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
  // Insert the scalar into lane 0; the lane width follows GRLen (.d on
  // LA64, .w on LA32).
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
                                       : LoongArch::VINSGR2VR_W),
          ScratchReg2)
      .addReg(ScratchReg1)
      .addReg(Src)
      .addImm(0);
  // Vector per-element popcount; only lane 0 carries a meaningful value.
  BuildMI(
      *BB, MI, DL,
      TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
      ScratchReg3)
      .addReg(ScratchReg2);
  // Copy lane 0 of the counted vector back into the destination GPR.
  BuildMI(*BB, MI, DL,
          TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
                                       : LoongArch::VPICKVE2GR_W),
          Dst)
      .addReg(ScratchReg3)
      .addImm(0);

  MI.eraseFromParent();
  return BB;
}
7283
// Expands the [X]VMSK* condition-mask pseudos: run the matching VMSK/XVMSK
// instruction over the source vector, optionally invert the result (the EQZ
// forms are implemented as NOT of the NEZ mask), then extract the resulting
// bit-mask into a GPR.
static MachineBasicBlock *
                                          const LoongArchSubtarget &Subtarget) {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();
  // Defaults: byte elements, no inversion step required.
  unsigned EleBits = 8;
  unsigned NotOpc = 0;
  unsigned MskOpc;

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVMSKLTZ_B:
    MskOpc = LoongArch::VMSKLTZ_B;
    break;
  case LoongArch::PseudoVMSKLTZ_H:
    MskOpc = LoongArch::VMSKLTZ_H;
    EleBits = 16;
    break;
  case LoongArch::PseudoVMSKLTZ_W:
    MskOpc = LoongArch::VMSKLTZ_W;
    EleBits = 32;
    break;
  case LoongArch::PseudoVMSKLTZ_D:
    MskOpc = LoongArch::VMSKLTZ_D;
    EleBits = 64;
    break;
  case LoongArch::PseudoVMSKGEZ_B:
    MskOpc = LoongArch::VMSKGEZ_B;
    break;
  case LoongArch::PseudoVMSKEQZ_B:
    // eqz == not(nez): compute VMSKNZ, then NOR the mask with itself below.
    MskOpc = LoongArch::VMSKNZ_B;
    NotOpc = LoongArch::VNOR_V;
    break;
  case LoongArch::PseudoVMSKNEZ_B:
    MskOpc = LoongArch::VMSKNZ_B;
    break;
  case LoongArch::PseudoXVMSKLTZ_B:
    MskOpc = LoongArch::XVMSKLTZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKLTZ_H:
    MskOpc = LoongArch::XVMSKLTZ_H;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 16;
    break;
  case LoongArch::PseudoXVMSKLTZ_W:
    MskOpc = LoongArch::XVMSKLTZ_W;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 32;
    break;
  case LoongArch::PseudoXVMSKLTZ_D:
    MskOpc = LoongArch::XVMSKLTZ_D;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 64;
    break;
  case LoongArch::PseudoXVMSKGEZ_B:
    MskOpc = LoongArch::XVMSKGEZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKEQZ_B:
    MskOpc = LoongArch::XVMSKNZ_B;
    NotOpc = LoongArch::XVNOR_V;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKNEZ_B:
    MskOpc = LoongArch::XVMSKNZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  }

  Register Msk = MRI.createVirtualRegister(RC);
  if (NotOpc) {
    Register Tmp = MRI.createVirtualRegister(RC);
    BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
    // NOR of a value with itself is a bitwise NOT.
    BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
        .addReg(Tmp, RegState::Kill)
        .addReg(Tmp, RegState::Kill);
  } else {
    BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
  }

  // For 256-bit (LASX) register classes, pull a 32-bit word out of each
  // 128-bit half (lane 0 and lane 4) and merge them with BSTRINS so the
  // final mask occupies bits [256/EleBits-1 : 0] of the GPR.
  if (TRI->getRegSizeInBits(*RC) > 128) {
    Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
    Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
        .addReg(Msk)
        .addImm(0);
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
        .addReg(Msk, RegState::Kill)
        .addImm(4);
    BuildMI(*BB, MI, DL,
            TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
                                         : LoongArch::BSTRINS_W),
            Dst)
        .addImm(256 / EleBits - 1)
        .addImm(128 / EleBits);
  } else {
    // 128-bit masks fit in the low half-word of lane 0.
    BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
        .addReg(Msk, RegState::Kill)
        .addImm(0);
  }

  MI.eraseFromParent();
  return BB;
}
7397
// Expands SplitPairF64Pseudo: split an f64 held in an FPR64 into its two
// 32-bit GPR halves (used by the LA32 pair lowering of f64 — see
// LoongArchISD::SPLIT_PAIR_F64 / BUILD_PAIR_F64 users elsewhere).
static MachineBasicBlock *
                                          const LoongArchSubtarget &Subtarget) {
  assert(MI.getOpcode() == LoongArch::SplitPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  // Operands: (outs $Lo, $Hi), (ins $Src).
  Register LoReg = MI.getOperand(0).getReg();
  Register HiReg = MI.getOperand(1).getReg();
  Register SrcReg = MI.getOperand(2).getReg();

  // MOVFR2GR_S_64 reads the low word, MOVFRH2GR_S the high word of the FPR.
  BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFR2GR_S_64), LoReg).addReg(SrcReg);
  BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVFRH2GR_S), HiReg)
      .addReg(SrcReg, getKillRegState(MI.getOperand(2).isKill()));
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
7417
// Expands BuildPairF64Pseudo: combine two 32-bit GPR halves into a single
// f64 FPR64 value (inverse of SplitPairF64Pseudo).
static MachineBasicBlock *
                                          const LoongArchSubtarget &Subtarget) {
  assert(MI.getOpcode() == LoongArch::BuildPairF64Pseudo &&
         "Unexpected instruction");

  MachineFunction &MF = *BB->getParent();
  DebugLoc DL = MI.getDebugLoc();
  // Operands: (outs $Dst), (ins $Lo, $Hi).
  Register TmpReg = MRI.createVirtualRegister(&LoongArch::FPR64RegClass);
  Register DstReg = MI.getOperand(0).getReg();
  Register LoReg = MI.getOperand(1).getReg();
  Register HiReg = MI.getOperand(2).getReg();

  // Write the low word into a temporary FPR first, then insert the high
  // word on top of it to form the final f64.
  BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FR_W_64), TmpReg)
      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()));
  BuildMI(*BB, MI, DL, TII.get(LoongArch::MOVGR2FRH_W), DstReg)
      .addReg(TmpReg, RegState::Kill)
      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()));
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
7441
7443 switch (MI.getOpcode()) {
7444 default:
7445 return false;
7446 case LoongArch::Select_GPR_Using_CC_GPR:
7447 return true;
7448 }
7449}
7450
static MachineBasicBlock *
                                          const LoongArchSubtarget &Subtarget) {
  // To "insert" Select_* instructions, we actually have to insert the triangle
  // control-flow pattern. The incoming instructions know the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and the condcode to use to select the appropriate branch.
  //
  // We produce the following control flow:
  //     HeadMBB
  //     |  \
  //     |  IfFalseMBB
  //     | /
  //    TailMBB
  //
  // When we find a sequence of selects we attempt to optimize their emission
  // by sharing the control flow. Currently we only handle cases where we have
  // multiple selects with the exact same condition (same LHS, RHS and CC).
  // The selects may be interleaved with other instructions if the other
  // instructions meet some requirements we deem safe:
  // - They are not pseudo instructions.
  // - They are debug instructions. Otherwise,
  // - They do not have side-effects, do not access memory and their inputs do
  //   not depend on the results of the select pseudo-instructions.
  // The TrueV/FalseV operands of the selects cannot depend on the result of
  // previous selects in the sequence.
  // These conditions could be further relaxed. See the X86 target for a
  // related approach and more information.

  Register LHS = MI.getOperand(1).getReg();
  // RHS stays invalid when operand 2 is an immediate (the compare-with-
  // immediate form handled when emitting the branch below).
  Register RHS;
  if (MI.getOperand(2).isReg())
    RHS = MI.getOperand(2).getReg();
  // Operand 3 carries the opcode of the conditional branch to emit
  // (used via TII.get(CC) below).
  auto CC = static_cast<unsigned>(MI.getOperand(3).getImm());

  SmallVector<MachineInstr *, 4> SelectDebugValues;
  SmallSet<Register, 4> SelectDests;
  SelectDests.insert(MI.getOperand(0).getReg());

  // Scan forward from MI to find the last select pseudo that can share this
  // triangle, applying the safety rules documented above.
  MachineInstr *LastSelectPseudo = &MI;
  for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
       SequenceMBBI != E; ++SequenceMBBI) {
    if (SequenceMBBI->isDebugInstr())
      continue;
    if (isSelectPseudo(*SequenceMBBI)) {
      if (SequenceMBBI->getOperand(1).getReg() != LHS ||
          !SequenceMBBI->getOperand(2).isReg() ||
          SequenceMBBI->getOperand(2).getReg() != RHS ||
          SequenceMBBI->getOperand(3).getImm() != CC ||
          SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
          SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
        break;
      LastSelectPseudo = &*SequenceMBBI;
      SequenceMBBI->collectDebugValues(SelectDebugValues);
      SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
      continue;
    }
    if (SequenceMBBI->hasUnmodeledSideEffects() ||
        SequenceMBBI->mayLoadOrStore() ||
        SequenceMBBI->usesCustomInsertionHook())
      break;
    if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
          return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
        }))
      break;
  }

  const LoongArchInstrInfo &TII = *Subtarget.getInstrInfo();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();

  MachineBasicBlock *HeadMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, IfFalseMBB);
  F->insert(I, TailMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
  IfFalseMBB->setCallFrameSize(CallFrameSize);
  TailMBB->setCallFrameSize(CallFrameSize);

  // Transfer debug instructions associated with the selects to TailMBB.
  for (MachineInstr *DebugInstr : SelectDebugValues) {
    TailMBB->push_back(DebugInstr->removeFromParent());
  }

  // Move all instructions after the sequence to TailMBB.
  TailMBB->splice(TailMBB->end(), HeadMBB,
                  std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi nodes for the selects.
  TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
  // Set the successors for HeadMBB.
  HeadMBB->addSuccessor(IfFalseMBB);
  HeadMBB->addSuccessor(TailMBB);

  // Insert appropriate branch.
  if (MI.getOperand(2).isImm())
    BuildMI(HeadMBB, DL, TII.get(CC))
        .addReg(LHS)
        .addImm(MI.getOperand(2).getImm())
        .addMBB(TailMBB);
  else
    BuildMI(HeadMBB, DL, TII.get(CC)).addReg(LHS).addReg(RHS).addMBB(TailMBB);

  // IfFalseMBB just falls through to TailMBB.
  IfFalseMBB->addSuccessor(TailMBB);

  // Create PHIs for all of the select pseudo-instructions.
  auto SelectMBBI = MI.getIterator();
  auto SelectEnd = std::next(LastSelectPseudo->getIterator());
  auto InsertionPoint = TailMBB->begin();
  while (SelectMBBI != SelectEnd) {
    auto Next = std::next(SelectMBBI);
    if (isSelectPseudo(*SelectMBBI)) {
      // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
      BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
              TII.get(LoongArch::PHI), SelectMBBI->getOperand(0).getReg())
          .addReg(SelectMBBI->getOperand(4).getReg())
          .addMBB(HeadMBB)
          .addReg(SelectMBBI->getOperand(5).getReg())
          .addMBB(IfFalseMBB);
      SelectMBBI->eraseFromParent();
    }
    SelectMBBI = Next;
  }

  // PHIs were introduced above, so drop the NoPHIs property again.
  F->getProperties().resetNoPHIs();
  return TailMBB;
}
7585
// Custom-inserter dispatch: expand the pseudo instructions that request
// custom insertion, delegating each family to its emit* helper.
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected instr type to insert");
  case LoongArch::DIV_W:
  case LoongArch::DIV_WU:
  case LoongArch::MOD_W:
  case LoongArch::MOD_WU:
  case LoongArch::DIV_D:
  case LoongArch::DIV_DU:
  case LoongArch::MOD_D:
  case LoongArch::MOD_DU:
    // Optionally guard integer division with a divide-by-zero trap
    // (see the loongarch-check-zero-division option).
    return insertDivByZeroTrap(MI, BB);
    break;
  case LoongArch::WRFCSR: {
    // Write a GPR into the FCSR selected by the immediate operand.
    BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
            LoongArch::FCSR0 + MI.getOperand(0).getImm())
        .addReg(MI.getOperand(1).getReg());
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::RDFCSR: {
    // Read the selected FCSR into a GPR.
    MachineInstr *ReadFCSR =
        BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
                MI.getOperand(0).getReg())
            .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
    // The FCSR source has no defining instruction in MIR, so it is marked
    // undef (presumably to satisfy the machine verifier — confirm).
    ReadFCSR->getOperand(1).setIsUndef();
    MI.eraseFromParent();
    return BB;
  }
  case LoongArch::Select_GPR_Using_CC_GPR:
    return emitSelectPseudo(MI, BB, Subtarget);
  case LoongArch::BuildPairF64Pseudo:
    return emitBuildPairF64Pseudo(MI, BB, Subtarget);
  case LoongArch::SplitPairF64Pseudo:
    return emitSplitPairF64Pseudo(MI, BB, Subtarget);
  case LoongArch::PseudoVBZ:
  case LoongArch::PseudoVBZ_B:
  case LoongArch::PseudoVBZ_H:
  case LoongArch::PseudoVBZ_W:
  case LoongArch::PseudoVBZ_D:
  case LoongArch::PseudoVBNZ:
  case LoongArch::PseudoVBNZ_B:
  case LoongArch::PseudoVBNZ_H:
  case LoongArch::PseudoVBNZ_W:
  case LoongArch::PseudoVBNZ_D:
  case LoongArch::PseudoXVBZ:
  case LoongArch::PseudoXVBZ_B:
  case LoongArch::PseudoXVBZ_H:
  case LoongArch::PseudoXVBZ_W:
  case LoongArch::PseudoXVBZ_D:
  case LoongArch::PseudoXVBNZ:
  case LoongArch::PseudoXVBNZ_B:
  case LoongArch::PseudoXVBNZ_H:
  case LoongArch::PseudoXVBNZ_W:
  case LoongArch::PseudoXVBNZ_D:
    return emitVecCondBranchPseudo(MI, BB, Subtarget);
  case LoongArch::PseudoXVINSGR2VR_B:
  case LoongArch::PseudoXVINSGR2VR_H:
    return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
  case LoongArch::PseudoCTPOP:
    return emitPseudoCTPOP(MI, BB, Subtarget);
  case LoongArch::PseudoVMSKLTZ_B:
  case LoongArch::PseudoVMSKLTZ_H:
  case LoongArch::PseudoVMSKLTZ_W:
  case LoongArch::PseudoVMSKLTZ_D:
  case LoongArch::PseudoVMSKGEZ_B:
  case LoongArch::PseudoVMSKEQZ_B:
  case LoongArch::PseudoVMSKNEZ_B:
  case LoongArch::PseudoXVMSKLTZ_B:
  case LoongArch::PseudoXVMSKLTZ_H:
  case LoongArch::PseudoXVMSKLTZ_W:
  case LoongArch::PseudoXVMSKLTZ_D:
  case LoongArch::PseudoXVMSKGEZ_B:
  case LoongArch::PseudoXVMSKEQZ_B:
  case LoongArch::PseudoXVMSKNEZ_B:
    return emitPseudoVMSKCOND(MI, BB, Subtarget);
  case TargetOpcode::STATEPOINT:
    // STATEPOINT is a pseudo instruction which has no implicit defs/uses
    // while bl call instruction (where statepoint will be lowered at the
    // end) has implicit def. This def is early-clobber as it will be set at
    // the moment of the call and earlier than any use is read.
    // Add this implicit dead def here as a workaround.
    MI.addOperand(*MI.getMF(),
                                     LoongArch::R1, /*isDef*/ true,
                                     /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
                                     /*isUndef*/ false, /*isEarlyClobber*/ true));
    if (!Subtarget.is64Bit())
      report_fatal_error("STATEPOINT is only supported on 64-bit targets");
    return emitPatchPoint(MI, BB);
  }
}
7683
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *Fast) const {
  // Misaligned accesses are legal only when the subtarget provides hardware
  // unaligned-access support (the UAL feature).
  if (!Subtarget.hasUAL())
    return false;

  // TODO: set reasonable speed number.
  if (Fast)
    *Fast = 1;
  return true;
}
7695
7696//===----------------------------------------------------------------------===//
7697// Calling Convention Implementation
7698//===----------------------------------------------------------------------===//
7699
// Eight general-purpose registers a0-a7 used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or with soft float ABI.
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
                             LoongArch::R7, LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Eight floating-point registers fa0-fa7 used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// FPR32 and FPR64 alias each other.
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};

// Eight LSX (128-bit) vector registers vr0-vr7 used for passing 128-bit
// vector arguments.
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
                            LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
                            LoongArch::VR6, LoongArch::VR7};

// Eight LASX (256-bit) vector registers xr0-xr7 used for passing 256-bit
// vector arguments.
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
                            LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
                            LoongArch::XR6, LoongArch::XR7};
7724
// Pass a 2*GRLen argument that has been split into two GRLen values through
// registers or the stack as necessary.
// Always returns false (i.e. success): both halves are always assigned a
// location, in GPRs, split register/stack, or fully on the stack.
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
                                     CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
                                     unsigned ValNo2, MVT ValVT2, MVT LocVT2,
                                     ISD::ArgFlagsTy ArgFlags2) {
  unsigned GRLenInBytes = GRLen / 8;
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // At least one half can be passed via register.
    State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
                                     VA1.getLocVT(), CCValAssign::Full));
  } else {
    // Both halves must be passed on the stack, with proper alignment.
    Align StackAlign =
        std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
    State.addLoc(
                 State.AllocateStack(GRLenInBytes, StackAlign),
                 VA1.getLocVT(), CCValAssign::Full));
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
    return false;
  }
  if (Register Reg = State.AllocateReg(ArgGPRs)) {
    // The second half can also be passed via register.
    State.addLoc(
        CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
  } else {
    // The second half is passed via the stack, without additional alignment.
    State.addLoc(CCValAssign::getMem(
        ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
        LocVT2, CCValAssign::Full));
  }
  return false;
}
7761
// Implements the LoongArch calling convention. Returns true upon failure.
                  unsigned ValNo, MVT ValVT,
                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                  CCState &State, bool IsRet, Type *OrigTy) {
  unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
  // NOTE(review): "Unspport" is a pre-existing typo in this assert message.
  assert((GRLen == 32 || GRLen == 64) && "Unspport GRLen");
  MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
  MVT LocVT = ValVT;

  // Any return value split into more than two values can't be returned
  // directly.
  if (IsRet && ValNo > 1)
    return true;

  // If passing a variadic argument, or if no FPR is available.
  bool UseGPRForFloat = true;

  // Per-ABI policy for whether floats travel in GPRs.
  switch (ABI) {
  default:
    llvm_unreachable("Unexpected ABI");
    break;
    UseGPRForFloat = ArgFlags.isVarArg();
    break;
    break;
  }

  // If this is a variadic argument, the LoongArch calling convention requires
  // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
  // byte alignment. An aligned register should be used regardless of whether
  // the original argument was split during legalisation or not. The argument
  // will not be passed by registers if the original type is larger than
  // 2*GRLen, so the register alignment rule does not apply.
  unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
  if (ArgFlags.isVarArg() &&
      ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
      DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
    // Skip 'odd' register if necessary.
    if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
      State.AllocateReg(ArgGPRs);
  }

  SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
  SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
      State.getPendingArgFlags();

  assert(PendingLocs.size() == PendingArgFlags.size() &&
         "PendingLocs and PendingArgFlags out of sync");

  // FPR32 and FPR64 alias each other.
  if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
    UseGPRForFloat = true;

  if (UseGPRForFloat && ValVT == MVT::f32) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
    LocVT = MVT::i64;
    LocInfo = CCValAssign::BCvt;
  } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
    // Handle passing f64 on LA32D with a soft float ABI or when floating point
    // registers are exhausted.
    assert(PendingLocs.empty() && "Can't lower f64 if it is split");
    // Depending on available argument GPRS, f64 may be passed in a pair of
    // GPRs, split between a GPR and the stack, or passed completely on the
    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
    // cases.
    MCRegister Reg = State.AllocateReg(ArgGPRs);
    if (!Reg) {
      int64_t StackOffset = State.AllocateStack(8, Align(8));
      State.addLoc(
          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
      return false;
    }
    LocVT = MVT::i32;
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    MCRegister HiReg = State.AllocateReg(ArgGPRs);
    if (HiReg) {
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
    } else {
      int64_t StackOffset = State.AllocateStack(4, Align(4));
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
    }
    return false;
  }

  // Split arguments might be passed indirectly, so keep track of the pending
  // values.
  if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
    LocVT = GRLenVT;
    LocInfo = CCValAssign::Indirect;
    PendingLocs.push_back(
        CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
    PendingArgFlags.push_back(ArgFlags);
    if (!ArgFlags.isSplitEnd()) {
      return false;
    }
  }

  // If the split argument only had two elements, it should be passed directly
  // in registers or on the stack.
  if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
      PendingLocs.size() <= 2) {
    assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
    // Apply the normal calling convention rules to the first half of the
    // split argument.
    CCValAssign VA = PendingLocs[0];
    ISD::ArgFlagsTy AF = PendingArgFlags[0];
    PendingLocs.clear();
    PendingArgFlags.clear();
    return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
                                    ArgFlags);
  }

  // Allocate to a register if possible, or else a stack slot.
  Register Reg;
  unsigned StoreSizeBytes = GRLen / 8;
  Align StackAlign = Align(GRLen / 8);

  // Choose the register file (and the stack-slot size/alignment used if no
  // register is left) by value type.
  if (ValVT == MVT::f32 && !UseGPRForFloat) {
    Reg = State.AllocateReg(ArgFPR32s);
  } else if (ValVT == MVT::f64 && !UseGPRForFloat) {
    Reg = State.AllocateReg(ArgFPR64s);
  } else if (ValVT.is128BitVector()) {
    Reg = State.AllocateReg(ArgVRs);
    UseGPRForFloat = false;
    StoreSizeBytes = 16;
    StackAlign = Align(16);
  } else if (ValVT.is256BitVector()) {
    Reg = State.AllocateReg(ArgXRs);
    UseGPRForFloat = false;
    StoreSizeBytes = 32;
    StackAlign = Align(32);
  } else {
    Reg = State.AllocateReg(ArgGPRs);
  }

  unsigned StackOffset =
      Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);

  // If we reach this point and PendingLocs is non-empty, we must be at the
  // end of a split argument that must be passed indirectly.
  if (!PendingLocs.empty()) {
    assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
    assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
    for (auto &It : PendingLocs) {
      if (Reg)
        It.convertToReg(Reg);
      else
        It.convertToMem(StackOffset);
      State.addLoc(It);
    }
    PendingLocs.clear();
    PendingArgFlags.clear();
    return false;
  }
  assert((!UseGPRForFloat || LocVT == GRLenVT) &&
         "Expected an GRLenVT at this stage");

  if (Reg) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }

  // When a floating-point value is passed on the stack, no bit-cast is needed.
  if (ValVT.isFloatingPoint()) {
    LocVT = ValVT;
    LocInfo = CCValAssign::Full;
  }

  State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
  return false;
}
7944
// Run the per-argument calling-convention function Fn over all incoming
// values (formal arguments, or return values when IsRet is true) and abort
// with a debug message if any value cannot be assigned a location.
void LoongArchTargetLowering::analyzeInputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
    LoongArchCCAssignFn Fn) const {
  FunctionType *FType = MF.getFunction().getFunctionType();
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    MVT ArgVT = Ins[i].VT;
    Type *ArgTy = nullptr;
    // Recover the original IR type so the CC function can reason about
    // alignment/size of the unsplit value.
    if (IsRet)
      ArgTy = FType->getReturnType();
    else if (Ins[i].isOrigArg())
      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
           CCInfo, IsRet, ArgTy)) {
      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
                        << '\n');
      llvm_unreachable("");
    }
  }
}
7967
// Counterpart of analyzeInputArgs for outgoing values (call arguments, or
// return values); the original IR type is taken from the call lowering info
// when available.
void LoongArchTargetLowering::analyzeOutputArgs(
    MachineFunction &MF, CCState &CCInfo,
    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
    CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    MVT ArgVT = Outs[i].VT;
    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
        MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
    if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
           CCInfo, IsRet, OrigTy)) {
      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
                        << "\n");
      llvm_unreachable("");
    }
  }
}
7985
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
                                const CCValAssign &VA, const SDLoc &DL) {
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // An f32 carried in a 64-bit GPR needs the dedicated LA64 move node;
    // every other bit-convert case is a plain bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
    break;
  }
  return Val;
}
8005
// Materialize an argument that arrives in a physical register: mark the
// physreg live-in, copy it into a fresh virtual register, then convert from
// the location type to the value type.
                                const CCValAssign &VA, const SDLoc &DL,
                                const ISD::InputArg &In,
                                const LoongArchTargetLowering &TLI) {
  EVT LocVT = VA.getLocVT();
  SDValue Val;
  const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
  Register VReg = RegInfo.createVirtualRegister(RC);
  RegInfo.addLiveIn(VA.getLocReg(), VReg);
  Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);

  // If input is sign extended from 32 bits, note it for the OptW pass.
  if (In.isOrigArg()) {
    Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
    if (OrigArg->getType()->isIntegerTy()) {
      unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
      // An input zero extended from i31 can also be considered sign extended.
      if ((BitWidth <= 32 && In.Flags.isSExt()) ||
          (BitWidth < 32 && In.Flags.isZExt())) {
        LAFI->addSExt32Register(VReg);
      }
    }
  }

  return convertLocVTToValVT(DAG, Val, VA, DL);
}
8036
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
// Loads an argument assigned to a fixed stack slot: creates an immutable
// frame object at the assigned offset and emits an (extending) load of the
// location type.
                                const CCValAssign &VA, const SDLoc &DL) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT ValVT = VA.getValVT();
  int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
                                 /*IsImmutable=*/true);
  SDValue FIN = DAG.getFrameIndex(

  ISD::LoadExtType ExtType;
  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
  case CCValAssign::BCvt:
    ExtType = ISD::NON_EXTLOAD;
    break;
  }
  return DAG.getExtLoad(
      ExtType, DL, VA.getLocVT(), Chain, FIN,
}
8063
// Reassemble an f64 that the LA32 convention split across a GPR (low half,
// always a register per the assert below) plus either a second GPR or a
// 4-byte stack slot (high half), using BUILD_PAIR_F64.
                                 const CCValAssign &VA,
                                 const CCValAssign &HiVA,
                                 const SDLoc &DL) {
  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
         "Unexpected VA");
  MachineFrameInfo &MFI = MF.getFrameInfo();

  assert(VA.isRegLoc() && "Expected register VA assignment");

  Register LoVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
  SDValue Hi;
  if (HiVA.isMemLoc()) {
    // Second half of f64 is passed on the stack.
    int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
                                   /*IsImmutable=*/true);
    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
  } else {
    // Second half of f64 is passed in another GPR.
    Register HiVReg = RegInfo.createVirtualRegister(&LoongArch::GPRRegClass);
    RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
  }
  return DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64, Lo, Hi);
}
8095
// Convert Val from its value type to the location type used to pass it
// (the inverse of convertLocVTToValVT above).
                                const CCValAssign &VA, const SDLoc &DL) {
  EVT LocVT = VA.getLocVT();

  switch (VA.getLocInfo()) {
  default:
    llvm_unreachable("Unexpected CCValAssign::LocInfo");
  case CCValAssign::Full:
    break;
  case CCValAssign::BCvt:
    // f32 headed into a 64-bit GPR uses the dedicated LA64 move node;
    // every other bit-convert case is a plain bitcast.
    if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
      Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
    else
      Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
    break;
  }
  return Val;
}
8114
8115static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
8116 CCValAssign::LocInfo LocInfo,
8117 ISD::ArgFlagsTy ArgFlags, Type *OrigTy,
8118 CCState &State) {
8119 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
8120 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
8121 // s0 s1 s2 s3 s4 s5 s6 s7 s8
8122 static const MCPhysReg GPRList[] = {
8123 LoongArch::R23, LoongArch::R24, LoongArch::R25,
8124 LoongArch::R26, LoongArch::R27, LoongArch::R28,
8125 LoongArch::R29, LoongArch::R30, LoongArch::R31};
8126 if (MCRegister Reg = State.AllocateReg(GPRList)) {
8127 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8128 return false;
8129 }
8130 }
8131
8132 if (LocVT == MVT::f32) {
8133 // Pass in STG registers: F1, F2, F3, F4
8134 // fs0,fs1,fs2,fs3
8135 static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
8136 LoongArch::F26, LoongArch::F27};
8137 if (MCRegister Reg = State.AllocateReg(FPR32List)) {
8138 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8139 return false;
8140 }
8141 }
8142
8143 if (LocVT == MVT::f64) {
8144 // Pass in STG registers: D1, D2, D3, D4
8145 // fs4,fs5,fs6,fs7
8146 static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
8147 LoongArch::F30_64, LoongArch::F31_64};
8148 if (MCRegister Reg = State.AllocateReg(FPR64List)) {
8149 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
8150 return false;
8151 }
8152 }
8153
8154 report_fatal_error("No registers left in GHC calling convention");
8155 return true;
8156}
8157
8158// Transform physical registers into virtual registers.
8160 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8161 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
8162 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
8163
8165
8166 switch (CallConv) {
8167 default:
8168 llvm_unreachable("Unsupported calling convention");
8169 case CallingConv::C:
8170 case CallingConv::Fast:
8172 break;
8173 case CallingConv::GHC:
8174 if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
8175 !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
8177 "GHC calling convention requires the F and D extensions");
8178 }
8179
8180 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8181 MVT GRLenVT = Subtarget.getGRLenVT();
8182 unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
8183 // Used with varargs to acumulate store chains.
8184 std::vector<SDValue> OutChains;
8185
8186 // Assign locations to all of the incoming arguments.
8188 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8189
8190 if (CallConv == CallingConv::GHC)
8192 else
8193 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
8194
8195 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
8196 CCValAssign &VA = ArgLocs[i];
8197 SDValue ArgValue;
8198 // Passing f64 on LA32D with a soft float ABI must be handled as a special
8199 // case.
8200 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8201 assert(VA.needsCustom());
8202 ArgValue = unpackF64OnLA32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
8203 } else if (VA.isRegLoc())
8204 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
8205 else
8206 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
8207 if (VA.getLocInfo() == CCValAssign::Indirect) {
8208 // If the original argument was split and passed by reference, we need to
8209 // load all parts of it here (using the same address).
8210 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
8212 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
8213 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
8214 assert(ArgPartOffset == 0);
8215 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
8216 CCValAssign &PartVA = ArgLocs[i + 1];
8217 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
8218 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8219 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
8220 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
8222 ++i;
8223 ++InsIdx;
8224 }
8225 continue;
8226 }
8227 InVals.push_back(ArgValue);
8228 }
8229
8230 if (IsVarArg) {
8232 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
8233 const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
8234 MachineFrameInfo &MFI = MF.getFrameInfo();
8235 MachineRegisterInfo &RegInfo = MF.getRegInfo();
8236 auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
8237
8238 // Offset of the first variable argument from stack pointer, and size of
8239 // the vararg save area. For now, the varargs save area is either zero or
8240 // large enough to hold a0-a7.
8241 int VaArgOffset, VarArgsSaveSize;
8242
8243 // If all registers are allocated, then all varargs must be passed on the
8244 // stack and we don't need to save any argregs.
8245 if (ArgRegs.size() == Idx) {
8246 VaArgOffset = CCInfo.getStackSize();
8247 VarArgsSaveSize = 0;
8248 } else {
8249 VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
8250 VaArgOffset = -VarArgsSaveSize;
8251 }
8252
8253 // Record the frame index of the first variable argument
8254 // which is a value necessary to VASTART.
8255 int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8256 LoongArchFI->setVarArgsFrameIndex(FI);
8257
8258 // If saving an odd number of registers then create an extra stack slot to
8259 // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
8260 // offsets to even-numbered registered remain 2*GRLen-aligned.
8261 if (Idx % 2) {
8262 MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
8263 true);
8264 VarArgsSaveSize += GRLenInBytes;
8265 }
8266
8267 // Copy the integer registers that may have been used for passing varargs
8268 // to the vararg save area.
8269 for (unsigned I = Idx; I < ArgRegs.size();
8270 ++I, VaArgOffset += GRLenInBytes) {
8271 const Register Reg = RegInfo.createVirtualRegister(RC);
8272 RegInfo.addLiveIn(ArgRegs[I], Reg);
8273 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
8274 FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
8275 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8276 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
8278 cast<StoreSDNode>(Store.getNode())
8279 ->getMemOperand()
8280 ->setValue((Value *)nullptr);
8281 OutChains.push_back(Store);
8282 }
8283 LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
8284 }
8285
8286 // All stores are grouped in one node to allow the matching between
8287 // the size of Ins and InVals. This only happens for vararg functions.
8288 if (!OutChains.empty()) {
8289 OutChains.push_back(Chain);
8290 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
8291 }
8292
8293 return Chain;
8294}
8295
8297 return CI->isTailCall();
8298}
8299
8300// Check if the return value is used as only a return value, as otherwise
8301// we can't perform a tail-call.
8303 SDValue &Chain) const {
8304 if (N->getNumValues() != 1)
8305 return false;
8306 if (!N->hasNUsesOfValue(1, 0))
8307 return false;
8308
8309 SDNode *Copy = *N->user_begin();
8310 if (Copy->getOpcode() != ISD::CopyToReg)
8311 return false;
8312
8313 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
8314 // isn't safe to perform a tail call.
8315 if (Copy->getGluedNode())
8316 return false;
8317
8318 // The copy must be used by a LoongArchISD::RET, and nothing else.
8319 bool HasRet = false;
8320 for (SDNode *Node : Copy->users()) {
8321 if (Node->getOpcode() != LoongArchISD::RET)
8322 return false;
8323 HasRet = true;
8324 }
8325
8326 if (!HasRet)
8327 return false;
8328
8329 Chain = Copy->getOperand(0);
8330 return true;
8331}
8332
8333// Check whether the call is eligible for tail call optimization.
8334bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
8335 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
8336 const SmallVectorImpl<CCValAssign> &ArgLocs) const {
8337
8338 auto CalleeCC = CLI.CallConv;
8339 auto &Outs = CLI.Outs;
8340 auto &Caller = MF.getFunction();
8341 auto CallerCC = Caller.getCallingConv();
8342
8343 // Do not tail call opt if the stack is used to pass parameters.
8344 if (CCInfo.getStackSize() != 0)
8345 return false;
8346
8347 // Do not tail call opt if any parameters need to be passed indirectly.
8348 for (auto &VA : ArgLocs)
8349 if (VA.getLocInfo() == CCValAssign::Indirect)
8350 return false;
8351
8352 // Do not tail call opt if either caller or callee uses struct return
8353 // semantics.
8354 auto IsCallerStructRet = Caller.hasStructRetAttr();
8355 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
8356 if (IsCallerStructRet || IsCalleeStructRet)
8357 return false;
8358
8359 // Do not tail call opt if either the callee or caller has a byval argument.
8360 for (auto &Arg : Outs)
8361 if (Arg.Flags.isByVal())
8362 return false;
8363
8364 // The callee has to preserve all registers the caller needs to preserve.
8365 const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
8366 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
8367 if (CalleeCC != CallerCC) {
8368 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
8369 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
8370 return false;
8371 }
8372 return true;
8373}
8374
8376 return DAG.getDataLayout().getPrefTypeAlign(
8377 VT.getTypeForEVT(*DAG.getContext()));
8378}
8379
8380// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
8381// and output parameter nodes.
8382SDValue
8384 SmallVectorImpl<SDValue> &InVals) const {
8385 SelectionDAG &DAG = CLI.DAG;
8386 SDLoc &DL = CLI.DL;
8388 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
8390 SDValue Chain = CLI.Chain;
8391 SDValue Callee = CLI.Callee;
8392 CallingConv::ID CallConv = CLI.CallConv;
8393 bool IsVarArg = CLI.IsVarArg;
8394 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8395 MVT GRLenVT = Subtarget.getGRLenVT();
8396 bool &IsTailCall = CLI.IsTailCall;
8397
8399
8400 // Analyze the operands of the call, assigning locations to each operand.
8402 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
8403
8404 if (CallConv == CallingConv::GHC)
8405 ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
8406 else
8407 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
8408
8409 // Check if it's really possible to do a tail call.
8410 if (IsTailCall)
8411 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
8412
8413 if (IsTailCall)
8414 ++NumTailCalls;
8415 else if (CLI.CB && CLI.CB->isMustTailCall())
8416 report_fatal_error("failed to perform tail call elimination on a call "
8417 "site marked musttail");
8418
8419 // Get a count of how many bytes are to be pushed on the stack.
8420 unsigned NumBytes = ArgCCInfo.getStackSize();
8421
8422 // Create local copies for byval args.
8423 SmallVector<SDValue> ByValArgs;
8424 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8425 ISD::ArgFlagsTy Flags = Outs[i].Flags;
8426 if (!Flags.isByVal())
8427 continue;
8428
8429 SDValue Arg = OutVals[i];
8430 unsigned Size = Flags.getByValSize();
8431 Align Alignment = Flags.getNonZeroByValAlign();
8432
8433 int FI =
8434 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
8435 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
8436 SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
8437
8438 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
8439 /*IsVolatile=*/false,
8440 /*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
8442 ByValArgs.push_back(FIPtr);
8443 }
8444
8445 if (!IsTailCall)
8446 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
8447
8448 // Copy argument values to their designated locations.
8450 SmallVector<SDValue> MemOpChains;
8451 SDValue StackPtr;
8452 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
8453 ++i, ++OutIdx) {
8454 CCValAssign &VA = ArgLocs[i];
8455 SDValue ArgValue = OutVals[OutIdx];
8456 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
8457
8458 // Handle passing f64 on LA32D with a soft float ABI as a special case.
8459 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8460 assert(VA.isRegLoc() && "Expected register VA assignment");
8461 assert(VA.needsCustom());
8462 SDValue SplitF64 =
8463 DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8464 DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
8465 SDValue Lo = SplitF64.getValue(0);
8466 SDValue Hi = SplitF64.getValue(1);
8467
8468 Register RegLo = VA.getLocReg();
8469 RegsToPass.push_back(std::make_pair(RegLo, Lo));
8470
8471 // Get the CCValAssign for the Hi part.
8472 CCValAssign &HiVA = ArgLocs[++i];
8473
8474 if (HiVA.isMemLoc()) {
8475 // Second half of f64 is passed on the stack.
8476 if (!StackPtr.getNode())
8477 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8479 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8480 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
8481 // Emit the store.
8482 MemOpChains.push_back(DAG.getStore(
8483 Chain, DL, Hi, Address,
8485 } else {
8486 // Second half of f64 is passed in another GPR.
8487 Register RegHigh = HiVA.getLocReg();
8488 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
8489 }
8490 continue;
8491 }
8492
8493 // Promote the value if needed.
8494 // For now, only handle fully promoted and indirect arguments.
8495 if (VA.getLocInfo() == CCValAssign::Indirect) {
8496 // Store the argument in a stack slot and pass its address.
8497 Align StackAlign =
8498 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
8499 getPrefTypeAlign(ArgValue.getValueType(), DAG));
8500 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
8501 // If the original argument was split and passed by reference, we need to
8502 // store the required parts of it here (and pass just one address).
8503 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
8504 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
8505 assert(ArgPartOffset == 0);
8506 // Calculate the total size to store. We don't have access to what we're
8507 // actually storing other than performing the loop and collecting the
8508 // info.
8510 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
8511 SDValue PartValue = OutVals[OutIdx + 1];
8512 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
8513 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
8514 EVT PartVT = PartValue.getValueType();
8515
8516 StoredSize += PartVT.getStoreSize();
8517 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
8518 Parts.push_back(std::make_pair(PartValue, Offset));
8519 ++i;
8520 ++OutIdx;
8521 }
8522 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
8523 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
8524 MemOpChains.push_back(
8525 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
8527 for (const auto &Part : Parts) {
8528 SDValue PartValue = Part.first;
8529 SDValue PartOffset = Part.second;
8531 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
8532 MemOpChains.push_back(
8533 DAG.getStore(Chain, DL, PartValue, Address,
8535 }
8536 ArgValue = SpillSlot;
8537 } else {
8538 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
8539 }
8540
8541 // Use local copy if it is a byval arg.
8542 if (Flags.isByVal())
8543 ArgValue = ByValArgs[j++];
8544
8545 if (VA.isRegLoc()) {
8546 // Queue up the argument copies and emit them at the end.
8547 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
8548 } else {
8549 assert(VA.isMemLoc() && "Argument not register or memory");
8550 assert(!IsTailCall && "Tail call not allowed if stack is used "
8551 "for passing parameters");
8552
8553 // Work out the address of the stack slot.
8554 if (!StackPtr.getNode())
8555 StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
8557 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
8559
8560 // Emit the store.
8561 MemOpChains.push_back(
8562 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
8563 }
8564 }
8565
8566 // Join the stores, which are independent of one another.
8567 if (!MemOpChains.empty())
8568 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
8569
8570 SDValue Glue;
8571
8572 // Build a sequence of copy-to-reg nodes, chained and glued together.
8573 for (auto &Reg : RegsToPass) {
8574 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
8575 Glue = Chain.getValue(1);
8576 }
8577
8578 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
8579 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
8580 // split it and then direct call can be matched by PseudoCALL.
8582 const GlobalValue *GV = S->getGlobal();
8583 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
8586 Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
8587 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
8588 unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
8591 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
8592 }
8593
8594 // The first call operand is the chain and the second is the target address.
8596 Ops.push_back(Chain);
8597 Ops.push_back(Callee);
8598
8599 // Add argument registers to the end of the list so that they are
8600 // known live into the call.
8601 for (auto &Reg : RegsToPass)
8602 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
8603
8604 if (!IsTailCall) {
8605 // Add a register mask operand representing the call-preserved registers.
8606 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
8607 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
8608 assert(Mask && "Missing call preserved mask for calling convention");
8609 Ops.push_back(DAG.getRegisterMask(Mask));
8610 }
8611
8612 // Glue the call to the argument copies, if any.
8613 if (Glue.getNode())
8614 Ops.push_back(Glue);
8615
8616 // Emit the call.
8617 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
8618 unsigned Op;
8619 switch (DAG.getTarget().getCodeModel()) {
8620 default:
8621 report_fatal_error("Unsupported code model");
8622 case CodeModel::Small:
8623 Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
8624 break;
8625 case CodeModel::Medium:
8626 assert(Subtarget.is64Bit() && "Medium code model requires LA64");
8627 Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
8628 break;
8629 case CodeModel::Large:
8630 assert(Subtarget.is64Bit() && "Large code model requires LA64");
8631 Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
8632 break;
8633 }
8634
8635 if (IsTailCall) {
8637 SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
8638 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
8639 return Ret;
8640 }
8641
8642 Chain = DAG.getNode(Op, DL, NodeTys, Ops);
8643 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
8644 Glue = Chain.getValue(1);
8645
8646 // Mark the end of the call, which is glued to the call itself.
8647 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
8648 Glue = Chain.getValue(1);
8649
8650 // Assign locations to each value returned by this call.
8652 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
8653 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
8654
8655 // Copy all of the result registers out of their specified physreg.
8656 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
8657 auto &VA = RVLocs[i];
8658 // Copy the value out.
8659 SDValue RetValue =
8660 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
8661 // Glue the RetValue to the end of the call sequence.
8662 Chain = RetValue.getValue(1);
8663 Glue = RetValue.getValue(2);
8664
8665 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8666 assert(VA.needsCustom());
8667 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
8668 MVT::i32, Glue);
8669 Chain = RetValue2.getValue(1);
8670 Glue = RetValue2.getValue(2);
8671 RetValue = DAG.getNode(LoongArchISD::BUILD_PAIR_F64, DL, MVT::f64,
8672 RetValue, RetValue2);
8673 } else
8674 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
8675
8676 InVals.push_back(RetValue);
8677 }
8678
8679 return Chain;
8680}
8681
8683 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
8684 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
8685 const Type *RetTy) const {
8687 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
8688
8689 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
8690 LoongArchABI::ABI ABI =
8691 MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
8692 if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
8693 Outs[i].Flags, CCInfo, /*IsRet=*/true, nullptr))
8694 return false;
8695 }
8696 return true;
8697}
8698
8700 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
8702 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
8703 SelectionDAG &DAG) const {
8704 // Stores the assignment of the return value to a location.
8706
8707 // Info about the registers and stack slot.
8708 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
8709 *DAG.getContext());
8710
8711 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
8712 nullptr, CC_LoongArch);
8713 if (CallConv == CallingConv::GHC && !RVLocs.empty())
8714 report_fatal_error("GHC functions return void only");
8715 SDValue Glue;
8716 SmallVector<SDValue, 4> RetOps(1, Chain);
8717
8718 // Copy the result values into the output registers.
8719 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
8720 SDValue Val = OutVals[OutIdx];
8721 CCValAssign &VA = RVLocs[i];
8722 assert(VA.isRegLoc() && "Can only return in registers!");
8723
8724 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
8725 // Handle returning f64 on LA32D with a soft float ABI.
8726 assert(VA.isRegLoc() && "Expected return via registers");
8727 assert(VA.needsCustom());
8728 SDValue SplitF64 = DAG.getNode(LoongArchISD::SPLIT_PAIR_F64, DL,
8729 DAG.getVTList(MVT::i32, MVT::i32), Val);
8730 SDValue Lo = SplitF64.getValue(0);
8731 SDValue Hi = SplitF64.getValue(1);
8732 Register RegLo = VA.getLocReg();
8733 Register RegHi = RVLocs[++i].getLocReg();
8734
8735 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
8736 Glue = Chain.getValue(1);
8737 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
8738 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
8739 Glue = Chain.getValue(1);
8740 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
8741 } else {
8742 // Handle a 'normal' return.
8743 Val = convertValVTToLocVT(DAG, Val, VA, DL);
8744 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
8745
8746 // Guarantee that all emitted copies are stuck together.
8747 Glue = Chain.getValue(1);
8748 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
8749 }
8750 }
8751
8752 RetOps[0] = Chain; // Update chain.
8753
8754 // Add the glue node if we have it.
8755 if (Glue.getNode())
8756 RetOps.push_back(Glue);
8757
8758 return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
8759}
8760
8761// Check if a constant splat can be generated using [x]vldi, where imm[12] == 1.
8762// Note: The following prefixes are excluded:
8763// imm[11:8] == 4'b0000, 4'b0100, 4'b1000
8764// as they can be represented using [x]vrepli.[whb]
8766 const APInt &SplatValue, const unsigned SplatBitSize) const {
8767 uint64_t RequiredImm = 0;
8768 uint64_t V = SplatValue.getZExtValue();
8769 if (SplatBitSize == 16 && !(V & 0x00FF)) {
8770 // 4'b0101
8771 RequiredImm = (0b10101 << 8) | (V >> 8);
8772 return {true, RequiredImm};
8773 } else if (SplatBitSize == 32) {
8774 // 4'b0001
8775 if (!(V & 0xFFFF00FF)) {
8776 RequiredImm = (0b10001 << 8) | (V >> 8);
8777 return {true, RequiredImm};
8778 }
8779 // 4'b0010
8780 if (!(V & 0xFF00FFFF)) {
8781 RequiredImm = (0b10010 << 8) | (V >> 16);
8782 return {true, RequiredImm};
8783 }
8784 // 4'b0011
8785 if (!(V & 0x00FFFFFF)) {
8786 RequiredImm = (0b10011 << 8) | (V >> 24);
8787 return {true, RequiredImm};
8788 }
8789 // 4'b0110
8790 if ((V & 0xFFFF00FF) == 0xFF) {
8791 RequiredImm = (0b10110 << 8) | (V >> 8);
8792 return {true, RequiredImm};
8793 }
8794 // 4'b0111
8795 if ((V & 0xFF00FFFF) == 0xFFFF) {
8796 RequiredImm = (0b10111 << 8) | (V >> 16);
8797 return {true, RequiredImm};
8798 }
8799 // 4'b1010
8800 if ((V & 0x7E07FFFF) == 0x3E000000 || (V & 0x7E07FFFF) == 0x40000000) {
8801 RequiredImm =
8802 (0b11010 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8803 return {true, RequiredImm};
8804 }
8805 } else if (SplatBitSize == 64) {
8806 // 4'b1011
8807 if ((V & 0xFFFFFFFF7E07FFFFULL) == 0x3E000000ULL ||
8808 (V & 0xFFFFFFFF7E07FFFFULL) == 0x40000000ULL) {
8809 RequiredImm =
8810 (0b11011 << 8) | (((V >> 24) & 0xC0) ^ 0x40) | ((V >> 19) & 0x3F);
8811 return {true, RequiredImm};
8812 }
8813 // 4'b1100
8814 if ((V & 0x7FC0FFFFFFFFFFFFULL) == 0x4000000000000000ULL ||
8815 (V & 0x7FC0FFFFFFFFFFFFULL) == 0x3FC0000000000000ULL) {
8816 RequiredImm =
8817 (0b11100 << 8) | (((V >> 56) & 0xC0) ^ 0x40) | ((V >> 48) & 0x3F);
8818 return {true, RequiredImm};
8819 }
8820 // 4'b1001
8821 auto sameBitsPreByte = [](uint64_t x) -> std::pair<bool, uint8_t> {
8822 uint8_t res = 0;
8823 for (int i = 0; i < 8; ++i) {
8824 uint8_t byte = x & 0xFF;
8825 if (byte == 0 || byte == 0xFF)
8826 res |= ((byte & 1) << i);
8827 else
8828 return {false, 0};
8829 x >>= 8;
8830 }
8831 return {true, res};
8832 };
8833 auto [IsSame, Suffix] = sameBitsPreByte(V);
8834 if (IsSame) {
8835 RequiredImm = (0b11001 << 8) | Suffix;
8836 return {true, RequiredImm};
8837 }
8838 }
8839 return {false, RequiredImm};
8840}
8841
8843 EVT VT) const {
8844 if (!Subtarget.hasExtLSX())
8845 return false;
8846
8847 if (VT == MVT::f32) {
8848 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7e07ffff;
8849 return (masked == 0x3e000000 || masked == 0x40000000);
8850 }
8851
8852 if (VT == MVT::f64) {
8853 uint64_t masked = Imm.bitcastToAPInt().getZExtValue() & 0x7fc0ffffffffffff;
8854 return (masked == 0x3fc0000000000000 || masked == 0x4000000000000000);
8855 }
8856
8857 return false;
8858}
8859
8860bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8861 bool ForCodeSize) const {
8862 // TODO: Maybe need more checks here after vector extension is supported.
8863 if (VT == MVT::f32 && !Subtarget.hasBasicF())
8864 return false;
8865 if (VT == MVT::f64 && !Subtarget.hasBasicD())
8866 return false;
8867 return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT));
8868}
8869
8871 return true;
8872}
8873
8875 return true;
8876}
8877
8878bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
8879 const Instruction *I) const {
8880 if (!Subtarget.is64Bit())
8881 return isa<LoadInst>(I) || isa<StoreInst>(I);
8882
8883 if (isa<LoadInst>(I))
8884 return true;
8885
8886 // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
8887 // require fences beacuse we can use amswap_db.[w/d].
8888 Type *Ty = I->getOperand(0)->getType();
8889 if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
8890 unsigned Size = Ty->getIntegerBitWidth();
8891 return (Size == 8 || Size == 16);
8892 }
8893
8894 return false;
8895}
8896
8898 LLVMContext &Context,
8899 EVT VT) const {
8900 if (!VT.isVector())
8901 return getPointerTy(DL);
8903}
8904
8906 EVT VT = Y.getValueType();
8907
8908 if (VT.isVector())
8909 return Subtarget.hasExtLSX() && VT.isInteger();
8910
8911 return VT.isScalarInteger() && !isa<ConstantSDNode>(Y);
8912}
8913
8915 const CallInst &I,
8916 MachineFunction &MF,
8917 unsigned Intrinsic) const {
8918 switch (Intrinsic) {
8919 default:
8920 return false;
8921 case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
8922 case Intrinsic::loongarch_masked_atomicrmw_add_i32:
8923 case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
8924 case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
8925 Info.opc = ISD::INTRINSIC_W_CHAIN;
8926 Info.memVT = MVT::i32;
8927 Info.ptrVal = I.getArgOperand(0);
8928 Info.offset = 0;
8929 Info.align = Align(4);
8932 return true;
8933 // TODO: Add more Intrinsics later.
8934 }
8935}
8936
8937// When -mlamcas is enabled, MinCmpXchgSizeInBits will be set to 8,
8938// atomicrmw and/or/xor operations with operands less than 32 bits cannot be
8939// expanded to am{and/or/xor}[_db].w through AtomicExpandPass. To prevent
8940// regression, we need to implement it manually.
8943
8945 Op == AtomicRMWInst::And) &&
8946 "Unable to expand");
8947 unsigned MinWordSize = 4;
8948
8949 IRBuilder<> Builder(AI);
8950 LLVMContext &Ctx = Builder.getContext();
8951 const DataLayout &DL = AI->getDataLayout();
8952 Type *ValueType = AI->getType();
8953 Type *WordType = Type::getIntNTy(Ctx, MinWordSize * 8);
8954
8955 Value *Addr = AI->getPointerOperand();
8956 PointerType *PtrTy = cast<PointerType>(Addr->getType());
8957 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
8958
8959 Value *AlignedAddr = Builder.CreateIntrinsic(
8960 Intrinsic::ptrmask, {PtrTy, IntTy},
8961 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
8962 "AlignedAddr");
8963
8964 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
8965 Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
8966 Value *ShiftAmt = Builder.CreateShl(PtrLSB, 3);
8967 ShiftAmt = Builder.CreateTrunc(ShiftAmt, WordType, "ShiftAmt");
8968 Value *Mask = Builder.CreateShl(
8969 ConstantInt::get(WordType,
8970 (1 << (DL.getTypeStoreSize(ValueType) * 8)) - 1),
8971 ShiftAmt, "Mask");
8972 Value *Inv_Mask = Builder.CreateNot(Mask, "Inv_Mask");
8973 Value *ValOperand_Shifted =
8974 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), WordType),
8975 ShiftAmt, "ValOperand_Shifted");
8976 Value *NewOperand;
8977 if (Op == AtomicRMWInst::And)
8978 NewOperand = Builder.CreateOr(ValOperand_Shifted, Inv_Mask, "AndOperand");
8979 else
8980 NewOperand = ValOperand_Shifted;
8981
8982 AtomicRMWInst *NewAI =
8983 Builder.CreateAtomicRMW(Op, AlignedAddr, NewOperand, Align(MinWordSize),
8984 AI->getOrdering(), AI->getSyncScopeID());
8985
8986 Value *Shift = Builder.CreateLShr(NewAI, ShiftAmt, "shifted");
8987 Value *Trunc = Builder.CreateTrunc(Shift, ValueType, "extracted");
8988 Value *FinalOldResult = Builder.CreateBitCast(Trunc, ValueType);
8989 AI->replaceAllUsesWith(FinalOldResult);
8990 AI->eraseFromParent();
8991}
8992
8995 // TODO: Add more AtomicRMWInst that needs to be extended.
8996
8997 // Since floating-point operation requires a non-trivial set of data
8998 // operations, use CmpXChg to expand.
8999 if (AI->isFloatingPointOperation() ||
9005
9006 if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() &&
9009 AI->getOperation() == AtomicRMWInst::Sub)) {
9011 }
9012
9013 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
9014 if (Subtarget.hasLAMCAS()) {
9015 if (Size < 32 && (AI->getOperation() == AtomicRMWInst::And ||
9019 if (AI->getOperation() == AtomicRMWInst::Nand || Size < 32)
9021 }
9022
9023 if (Size == 8 || Size == 16)
9026}
9027
9028static Intrinsic::ID
9030 AtomicRMWInst::BinOp BinOp) {
9031 if (GRLen == 64) {
9032 switch (BinOp) {
9033 default:
9034 llvm_unreachable("Unexpected AtomicRMW BinOp");
9036 return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
9037 case AtomicRMWInst::Add:
9038 return Intrinsic::loongarch_masked_atomicrmw_add_i64;
9039 case AtomicRMWInst::Sub:
9040 return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
9042 return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
9044 return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
9046 return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
9047 case AtomicRMWInst::Max:
9048 return Intrinsic::loongarch_masked_atomicrmw_max_i64;
9049 case AtomicRMWInst::Min:
9050 return Intrinsic::loongarch_masked_atomicrmw_min_i64;
9051 // TODO: support other AtomicRMWInst.
9052 }
9053 }
9054
9055 if (GRLen == 32) {
9056 switch (BinOp) {
9057 default:
9058 llvm_unreachable("Unexpected AtomicRMW BinOp");
9060 return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
9061 case AtomicRMWInst::Add:
9062 return Intrinsic::loongarch_masked_atomicrmw_add_i32;
9063 case AtomicRMWInst::Sub:
9064 return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
9066 return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
9068 return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
9070 return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
9071 case AtomicRMWInst::Max:
9072 return Intrinsic::loongarch_masked_atomicrmw_max_i32;
9073 case AtomicRMWInst::Min:
9074 return Intrinsic::loongarch_masked_atomicrmw_min_i32;
9075 // TODO: support other AtomicRMWInst.
9076 }
9077 }
9078
9079 llvm_unreachable("Unexpected GRLen\n");
9080}
9081
9084 AtomicCmpXchgInst *CI) const {
9085
9086 if (Subtarget.hasLAMCAS())
9088
9090 if (Size == 8 || Size == 16)
9093}
9094
9096 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
9097 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
9098 unsigned GRLen = Subtarget.getGRLen();
9099 AtomicOrdering FailOrd = CI->getFailureOrdering();
9100 Value *FailureOrdering =
9101 Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
9102 Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
9103 if (GRLen == 64) {
9104 CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
9105 CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
9106 NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
9107 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9108 }
9109 Type *Tys[] = {AlignedAddr->getType()};
9110 Value *Result = Builder.CreateIntrinsic(
9111 CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
9112 if (GRLen == 64)
9113 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9114 return Result;
9115}
9116
9118 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
9119 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
9120 // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
9121 // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
9122 // mask, as this produces better code than the LL/SC loop emitted by
9123 // int_loongarch_masked_atomicrmw_xchg.
9124 if (AI->getOperation() == AtomicRMWInst::Xchg &&
9127 if (CVal->isZero())
9128 return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
9129 Builder.CreateNot(Mask, "Inv_Mask"),
9130 AI->getAlign(), Ord);
9131 if (CVal->isMinusOne())
9132 return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
9133 AI->getAlign(), Ord);
9134 }
9135
9136 unsigned GRLen = Subtarget.getGRLen();
9137 Value *Ordering =
9138 Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
9139 Type *Tys[] = {AlignedAddr->getType()};
9141 AI->getModule(),
9143
9144 if (GRLen == 64) {
9145 Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
9146 Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
9147 ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
9148 }
9149
9150 Value *Result;
9151
9152 // Must pass the shift amount needed to sign extend the loaded value prior
9153 // to performing a signed comparison for min/max. ShiftAmt is the number of
9154 // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
9155 // is the number of bits to left+right shift the value in order to
9156 // sign-extend.
9157 if (AI->getOperation() == AtomicRMWInst::Min ||
9159 const DataLayout &DL = AI->getDataLayout();
9160 unsigned ValWidth =
9161 DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
9162 Value *SextShamt =
9163 Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
9164 Result = Builder.CreateCall(LlwOpScwLoop,
9165 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
9166 } else {
9167 Result =
9168 Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
9169 }
9170
9171 if (GRLen == 64)
9172 Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
9173 return Result;
9174}
9175
9177 const MachineFunction &MF, EVT VT) const {
9178 VT = VT.getScalarType();
9179
9180 if (!VT.isSimple())
9181 return false;
9182
9183 switch (VT.getSimpleVT().SimpleTy) {
9184 case MVT::f32:
9185 case MVT::f64:
9186 return true;
9187 default:
9188 break;
9189 }
9190
9191 return false;
9192}
9193
9195 const Constant *PersonalityFn) const {
9196 return LoongArch::R4;
9197}
9198
9200 const Constant *PersonalityFn) const {
9201 return LoongArch::R5;
9202}
9203
9204//===----------------------------------------------------------------------===//
9205// Target Optimization Hooks
9206//===----------------------------------------------------------------------===//
9207
9209 const LoongArchSubtarget &Subtarget) {
9210 // Feature FRECIPE instrucions relative accuracy is 2^-14.
9211 // IEEE float has 23 digits and double has 52 digits.
9212 int RefinementSteps = VT.getScalarType() == MVT::f64 ? 2 : 1;
9213 return RefinementSteps;
9214}
9215
9217 SelectionDAG &DAG, int Enabled,
9218 int &RefinementSteps,
9219 bool &UseOneConstNR,
9220 bool Reciprocal) const {
9221 if (Subtarget.hasFrecipe()) {
9222 SDLoc DL(Operand);
9223 EVT VT = Operand.getValueType();
9224
9225 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9226 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9227 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9228 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9229 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9230
9231 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9232 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9233
9234 SDValue Estimate = DAG.getNode(LoongArchISD::FRSQRTE, DL, VT, Operand);
9235 if (Reciprocal)
9236 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate);
9237
9238 return Estimate;
9239 }
9240 }
9241
9242 return SDValue();
9243}
9244
9246 SelectionDAG &DAG,
9247 int Enabled,
9248 int &RefinementSteps) const {
9249 if (Subtarget.hasFrecipe()) {
9250 SDLoc DL(Operand);
9251 EVT VT = Operand.getValueType();
9252
9253 if (VT == MVT::f32 || (VT == MVT::f64 && Subtarget.hasBasicD()) ||
9254 (VT == MVT::v4f32 && Subtarget.hasExtLSX()) ||
9255 (VT == MVT::v2f64 && Subtarget.hasExtLSX()) ||
9256 (VT == MVT::v8f32 && Subtarget.hasExtLASX()) ||
9257 (VT == MVT::v4f64 && Subtarget.hasExtLASX())) {
9258
9259 if (RefinementSteps == ReciprocalEstimate::Unspecified)
9260 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
9261
9262 return DAG.getNode(LoongArchISD::FRECIPE, DL, VT, Operand);
9263 }
9264 }
9265
9266 return SDValue();
9267}
9268
9269//===----------------------------------------------------------------------===//
9270// LoongArch Inline Assembly Support
9271//===----------------------------------------------------------------------===//
9272
9274LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
9275 // LoongArch specific constraints in GCC: config/loongarch/constraints.md
9276 //
9277 // 'f': A floating-point register (if available).
9278 // 'k': A memory operand whose address is formed by a base register and
9279 // (optionally scaled) index register.
9280 // 'l': A signed 16-bit constant.
9281 // 'm': A memory operand whose address is formed by a base register and
9282 // offset that is suitable for use in instructions with the same
9283 // addressing mode as st.w and ld.w.
9284 // 'q': A general-purpose register except for $r0 and $r1 (for the csrxchg
9285 // instruction)
9286 // 'I': A signed 12-bit constant (for arithmetic instructions).
9287 // 'J': Integer zero.
9288 // 'K': An unsigned 12-bit constant (for logic instructions).
9289 // "ZB": An address that is held in a general-purpose register. The offset is
9290 // zero.
9291 // "ZC": A memory operand whose address is formed by a base register and
9292 // offset that is suitable for use in instructions with the same
9293 // addressing mode as ll.w and sc.w.
9294 if (Constraint.size() == 1) {
9295 switch (Constraint[0]) {
9296 default:
9297 break;
9298 case 'f':
9299 case 'q':
9300 return C_RegisterClass;
9301 case 'l':
9302 case 'I':
9303 case 'J':
9304 case 'K':
9305 return C_Immediate;
9306 case 'k':
9307 return C_Memory;
9308 }
9309 }
9310
9311 if (Constraint == "ZC" || Constraint == "ZB")
9312 return C_Memory;
9313
9314 // 'm' is handled here.
9315 return TargetLowering::getConstraintType(Constraint);
9316}
9317
9318InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
9319 StringRef ConstraintCode) const {
9320 return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
9324 .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
9325}
9326
9327std::pair<unsigned, const TargetRegisterClass *>
9328LoongArchTargetLowering::getRegForInlineAsmConstraint(
9329 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
9330 // First, see if this is a constraint that directly corresponds to a LoongArch
9331 // register class.
9332 if (Constraint.size() == 1) {
9333 switch (Constraint[0]) {
9334 case 'r':
9335 // TODO: Support fixed vectors up to GRLen?
9336 if (VT.isVector())
9337 break;
9338 return std::make_pair(0U, &LoongArch::GPRRegClass);
9339 case 'q':
9340 return std::make_pair(0U, &LoongArch::GPRNoR0R1RegClass);
9341 case 'f':
9342 if (Subtarget.hasBasicF() && VT == MVT::f32)
9343 return std::make_pair(0U, &LoongArch::FPR32RegClass);
9344 if (Subtarget.hasBasicD() && VT == MVT::f64)
9345 return std::make_pair(0U, &LoongArch::FPR64RegClass);
9346 if (Subtarget.hasExtLSX() &&
9347 TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
9348 return std::make_pair(0U, &LoongArch::LSX128RegClass);
9349 if (Subtarget.hasExtLASX() &&
9350 TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
9351 return std::make_pair(0U, &LoongArch::LASX256RegClass);
9352 break;
9353 default:
9354 break;
9355 }
9356 }
9357
9358 // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
9359 // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
9360 // constraints while the official register name is prefixed with a '$'. So we
9361 // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
9362 // before it being parsed. And TargetLowering::getRegForInlineAsmConstraint is
9363 // case insensitive, so no need to convert the constraint to upper case here.
9364 //
9365 // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
9366 // decode the usage of register name aliases into their official names. And
9367 // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
9368 // official register names.
9369 if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
9370 Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
9371 bool IsFP = Constraint[2] == 'f';
9372 std::pair<StringRef, StringRef> Temp = Constraint.split('$');
9373 std::pair<unsigned, const TargetRegisterClass *> R;
9375 TRI, join_items("", Temp.first, Temp.second), VT);
9376 // Match those names to the widest floating point register type available.
9377 if (IsFP) {
9378 unsigned RegNo = R.first;
9379 if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
9380 if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
9381 unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
9382 return std::make_pair(DReg, &LoongArch::FPR64RegClass);
9383 }
9384 }
9385 }
9386 return R;
9387 }
9388
9389 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
9390}
9391
9392void LoongArchTargetLowering::LowerAsmOperandForConstraint(
9393 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
9394 SelectionDAG &DAG) const {
9395 // Currently only support length 1 constraints.
9396 if (Constraint.size() == 1) {
9397 switch (Constraint[0]) {
9398 case 'l':
9399 // Validate & create a 16-bit signed immediate operand.
9400 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9401 uint64_t CVal = C->getSExtValue();
9402 if (isInt<16>(CVal))
9403 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9404 Subtarget.getGRLenVT()));
9405 }
9406 return;
9407 case 'I':
9408 // Validate & create a 12-bit signed immediate operand.
9409 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9410 uint64_t CVal = C->getSExtValue();
9411 if (isInt<12>(CVal))
9412 Ops.push_back(DAG.getSignedTargetConstant(CVal, SDLoc(Op),
9413 Subtarget.getGRLenVT()));
9414 }
9415 return;
9416 case 'J':
9417 // Validate & create an integer zero operand.
9418 if (auto *C = dyn_cast<ConstantSDNode>(Op))
9419 if (C->getZExtValue() == 0)
9420 Ops.push_back(
9421 DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
9422 return;
9423 case 'K':
9424 // Validate & create a 12-bit unsigned immediate operand.
9425 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
9426 uint64_t CVal = C->getZExtValue();
9427 if (isUInt<12>(CVal))
9428 Ops.push_back(
9429 DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
9430 }
9431 return;
9432 default:
9433 break;
9434 }
9435 }
9437}
9438
9439#define GET_REGISTER_MATCHER
9440#include "LoongArchGenAsmMatcher.inc"
9441
9444 const MachineFunction &MF) const {
9445 std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
9446 std::string NewRegName = Name.second.str();
9447 Register Reg = MatchRegisterAltName(NewRegName);
9448 if (!Reg)
9449 Reg = MatchRegisterName(NewRegName);
9450 if (!Reg)
9451 return Reg;
9452 BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
9453 if (!ReservedRegs.test(Reg))
9454 report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
9455 StringRef(RegName) + "\"."));
9456 return Reg;
9457}
9458
9460 EVT VT, SDValue C) const {
9461 // TODO: Support vectors.
9462 if (!VT.isScalarInteger())
9463 return false;
9464
9465 // Omit the optimization if the data size exceeds GRLen.
9466 if (VT.getSizeInBits() > Subtarget.getGRLen())
9467 return false;
9468
9469 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
9470 const APInt &Imm = ConstNode->getAPIntValue();
9471 // Break MUL into (SLLI + ADD/SUB) or ALSL.
9472 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
9473 (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
9474 return true;
9475 // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
9476 if (ConstNode->hasOneUse() &&
9477 ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
9478 (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
9479 return true;
9480 // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
9481 // in which the immediate has two set bits. Or Break (MUL x, imm)
9482 // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
9483 // equals to (1 << s0) - (1 << s1).
9484 if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
9485 unsigned Shifts = Imm.countr_zero();
9486 // Reject immediates which can be composed via a single LUI.
9487 if (Shifts >= 12)
9488 return false;
9489 // Reject multiplications can be optimized to
9490 // (SLLI (ALSL x, x, 1/2/3/4), s).
9491 APInt ImmPop = Imm.ashr(Shifts);
9492 if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
9493 return false;
9494 // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
9495 // since it needs one more instruction than other 3 cases.
9496 APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
9497 if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
9498 (ImmSmall - Imm).isPowerOf2())
9499 return true;
9500 }
9501 }
9502
9503 return false;
9504}
9505
9507 const AddrMode &AM,
9508 Type *Ty, unsigned AS,
9509 Instruction *I) const {
9510 // LoongArch has four basic addressing modes:
9511 // 1. reg
9512 // 2. reg + 12-bit signed offset
9513 // 3. reg + 14-bit signed offset left-shifted by 2
9514 // 4. reg1 + reg2
9515 // TODO: Add more checks after support vector extension.
9516
9517 // No global is ever allowed as a base.
9518 if (AM.BaseGV)
9519 return false;
9520
9521 // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
9522 // with `UAL` feature.
9523 if (!isInt<12>(AM.BaseOffs) &&
9524 !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
9525 return false;
9526
9527 switch (AM.Scale) {
9528 case 0:
9529 // "r+i" or just "i", depending on HasBaseReg.
9530 break;
9531 case 1:
9532 // "r+r+i" is not allowed.
9533 if (AM.HasBaseReg && AM.BaseOffs)
9534 return false;
9535 // Otherwise we have "r+r" or "r+i".
9536 break;
9537 case 2:
9538 // "2*r+r" or "2*r+i" is not allowed.
9539 if (AM.HasBaseReg || AM.BaseOffs)
9540 return false;
9541 // Allow "2*r" as "r+r".
9542 break;
9543 default:
9544 return false;
9545 }
9546
9547 return true;
9548}
9549
9551 return isInt<12>(Imm);
9552}
9553
9555 return isInt<12>(Imm);
9556}
9557
9559 // Zexts are free if they can be combined with a load.
9560 // Don't advertise i32->i64 zextload as being free for LA64. It interacts
9561 // poorly with type legalization of compares preferring sext.
9562 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
9563 EVT MemVT = LD->getMemoryVT();
9564 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
9565 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
9566 LD->getExtensionType() == ISD::ZEXTLOAD))
9567 return true;
9568 }
9569
9570 return TargetLowering::isZExtFree(Val, VT2);
9571}
9572
9574 EVT DstVT) const {
9575 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
9576}
9577
9579 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
9580}
9581
9583 // TODO: Support vectors.
9584 if (Y.getValueType().isVector())
9585 return false;
9586
9587 return !isa<ConstantSDNode>(Y);
9588}
9589
9591 // LAMCAS will use amcas[_DB].{b/h/w/d} which does not require extension.
9592 return Subtarget.hasLAMCAS() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
9593}
9594
9596 Type *Ty, bool IsSigned) const {
9597 if (Subtarget.is64Bit() && Ty->isIntegerTy(32))
9598 return true;
9599
9600 return IsSigned;
9601}
9602
9604 // Return false to suppress the unnecessary extensions if the LibCall
9605 // arguments or return value is a float narrower than GRLEN on a soft FP ABI.
9606 if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
9607 Type.getSizeInBits() < Subtarget.getGRLen()))
9608 return false;
9609 return true;
9610}
9611
9612// memcpy, and other memory intrinsics, typically tries to use wider load/store
9613// if the source/dest is aligned and the copy size is large enough. We therefore
9614// want to align such objects passed to memory intrinsics.
9616 unsigned &MinSize,
9617 Align &PrefAlign) const {
9618 if (!isa<MemIntrinsic>(CI))
9619 return false;
9620
9621 if (Subtarget.is64Bit()) {
9622 MinSize = 8;
9623 PrefAlign = Align(8);
9624 } else {
9625 MinSize = 4;
9626 PrefAlign = Align(4);
9627 }
9628
9629 return true;
9630}
9631
9640
9641bool LoongArchTargetLowering::splitValueIntoRegisterParts(
9642 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
9643 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
9644 bool IsABIRegCopy = CC.has_value();
9645 EVT ValueVT = Val.getValueType();
9646
9647 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9648 PartVT == MVT::f32) {
9649 // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
9650 // nan, and cast to f32.
9651 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
9652 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
9653 Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
9654 DAG.getConstant(0xFFFF0000, DL, MVT::i32));
9655 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
9656 Parts[0] = Val;
9657 return true;
9658 }
9659
9660 return false;
9661}
9662
9663SDValue LoongArchTargetLowering::joinRegisterPartsIntoValue(
9664 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
9665 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
9666 bool IsABIRegCopy = CC.has_value();
9667
9668 if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
9669 PartVT == MVT::f32) {
9670 SDValue Val = Parts[0];
9671
9672 // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
9673 Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
9674 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
9675 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
9676 return Val;
9677 }
9678
9679 return SDValue();
9680}
9681
9682MVT LoongArchTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
9683 CallingConv::ID CC,
9684 EVT VT) const {
9685 // Use f32 to pass f16.
9686 if (VT == MVT::f16 && Subtarget.hasBasicF())
9687 return MVT::f32;
9688
9690}
9691
9692unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(
9693 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
9694 // Use f32 to pass f16.
9695 if (VT == MVT::f16 && Subtarget.hasBasicF())
9696 return 1;
9697
9699}
9700
9702 SDValue Op, const APInt &OriginalDemandedBits,
9703 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
9704 unsigned Depth) const {
9705 EVT VT = Op.getValueType();
9706 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
9707 unsigned Opc = Op.getOpcode();
9708 switch (Opc) {
9709 default:
9710 break;
9711 case LoongArchISD::VMSKLTZ:
9712 case LoongArchISD::XVMSKLTZ: {
9713 SDValue Src = Op.getOperand(0);
9714 MVT SrcVT = Src.getSimpleValueType();
9715 unsigned SrcBits = SrcVT.getScalarSizeInBits();
9716 unsigned NumElts = SrcVT.getVectorNumElements();
9717
9718 // If we don't need the sign bits at all just return zero.
9719 if (OriginalDemandedBits.countr_zero() >= NumElts)
9720 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
9721
9722 // Only demand the vector elements of the sign bits we need.
9723 APInt KnownUndef, KnownZero;
9724 APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
9725 if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
9726 TLO, Depth + 1))
9727 return true;
9728
9729 Known.Zero = KnownZero.zext(BitWidth);
9730 Known.Zero.setHighBits(BitWidth - NumElts);
9731
9732 // [X]VMSKLTZ only uses the MSB from each vector element.
9733 KnownBits KnownSrc;
9734 APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
9735 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
9736 Depth + 1))
9737 return true;
9738
9739 if (KnownSrc.One[SrcBits - 1])
9740 Known.One.setLowBits(NumElts);
9741 else if (KnownSrc.Zero[SrcBits - 1])
9742 Known.Zero.setLowBits(NumElts);
9743
9744 // Attempt to avoid multi-use ops if we don't need anything from it.
9746 Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
9747 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
9748 return false;
9749 }
9750 }
9751
9753 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
9754}
9755
9757 unsigned Opc = VecOp.getOpcode();
9758
9759 // Assume target opcodes can't be scalarized.
9760 // TODO - do we have any exceptions?
9761 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
9762 return false;
9763
9764 // If the vector op is not supported, try to convert to scalar.
9765 EVT VecVT = VecOp.getValueType();
9767 return true;
9768
9769 // If the vector op is supported, but the scalar op is not, the transform may
9770 // not be worthwhile.
9771 EVT ScalarVT = VecVT.getScalarType();
9772 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
9773}
9774
9776 unsigned Index) const {
9778 return false;
9779
9780 // Extract a 128-bit subvector from index 0 of a 256-bit vector is free.
9781 return Index == 0;
9782}
9783
9785 unsigned Index) const {
9786 EVT EltVT = VT.getScalarType();
9787
9788 // Extract a scalar FP value from index 0 of a vector is free.
9789 return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
9790}
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget, const AArch64TargetLowering &TLI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCRegister MatchRegisterAltName(StringRef Name)
Maps from the set of all alternative registernames to a register number.
Function Alias Analysis Results
static uint64_t getConstant(const Value *IndexValue)
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty, SelectionDAG &DAG, unsigned Flags)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static MachineBasicBlock * emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode)
static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget, SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const SDLoc &DL)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static SDValue performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
const MCPhysReg ArgFPR32s[]
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 128-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
const MCPhysReg ArgVRs[]
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKEV (if possible).
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
static SDValue unpackF64OnLA32DSoftABI(SelectionDAG &DAG, SDValue Chain, const CCValAssign &VA, const CCValAssign &HiVA, const SDLoc &DL)
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue lowerVECTOR_SHUFFLE_IsReverse(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE whose result is the reversed source vector.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
static cl::opt< bool > ZeroDivCheck("loongarch-check-zero-division", cl::Hidden, cl::desc("Trap on integer division by zero."), cl::init(false))
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static void emitErrorAndReplaceIntrinsicResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, StringRef ErrorMsg, bool WithChain=true)
static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE as byte rotate (if possible).
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVH (if possible).
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, unsigned ValNo, MVT ValVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsRet, Type *OrigTy)
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG)
static SDValue performSPLIT_PAIR_F64Combine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static MachineBasicBlock * emitSplitPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG)
static SDValue performSETCC_BITCASTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
@ NoMaterializeFPImm
@ MaterializeFPImm2Ins
@ MaterializeFPImm5Ins
@ MaterializeFPImm6Ins
@ MaterializeFPImm3Ins
@ MaterializeFPImm4Ins
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
static std::optional< bool > matchSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue Val)
static SDValue combineAndNotIntoVANDN(SDNode *N, const SDLoc &DL, SelectionDAG &DAG)
Try to fold: (and (xor X, -1), Y) -> (vandn X, Y).
static SDValue lowerBUILD_VECTORAsBroadCastLoad(BuildVectorSDNode *BVOp, const SDLoc &DL, SelectionDAG &DAG)
#define CRC_CASE_EXT_BINARYOP(NAME, NODE)
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG)
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, unsigned Depth)
static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as shift (if possible).
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static MachineBasicBlock * insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock *MBB)
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG)
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKEV (if possible).
static MachineBasicBlock * emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performVANDNCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
Do target-specific dag combines on LoongArchISD::VANDN nodes.
static void replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const APInt &Zeroable)
Lower VECTOR_SHUFFLE as ZERO_EXTEND or ANY_EXTEND (if possible).
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, bool IsSigned=false)
static cl::opt< MaterializeFPImm > MaterializeFPImmInsNum("loongarch-materialize-float-imm", cl::Hidden, cl::desc("Maximum number of instructions used (including code sequence " "to generate the value and moving the value to FPR) when " "materializing floating-point immediates (default = 3)"), cl::init(MaterializeFPImm3Ins), cl::values(clEnumValN(NoMaterializeFPImm, "0", "Use constant pool"), clEnumValN(MaterializeFPImm2Ins, "2", "Materialize FP immediate within 2 instructions"), clEnumValN(MaterializeFPImm3Ins, "3", "Materialize FP immediate within 3 instructions"), clEnumValN(MaterializeFPImm4Ins, "4", "Materialize FP immediate within 4 instructions"), clEnumValN(MaterializeFPImm5Ins, "5", "Materialize FP immediate within 5 instructions"), clEnumValN(MaterializeFPImm6Ins, "6", "Materialize FP immediate within 6 instructions " "(behaves same as 5 on loongarch64)")))
static SDValue lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERMI (if possible).
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op, StringRef ErrorMsg, SelectionDAG &DAG)
const MCPhysReg ArgXRs[]
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, ISD::ArgFlagsTy ArgFlags2)
static unsigned getLoongArchWOpcode(unsigned Opcode)
const MCPhysReg ArgFPR64s[]
static MachineBasicBlock * emitPseudoCTPOP(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue performMOVGR2FR_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRWR_CASE(NAME, NODE)
#define CRC_CASE_EXT_UNARYOP(NAME, NODE)
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPACKOD (if possible).
static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, SDValue Src, const SDLoc &DL)
static SDValue isNOT(SDValue V, SelectionDAG &DAG)
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Dispatching routine to lower various 256-bit LoongArch vector shuffles.
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
static MachineBasicBlock * emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static void fillVector(ArrayRef< SDValue > Ops, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, SDValue &Vector, EVT ResTy)
static SDValue performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue fillSubVectorFromBuildVector(BuildVectorSDNode *Node, SelectionDAG &DAG, SDLoc DL, const LoongArchSubtarget &Subtarget, EVT ResTy, unsigned first)
static bool isSelectPseudo(MachineInstr &MI)
static SDValue foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
const MCPhysReg ArgGPRs[]
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVL (if possible).
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, unsigned ExtOpc=ISD::ANY_EXTEND)
static void replaceVecCondBranchResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget, unsigned ResOp)
#define ASRT_LE_GT_CASE(NAME)
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
static SDValue performBR_CCCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void computeZeroableShuffleElements(ArrayRef< int > Mask, SDValue V1, SDValue V2, APInt &KnownUndef, APInt &KnownZero)
Compute whether each element of a shuffle is zeroable.
static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
static SDValue widenShuffleMask(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
static MachineBasicBlock * emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static bool canonicalizeShuffleVectorByLane(const SDLoc &DL, MutableArrayRef< int > Mask, MVT VT, SDValue &V1, SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Shuffle vectors by lane to generate more optimized instructions.
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVILVH (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVSHUF (if possible).
static void replaceCMP_XCHG_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static SDValue lowerVectorPickVE2GR(SDNode *N, SelectionDAG &DAG, unsigned ResOp)
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
#define IOCSRRD_CASE(NAME, NODE)
static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, ArrayRef< int > Mask)
Attempts to match a vector shuffle as a byte rotation.
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, unsigned ScalarSizeInBits, ArrayRef< int > Mask, int MaskOffset, const APInt &Zeroable)
Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI instructions.
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VILVL (if possible).
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG)
static MachineBasicBlock * emitBuildPairF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB, const LoongArchSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static void replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget)
#define CSR_CASE(ID)
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef< int > Mask, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG)
Lower VECTOR_SHUFFLE into VPICKOD (if possible).
static Intrinsic::ID getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen, AtomicRMWInst::BinOp BinOp)
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG)
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, ArrayRef< int > Mask, SmallVectorImpl< int > &RepeatedMask)
Test whether a shuffle mask is equivalent within each sub-lane.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue, bool AllowSymbol=false)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static bool isSequentialOrUndefInRange(ArrayRef< int > Mask, unsigned Pos, unsigned Size, int Low, int Step=1)
Return true if every element in Mask, beginning from position Pos and ending in Pos + Size,...
Value * RHS
Value * LHS
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1414
bool isZero() const
Definition APFloat.h:1427
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
Definition APInt.h:1392
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1033
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1640
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
Definition APInt.h:1389
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ UMax
*p = old >unsigned v ? old : v
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
bool isFloatingPointOperation() const
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
bool test(unsigned Idx) const
Definition BitVector.h:480
size_type count() const
count - Returns the number of bits which are set.
Definition BitVector.h:181
A "pseudo-class" with methods for operating on BUILD_VECTORs.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT, LocInfo HTP, unsigned ExtraInfo=0)
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isMinusOne() const
This function will return true iff every bit in this constant is set to true.
Definition Constants.h:226
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
unsigned getPointerSizeInBits(unsigned AS=0) const
The size in bits of the pointer representation in a given address space.
Definition DataLayout.h:479
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
A debug info location.
Definition DebugLoc.h:124
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:209
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
Argument * getArg(unsigned i) const
Definition Function.h:884
bool isDSOLocal() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
const LoongArchRegisterInfo * getRegisterInfo() const override
const LoongArchInstrInfo * getInstrInfo() const override
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override
Return true if result of the specified node is used by a return node only.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ValueType of the result of SETCC operations.
std::pair< bool, uint64_t > isImmVLDILegalForMode1(const APInt &SplatValue, const unsigned SplatBitSize) const
Check if a constant splat can be generated using [x]vldi, where imm[12] is 1.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
bool isExtractVecEltCheap(EVT VT, unsigned Index) const override
Return true if extraction of a scalar element from the given vector type at the given index is cheap.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type from this source type with ...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
bool signExtendConstant(const ConstantInt *CI) const override
Return true if this constant should be sign extended when promoting to a larger type.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override
Returns true if arguments should be sign-extended in lib calls.
bool shouldScalarizeBinop(SDValue VecOp) const override
Try to convert an extract element of a vector binary operation into an extract element followed by a ...
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const
bool shouldExtendTypeInLibCall(EVT Type) const override
Returns true if arguments should be extended in lib calls.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override
Perform a atomicrmw expansion using a target-specific way.
ISD::NodeType getExtendForAtomicCmpSwapArg() const override
Returns how the platform's atomic compare and swap expects its comparison value to be extended (ZERO_...
LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI)
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool hasAndNotCompare(SDValue Y) const override
Return true if the target should transform: (X & Y) == Y ---> (~X & Y) == 0 (X & Y) !...
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
bool hasFeature(unsigned Feature) const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool is256BitVector() const
Return true if this is a 256-bit vector type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getHalfNumVectorElementsVT() const
Return a VT for a vector type with the same element type but half the number of elements.
MVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void push_back(MachineInstr *MI)
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
LLVM_ABI void collectDebugValues(SmallVectorImpl< MachineInstr * > &DbgValues)
Scan instructions immediately following MI and collect any matching DBG_VALUEs.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
EVT getMemoryVT() const
Return the type of the in-memory value.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Class to represent pointers.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
LLVM_ABI bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
size_t use_size() const
Return the number of uses of this node.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Returns true if the node type is UNDEF or POISON.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
bool isSafeToSpeculativelyExecute(unsigned Opcode) const
Some opcodes may create immediate undefined behavior when used with some values (integer division-by-...
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI SDValue getNegative(SDValue Val, const SDLoc &DL, EVT VT)
Create negative operation as (SUB 0, Val).
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue WidenVector(const SDValue &N, const SDLoc &DL)
Widen the vector up to the next power of two using INSERT_SUBVECTOR.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static LLVM_ABI bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
ArrayRef< int > getMask() const
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxBytesForAlignment(unsigned MaxBytes)
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual InlineAsm::ConstraintCode getInlineAsmMemConstraint(StringRef ConstraintCode) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "lookthrough" ops that don't contrib...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
bool useTLSDESC() const
Returns true if this target uses TLS Descriptors.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:134
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:236
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI bool isFreezeUndef(const SDNode *N)
Return true if the specified node is FREEZE(UNDEF).
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isIntEqualitySetCC(CondCode Code)
Return true if this is a setcc instruction that performs an equality comparison when used with intege...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
ABI getTargetABI(StringRef ABIName)
InstSeq generateInstSeq(int64_t Val)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition PtrState.h:41
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isIntOrFPConstant(SDValue V)
Return true if V is either a integer or FP constant.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version.)
Definition MathExtras.h:273
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit with the remainder zero (64 bit version.)
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Other
Any other memory.
Definition ModRef.h:68
unsigned getKillRegState(bool B)
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr unsigned BitWidth
std::string join_items(Sep Separator, Args &&... Items)
Joins the strings in the parameter pack Items, adding Separator between the elements....
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:180
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:59
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
Align getNonZeroOrigAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale If BaseGV is null, there is no BaseGV.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.
MakeLibCallOptions & setTypeListBeforeSoften(ArrayRef< EVT > OpsVT, EVT RetVT)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetLowering to its clients that want to combine.