//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//

#include "MipsSEISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "mips-isel"

static cl::opt<bool>
UseMipsTailCalls("mips-tail-calls", cl::Hidden,
                 cl::desc("MIPS: permit tail calls."), cl::init(false));

static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
                                   cl::desc("Expand double precision loads and "
                                            "stores to their single precision "
                                            "counterparts"));

// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
// v2i32 -> v4i32, etc, to ensure the correct lane size is used, i.e.
// INST.h for v16, INST.w for v32, INST.d for v64.
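//
// For example, with MSA enabled a v2i16 operand is widened to v8i16 so that
// it fills a 128-bit MSA register and can be operated on with the .h format
// of an instruction, instead of being promoted to v2i32 element by element.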
TargetLoweringBase::LegalizeTypeAction
MipsSETargetLowering::getPreferredVectorAction(MVT VT) const {
  if (this->Subtarget.hasMSA()) {
    switch (VT.SimpleTy) {
    // Leave v2i1 vectors to be promoted to larger ones.
    // Other i1 types will be promoted by default.
    case MVT::v2i1:
      return TypePromoteInteger;
    // 16-bit vector types (v2 and longer)
    case MVT::v2i8:
    // 32-bit vector types (v2 and longer)
    case MVT::v2i16:
    case MVT::v4i8:
    // 64-bit vector types (v2 and longer)
    case MVT::v2i32:
    case MVT::v4i16:
    case MVT::v8i8:
      return TypeWidenVector;
    // Only word (.w) and doubleword (.d) are available for floating point
    // vectors. That means floating point vectors should be either v2f64
    // or v4f32.
    // Here we only explicitly widen the f32 types - f16 will be promoted
    // by default.
    case MVT::v2f32:
    case MVT::v3f32:
      return TypeWidenVector;
    // v2i64 is already 128-bit wide.
    default:
      break;
    }
  }
  return MipsTargetLowering::getPreferredVectorAction(VT);
}

MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
                                           const MipsSubtarget &STI)
    : MipsTargetLowering(TM, STI) {
  // Set up the register classes
  addRegisterClass(MVT::i32, &Mips::GPR32RegClass);

  if (Subtarget.isGP64bit())
    addRegisterClass(MVT::i64, &Mips::GPR64RegClass);

  if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
    // Expand all truncating stores and extending loads.
    for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) {
      for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT0, VT1, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand);
      }
    }
  }

  if (Subtarget.hasDSP()) {
    MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};

    for (const auto &VecTy : VecTys) {
      addRegisterClass(VecTy, &Mips::DSPRRegClass);

      // Expand all builtin opcodes.
      for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
        setOperationAction(Opc, VecTy, Expand);

      setOperationAction(ISD::ADD, VecTy, Legal);
      setOperationAction(ISD::SUB, VecTy, Legal);
      setOperationAction(ISD::LOAD, VecTy, Legal);
      setOperationAction(ISD::STORE, VecTy, Legal);
      setOperationAction(ISD::BITCAST, VecTy, Legal);
    }

    setTargetDAGCombine(
        {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT});

    if (Subtarget.hasMips32r2()) {
      setOperationAction(ISD::ADDC, MVT::i32, Legal);
      setOperationAction(ISD::ADDE, MVT::i32, Legal);
    }
  }

  if (Subtarget.hasDSPR2())
    setOperationAction(ISD::MUL, MVT::v2i16, Legal);

  if (Subtarget.hasMSA()) {
    addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
    addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
    addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
    addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
    addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
    addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
    addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);

    // f16 is a storage-only type, always promote it to f32.
    addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
    setOperationAction(ISD::SETCC, MVT::f16, Promote);
    setOperationAction(ISD::BR_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
    setOperationAction(ISD::SELECT, MVT::f16, Promote);
    setOperationAction(ISD::FADD, MVT::f16, Promote);
    setOperationAction(ISD::FSUB, MVT::f16, Promote);
    setOperationAction(ISD::FMUL, MVT::f16, Promote);
    setOperationAction(ISD::FDIV, MVT::f16, Promote);
    setOperationAction(ISD::FREM, MVT::f16, Promote);
    setOperationAction(ISD::FMA, MVT::f16, Promote);
    setOperationAction(ISD::FNEG, MVT::f16, Promote);
    setOperationAction(ISD::FABS, MVT::f16, Promote);
    setOperationAction(ISD::FCEIL, MVT::f16, Promote);
    setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
    setOperationAction(ISD::FCOS, MVT::f16, Promote);
    setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
    setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
    setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
    setOperationAction(ISD::FPOW, MVT::f16, Promote);
    setOperationAction(ISD::FPOWI, MVT::f16, Promote);
    setOperationAction(ISD::FRINT, MVT::f16, Promote);
    setOperationAction(ISD::FSIN, MVT::f16, Promote);
    setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
    setOperationAction(ISD::FSQRT, MVT::f16, Promote);
    setOperationAction(ISD::FEXP, MVT::f16, Promote);
    setOperationAction(ISD::FEXP2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG, MVT::f16, Promote);
    setOperationAction(ISD::FLOG2, MVT::f16, Promote);
    setOperationAction(ISD::FLOG10, MVT::f16, Promote);
    setOperationAction(ISD::FROUND, MVT::f16, Promote);
    setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
    setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
    setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
    setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);

    setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR});
  }

  if (!Subtarget.useSoftFloat()) {
    addRegisterClass(MVT::f32, &Mips::FGR32RegClass);

    // When dealing with single precision only, use libcalls
    if (!Subtarget.isSingleFloat()) {
      if (Subtarget.isFP64bit())
        addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
      else
        addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
    }
  }

  // Targets with 64-bit integer registers but no 64-bit floating-point
  // registers do not support conversion between them.
  if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
      !Subtarget.useSoftFloat()) {
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  }

  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
  setOperationAction(ISD::MULHS, MVT::i32, Custom);
  setOperationAction(ISD::MULHU, MVT::i32, Custom);

  if (Subtarget.hasCnMips())
    setOperationAction(ISD::MUL, MVT::i64, Legal);
  else if (Subtarget.isGP64bit())
    setOperationAction(ISD::MUL, MVT::i64, Custom);

  if (Subtarget.isGP64bit()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom);
    setOperationAction(ISD::MULHS, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Custom);
    setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
    setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
  }

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);

  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
  if (Subtarget.hasMips32r6()) {
    setOperationAction(ISD::LOAD, MVT::i32, Legal);
    setOperationAction(ISD::STORE, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::LOAD, MVT::i32, Custom);
    setOperationAction(ISD::STORE, MVT::i32, Custom);
  }

  setTargetDAGCombine(ISD::MUL);

  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
      !Subtarget.hasMips64()) {
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
  }

  if (NoDPLoadStore) {
    setOperationAction(ISD::LOAD, MVT::f64, Custom);
    setOperationAction(ISD::STORE, MVT::f64, Custom);
  }

  if (Subtarget.hasMips32r6()) {
    // MIPS32r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::MUL, MVT::i32, Legal);
    setOperationAction(ISD::MULHS, MVT::i32, Legal);
    setOperationAction(ISD::MULHU, MVT::i32, Legal);

    // MIPS32r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    setOperationAction(ISD::SDIV, MVT::i32, Legal);
    setOperationAction(ISD::UDIV, MVT::i32, Legal);
    setOperationAction(ISD::SREM, MVT::i32, Legal);
    setOperationAction(ISD::UREM, MVT::i32, Legal);

    // MIPS32r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i32, Legal);
    setOperationAction(ISD::SELECT, MVT::i32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);

    setOperationAction(ISD::SETCC, MVT::f32, Legal);
    setOperationAction(ISD::SELECT, MVT::f32, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);

    assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
    setOperationAction(ISD::SETCC, MVT::f64, Legal);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);

    setOperationAction(ISD::BRCOND, MVT::Other, Legal);

    // Floating point > and >= are supported via < and <=
    setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);

    setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  }

  if (Subtarget.hasMips64r6()) {
    // MIPS64r6 replaces the accumulator-based multiplies with a three register
    // instruction
    setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
    setOperationAction(ISD::MUL, MVT::i64, Legal);
    setOperationAction(ISD::MULHS, MVT::i64, Legal);
    setOperationAction(ISD::MULHU, MVT::i64, Legal);

    // MIPS64r6 replaces the accumulator-based division/remainder with separate
    // three register division and remainder instructions.
    setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    setOperationAction(ISD::SDIV, MVT::i64, Legal);
    setOperationAction(ISD::UDIV, MVT::i64, Legal);
    setOperationAction(ISD::SREM, MVT::i64, Legal);
    setOperationAction(ISD::UREM, MVT::i64, Legal);

    // MIPS64r6 replaces conditional moves with an equivalent that removes the
    // need for three GPR read ports.
    setOperationAction(ISD::SETCC, MVT::i64, Legal);
    setOperationAction(ISD::SELECT, MVT::i64, Legal);
    setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  }

  computeRegisterProperties(Subtarget.getRegisterInfo());
}

const MipsTargetLowering *
llvm::createMipsSETargetLowering(const MipsTargetMachine &TM,
                                 const MipsSubtarget &STI) {
  return new MipsSETargetLowering(TM, STI);
}

const TargetRegisterClass *
MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;

  return TargetLowering::getRepRegClassFor(VT);
}

// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::addMSAIntType(MVT::SimpleValueType Ty,
                                         const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
  setOperationAction(ISD::UNDEF, Ty, Legal);

  setOperationAction(ISD::ADD, Ty, Legal);
  setOperationAction(ISD::AND, Ty, Legal);
  setOperationAction(ISD::CTLZ, Ty, Legal);
  setOperationAction(ISD::CTPOP, Ty, Legal);
  setOperationAction(ISD::MUL, Ty, Legal);
  setOperationAction(ISD::OR, Ty, Legal);
  setOperationAction(ISD::SDIV, Ty, Legal);
  setOperationAction(ISD::SREM, Ty, Legal);
  setOperationAction(ISD::SHL, Ty, Legal);
  setOperationAction(ISD::SRA, Ty, Legal);
  setOperationAction(ISD::SRL, Ty, Legal);
  setOperationAction(ISD::SUB, Ty, Legal);
  setOperationAction(ISD::SMAX, Ty, Legal);
  setOperationAction(ISD::SMIN, Ty, Legal);
  setOperationAction(ISD::UDIV, Ty, Legal);
  setOperationAction(ISD::UREM, Ty, Legal);
  setOperationAction(ISD::UMAX, Ty, Legal);
  setOperationAction(ISD::UMIN, Ty, Legal);
  setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom);
  setOperationAction(ISD::VSELECT, Ty, Legal);
  setOperationAction(ISD::XOR, Ty, Legal);

  if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
    setOperationAction(ISD::FP_TO_SINT, Ty, Legal);
    setOperationAction(ISD::FP_TO_UINT, Ty, Legal);
    setOperationAction(ISD::SINT_TO_FP, Ty, Legal);
    setOperationAction(ISD::UINT_TO_FP, Ty, Legal);
  }

  setOperationAction(ISD::SETCC, Ty, Legal);
  setCondCodeAction(ISD::SETNE, Ty, Expand);
  setCondCodeAction(ISD::SETGE, Ty, Expand);
  setCondCodeAction(ISD::SETGT, Ty, Expand);
  setCondCodeAction(ISD::SETUGE, Ty, Expand);
  setCondCodeAction(ISD::SETUGT, Ty, Expand);
}

// Enable MSA support for the given floating-point type and Register class.
void MipsSETargetLowering::addMSAFloatType(MVT::SimpleValueType Ty,
                                           const TargetRegisterClass *RC) {
  addRegisterClass(Ty, RC);

  // Expand all builtin opcodes.
  for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
    setOperationAction(Opc, Ty, Expand);

  setOperationAction(ISD::LOAD, Ty, Legal);
  setOperationAction(ISD::STORE, Ty, Legal);
  setOperationAction(ISD::BITCAST, Ty, Legal);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal);
  setOperationAction(ISD::BUILD_VECTOR, Ty, Custom);
  setOperationAction(ISD::UNDEF, Ty, Legal);

  if (Ty != MVT::v8f16) {
    setOperationAction(ISD::FABS, Ty, Legal);
    setOperationAction(ISD::FADD, Ty, Legal);
    setOperationAction(ISD::FDIV, Ty, Legal);
    setOperationAction(ISD::FEXP2, Ty, Legal);
    setOperationAction(ISD::FLOG2, Ty, Legal);
    setOperationAction(ISD::FMA, Ty, Legal);
    setOperationAction(ISD::FMUL, Ty, Legal);
    setOperationAction(ISD::FRINT, Ty, Legal);
    setOperationAction(ISD::FSQRT, Ty, Legal);
    setOperationAction(ISD::FSUB, Ty, Legal);
    setOperationAction(ISD::VSELECT, Ty, Legal);

    setOperationAction(ISD::SETCC, Ty, Legal);
    setCondCodeAction(ISD::SETOGE, Ty, Expand);
    setCondCodeAction(ISD::SETOGT, Ty, Expand);
    setCondCodeAction(ISD::SETUGE, Ty, Expand);
    setCondCodeAction(ISD::SETUGT, Ty, Expand);
    setCondCodeAction(ISD::SETGE, Ty, Expand);
    setCondCodeAction(ISD::SETGT, Ty, Expand);
  }
}

SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  if (!Subtarget.hasMips32r6())
    return MipsTargetLowering::LowerOperation(Op, DAG);

  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);

  // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
  // floating point register are undefined. Not really an issue as sel.d, which
  // is produced from an FSELECT node, only looks at bit 0.
  SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
  return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
                     Op->getOperand(2));
}

bool MipsSETargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;

  if (Subtarget.systemSupportsUnalignedAccess()) {
    // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
    // implementation defined whether this is handled by hardware, software, or
    // a hybrid of the two but it's expected that most implementations will
    // handle the majority of cases in hardware.
    if (Fast)
      *Fast = 1;
    return true;
  } else if (Subtarget.hasMips32r6()) {
    return false;
  }

  switch (SVT) {
  case MVT::i64:
  case MVT::i32:
    if (Fast)
      *Fast = 1;
    return true;
  default:
    return false;
  }
}

SDValue MipsSETargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:  return lowerLOAD(Op, DAG);
  case ISD::STORE: return lowerSTORE(Op, DAG);
  case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
  case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
  case ISD::MULHS:     return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
  case ISD::MULHU:     return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
  case ISD::MUL:       return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
  case ISD::SDIVREM:   return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
  case ISD::UDIVREM:   return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
                                          DAG);
  case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:  return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:     return lowerINTRINSIC_VOID(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:       return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SELECT:             return lowerSELECT(Op, DAG);
  case ISD::BITCAST:            return lowerBITCAST(Op, DAG);
  }

  return MipsTargetLowering::LowerOperation(Op, DAG);
}

// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::AND.
// - Removes redundant zero extensions performed by an ISD::AND.
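//
// For example, assuming a 16-bit extend type:
//   (and (MipsVExtractSExt $v, $idx, i16), 65535)
//     -> (MipsVExtractZExt $v, $idx, i16)
// since the mask keeps exactly the 16 bits the extract produced; only the
// kind of extension applied to them changes.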
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  unsigned Op0Opcode = Op0->getOpcode();

  // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
  // where $d + 1 == 2^n and n == 32
  // or    $d + 1 == 2^n and n <= 32 and ZExt
  // -> (MipsVExtractZExt $a, $b, $c)
  if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
      Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
    ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1);

    if (!Mask)
      return SDValue();

    int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();

    if (Log2IfPositive <= 0)
      return SDValue(); // Mask+1 is not a power of 2

    SDValue Op0Op2 = Op0->getOperand(2);
    EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
    unsigned ExtendTySize = ExtendTy.getSizeInBits();
    unsigned Log2 = Log2IfPositive;

    if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
        Log2 == ExtendTySize) {
      SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
      return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
                         Op0->getVTList(),
                         ArrayRef(Ops, Op0->getNumOperands()));
    }
  }

  return SDValue();
}

// Determine if the specified node is a constant vector splat.
//
// Returns true and sets Imm if:
// * N is an ISD::BUILD_VECTOR representing a constant splat
//
// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
// differences are that it assumes the MSA has already been checked and the
// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
// must not be in order for binsri.d to be selectable).
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());

  if (!Node)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                             8, !IsLittleEndian))
    return false;

  Imm = SplatValue;

  return true;
}

// Test whether the given node is an all-ones build_vector.
static bool isVectorAllOnes(SDValue N) {
  // Look through bitcasts. Endianness doesn't matter because we are looking
  // for an all-ones value.
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(0);

  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);

  if (!BVN)
    return false;

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  // Endianness doesn't matter in this context because we are looking for
  // an all-ones value.
  if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
    return SplatValue.isAllOnes();

  return false;
}

// Test whether N is the bitwise inverse of OfNode.
static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
  if (N->getOpcode() != ISD::XOR)
    return false;

  if (isVectorAllOnes(N->getOperand(0)))
    return N->getOperand(1) == OfNode;

  if (isVectorAllOnes(N->getOperand(1)))
    return N->getOperand(0) == OfNode;

  return false;
}

// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
//   where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
//   vector type.
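//
// For example, with a v16i8 splat of 0x0f as the mask:
//   (or (and $a, splat(0x0f)), (and $b, splat(0xf0)))
//     -> (vselect splat(0x0f), $a, $b)
// which can then be selected as a single MSA bit-select instruction.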
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const MipsSubtarget &Subtarget) {
  if (!Subtarget.hasMSA())
    return SDValue();

  EVT Ty = N->getValueType(0);

  if (!Ty.is128BitVector())
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
    SDValue Op0Op0 = Op0->getOperand(0);
    SDValue Op0Op1 = Op0->getOperand(1);
    SDValue Op1Op0 = Op1->getOperand(0);
    SDValue Op1Op1 = Op1->getOperand(1);
    bool IsLittleEndian = !Subtarget.isLittle();

    SDValue IfSet, IfClr, Cond;
    bool IsConstantMask = false;
    APInt Mask, InvMask;

    // If Op0Op0 is an appropriate mask, try to find its inverse in either
    // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes,
    // while looking.
    // IfClr will be set if we find a valid match.
    if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
      Cond = Op0Op0;
      IfSet = Op0Op1;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
    // thing again using this mask.
    // IfClr will be set if we find a valid match.
    if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
      Cond = Op0Op1;
      IfSet = Op0Op0;

      if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
          Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op1;
      else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
               Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
        IfClr = Op1Op0;

      IsConstantMask = true;
    }

    // If IfClr is not yet set, try looking for a non-constant match.
    // IfClr will be set if we find a valid match amongst the eight
    // possibilities.
    if (!IfClr.getNode()) {
      if (isBitwiseInverse(Op0Op0, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
        Cond = Op1Op0;
        IfSet = Op1Op1;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op1;
      } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
        Cond = Op1Op1;
        IfSet = Op1Op0;
        IfClr = Op0Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
        Cond = Op0Op0;
        IfSet = Op0Op1;
        IfClr = Op1Op0;
      } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op1;
      } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
        Cond = Op0Op1;
        IfSet = Op0Op0;
        IfClr = Op1Op0;
      }
    }

    // At this point, IfClr will be set if we have a valid match.
    if (!IfClr.getNode())
      return SDValue();

    assert(Cond.getNode() && IfSet.getNode());

    // Fold degenerate cases.
    if (IsConstantMask) {
      if (Mask.isAllOnes())
        return IfSet;
      else if (Mask == 0)
        return IfClr;
    }

    // Transform the DAG into an equivalent VSELECT.
    return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
  }

  return SDValue();
}

static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT,
                                               SelectionDAG &DAG,
                                               const MipsSubtarget &Subtarget) {
  // Estimate the number of operations the below transform will turn a
  // constant multiply into. The number is approximately equal to the minimal
  // number of powers of two that constant can be broken down to by adding
  // or subtracting them.
  //
  // If we have taken more than 12[1] / 8[2] steps to attempt the
  // optimization for a native sized value, it is more than likely that this
  // optimization will make things worse.
  //
  // [1] MIPS64 requires 6 instructions at most to materialize any constant,
  //     multiplication requires at least 4 cycles, but another cycle (or two)
  //     to retrieve the result from the HI/LO registers.
  //
  // [2] For MIPS32, more than 8 steps is expensive as the constant could be
  //     materialized in 2 instructions, multiplication requires at least 4
  //     cycles, but another cycle (or two) to retrieve the result from the
  //     HI/LO registers.
  //
  // TODO:
  // - MaxSteps needs to consider the `VT` of the constant for the current
  //   target.
  // - Consider performing this optimization after type legalization.
  //   That would allow removing the workaround for types not supported
  //   natively.
  // - Take into account the `-Os, -Oz` flags because this optimization
  //   increases code size.
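  //
  // For example, C == 30 is decomposed as 32 - 2: the walk below pushes 32
  // and 2, both of which are powers of two, so it counts three steps in
  // total and the expansion is considered profitable on either ABI.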
  unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;

  SmallVector<APInt, 16> WorkStack(1, C);
  unsigned Steps = 0;
  unsigned BitWidth = C.getBitWidth();

  while (!WorkStack.empty()) {
    APInt Val = WorkStack.pop_back_val();

    if (Val == 0 || Val == 1)
      continue;

    if (Steps >= MaxSteps)
      return false;

    if (Val.isPowerOf2()) {
      ++Steps;
      continue;
    }

    APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
    APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
                                  : APInt(BitWidth, 1) << Val.ceilLogBase2();
    if ((Val - Floor).ule(Ceil - Val)) {
      WorkStack.push_back(Floor);
      WorkStack.push_back(Val - Floor);
    } else {
      WorkStack.push_back(Ceil);
      WorkStack.push_back(Ceil - Val);
    }

    ++Steps;
  }

  // If the value being multiplied is not supported natively, we have to pay
  // an additional legalization cost, conservatively assume an increase in the
  // cost of 3 instructions per step. The values for this heuristic were
  // determined experimentally.
  unsigned RegisterSize = DAG.getTargetLoweringInfo()
                              .getRegisterType(*DAG.getContext(), VT)
                              .getSizeInBits();
  Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
  if (Steps > 27)
    return false;

  return true;
}

static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT,
                            EVT ShiftTy, SelectionDAG &DAG) {
  // Return 0.
  if (C == 0)
    return DAG.getConstant(0, DL, VT);

  // Return x.
  if (C == 1)
    return X;

  // If c is power of 2, return (shl x, log2(c)).
  if (C.isPowerOf2())
    return DAG.getNode(ISD::SHL, DL, VT, X,
                       DAG.getConstant(C.logBase2(), DL, ShiftTy));

  unsigned BitWidth = C.getBitWidth();
  APInt Floor = APInt(BitWidth, 1) << C.logBase2();
  APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
                                APInt(BitWidth, 1) << C.ceilLogBase2();

  // If |c - floor_c| <= |c - ceil_c|,
  // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
  // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
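  //
  // For example, c == 10: floor_c == 8 and c - floor_c == 2 <= ceil_c - c == 6,
  // so the recursion produces (add (shl x, 3), (shl x, 1)).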
  if ((C - Floor).ule(Ceil - C)) {
    SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
    SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
    return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
  }

  // If |c - floor_c| > |c - ceil_c|,
  // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
  SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
  SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
}

static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSETargetLowering *TL,
                                 const MipsSubtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs(
                              C->getAPIntValue(), VT, DAG, Subtarget))
      return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
                          TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT),
                          DAG);

  return SDValue(N, 0);
}

static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
                                      SelectionDAG &DAG,
                                      const MipsSubtarget &Subtarget) {
  // See if this is a vector splat immediate node.
  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  unsigned EltSize = Ty.getScalarSizeInBits();
  BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));

  if (!Subtarget.hasDSP())
    return SDValue();

  if (!BV ||
      !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
                           EltSize, !Subtarget.isLittle()) ||
      (SplatBitSize != EltSize) ||
      (SplatValue.getZExtValue() >= EltSize))
    return SDValue();

  SDLoc DL(N);
  return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
                     DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
}

static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
}

// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
// constant splats into MipsISD::SHRA_DSP for DSPr2.
//
// Performs the following transformations:
// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
//   sign/zero-extension is completely overwritten by the new one performed by
//   the ISD::SRA and ISD::SHL nodes.
// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
//   sequence.
//
// See performDSPShiftCombine for more information about the transformation
// used for DSPr2.
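//
// For example, with an 8-bit extend type on a 32-bit result:
//   (sra (shl (MipsVExtractSExt $v, $idx, i8), 24), 24)
//     -> (MipsVExtractSExt $v, $idx, i8)
// because the shl/sra pair merely re-creates the sign-extension that the
// extract already performed ($d + sizeof($c) == 24 + 8 == 32).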
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA()) {
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);

    // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
    // where $d + sizeof($c) == 32
    // or    $d + sizeof($c) <= 32 and SExt
    // -> (MipsVExtractSExt $a, $b, $c)
    if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
      SDValue Op0Op0 = Op0->getOperand(0);
      ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1);

      if (!ShAmount)
        return SDValue();

      if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
          Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
        return SDValue();

      EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
      unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();

      if (TotalBits == 32 ||
          (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
           TotalBits <= 32)) {
        SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
                          Op0Op0->getOperand(2) };
        return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
                           Op0Op0->getVTList(),
                           ArrayRef(Ops, Op0Op0->getNumOperands()));
      }
    }
  }

  if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
}

static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
    return SDValue();

  return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
}

static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) {
  bool IsV216 = (Ty == MVT::v2i16);

  switch (CC) {
  case ISD::SETEQ:
  case ISD::SETNE:  return true;
  case ISD::SETLT:
  case ISD::SETLE:
  case ISD::SETGT:
  case ISD::SETGE:  return IsV216;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE: return !IsV216;
  default:          return false;
  }
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
    return SDValue();

  if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
    return SDValue();

  return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
                     N->getOperand(1), N->getOperand(2));
}

static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) {
  EVT Ty = N->getValueType(0);

  if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
    SDValue SetCC = N->getOperand(0);

    if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
      return SDValue();

    return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
                       SetCC.getOperand(0), SetCC.getOperand(1),
                       N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
  }

  return SDValue();
}

static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
                                 const MipsSubtarget &Subtarget) {
  EVT Ty = N->getValueType(0);

  if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
    // Try the following combines:
    //   (xor (or $a, $b), (build_vector allones))
    //   (xor (or $a, $b), (bitcast (build_vector allones)))
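    // Either form folds to (MipsVNOR $a, $b), i.e. a single vector nor.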
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue NotOp;

    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      NotOp = Op1;
    else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      NotOp = Op0;
    else
      return SDValue();

    if (NotOp->getOpcode() == ISD::OR)
      return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
                         NotOp->getOperand(1));
  }

  return SDValue();
}

SDValue
MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDValue Val;

  switch (N->getOpcode()) {
  case ISD::AND:
    Val = performANDCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::OR:
    Val = performORCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::MUL:
    return performMULCombine(N, DAG, DCI, this, Subtarget);
  case ISD::SHL:
    Val = performSHLCombine(N, DAG, DCI, Subtarget);
    break;
  case ISD::SRA:
    return performSRACombine(N, DAG, DCI, Subtarget);
  case ISD::SRL:
    return performSRLCombine(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
    return performVSELECTCombine(N, DAG);
  case ISD::XOR:
    Val = performXORCombine(N, DAG, Subtarget);
    break;
  case ISD::SETCC:
    Val = performSETCCCombine(N, DAG);
    break;
  }

  if (Val.getNode()) {
    LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
               N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
               Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
    return Val;
  }

  return MipsTargetLowering::PerformDAGCombine(N, DCI);
}

MachineBasicBlock *
MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
  return NextStackOffset <= FI.getIncomingArgSize();
}

void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}

SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode &Nd = *cast<LoadSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerLOAD(Op, DAG);

  // Replace a double precision load with two i32 loads and a buildpair64.
  SDLoc DL(Op);
  SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();

  // i32 load from lower address.
  SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
                           Nd.getAlign(), Nd.getMemOperand()->getFlags());

  // i32 load from higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  SDValue Hi = DAG.getLoad(
      MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
      commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags());

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  SDValue Ops[2] = {BP, Hi.getValue(1)};
  return DAG.getMergeValues(Ops, DL);
}

SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode &Nd = *cast<StoreSDNode>(Op);

  if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
    return MipsTargetLowering::lowerSTORE(Op, DAG);

  // Replace a double precision store with two ExtractElementF64 nodes and
  // i32 stores.
  SDLoc DL(Op);
  SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
  EVT PtrVT = Ptr.getValueType();
  SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(0, DL, MVT::i32));
  SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                           Val, DAG.getConstant(1, DL, MVT::i32));

  if (!Subtarget.isLittle())
    std::swap(Lo, Hi);

  // i32 store to lower address.
  Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
                       Nd.getMemOperand()->getFlags(), Nd.getAAInfo());

  // i32 store to higher address.
  Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
  return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
                      commonAlignment(Nd.getAlign(), 4),
                      Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
}

SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
  MVT Dest = Op.getValueType().getSimpleVT();

  // Bitcast i64 to double.
  if (Src == MVT::i64 && Dest == MVT::f64) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) =
        DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
    return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
  }

  // Bitcast double to i64.
  if (Src == MVT::f64 && Dest == MVT::i64) {
    // Skip lower bitcast when operand0 has converted float results to integer
    // which was done by function SoftenFloatResult.
    if (getTypeAction(*DAG.getContext(), Op.getOperand(0).getValueType()) ==
        TargetLowering::TypeSoftenFloat)
      return SDValue();
    SDValue Lo =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(0, DL, MVT::i32));
    SDValue Hi =
        DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
                    DAG.getConstant(1, DL, MVT::i32));
    return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
  }

  // Skip other cases of bitcast and use default lowering.
  return SDValue();
}

SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
                                          bool HasLo, bool HasHi,
                                          SelectionDAG &DAG) const {
  // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
  assert(!Subtarget.hasMips32r6());

  EVT Ty = Op.getOperand(0).getValueType();
  SDLoc DL(Op);
  SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
                             Op.getOperand(0), Op.getOperand(1));
  SDValue Lo, Hi;

  if (HasLo)
    Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
  if (HasHi)
    Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);

  if (!HasLo || !HasHi)
    return HasLo ? Lo : Hi;

  SDValue Vals[] = { Lo, Hi };
  return DAG.getMergeValues(Vals, DL);
}

static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue InLo, InHi;
  std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
  return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}

static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) {
  SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
  SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
}

// This function expands mips intrinsic nodes which have 64-bit input operands
// or output values.
//
// out64 = intrinsic-node in64
// =>
// lo = copy (extract-element (in64, 0))
// hi = copy (extract-element (in64, 1))
// mips-specific-node
// v0 = copy lo
// v1 = copy hi
// out64 = merge-values (v0, v1)
//
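// For example, @llvm.mips.dpa.w.ph(i64 %acc, v2i16 %a, v2i16 %b) becomes a
// DPA_W_PH node whose accumulator operand is assembled with MTLOHI from the
// two halves of %acc and whose untyped result is split back apart with
// MFLO/MFHI.
//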
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
  SmallVector<SDValue, 3> Ops;
  unsigned OpNo = 0;

  // See if Op has a chain input.
  if (HasChainIn)
    Ops.push_back(Op->getOperand(OpNo++));

  // The next operand is the intrinsic opcode.
  assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);

  // See if the next operand has type i64.
  SDValue Opnd = Op->getOperand(++OpNo), In64;

  if (Opnd.getValueType() == MVT::i64)
    In64 = initAccumulator(Opnd, DL, DAG);
  else
    Ops.push_back(Opnd);

  // Push the remaining operands.
  for (++OpNo; OpNo < Op->getNumOperands(); ++OpNo)
    Ops.push_back(Op->getOperand(OpNo));

  // Add In64 to the end of the list.
  if (In64.getNode())
    Ops.push_back(In64);

  // Scan output.
  SmallVector<EVT, 2> ResTys;

  for (EVT Ty : Op->values())
    ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);

  // Create node.
  SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
  SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;

  if (!HasChainIn)
    return Out;

  assert(Val->getValueType(1) == MVT::Other);
  SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
  return DAG.getMergeValues(Vals, DL);
}

// Lower an MSA copy intrinsic into the specified SelectionDAG node
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
  SDLoc DL(Op);
  SDValue Vec = Op->getOperand(1);
  SDValue Idx = Op->getOperand(2);
  EVT ResTy = Op->getValueType(0);
  EVT EltTy = Vec->getValueType(0).getVectorElementType();

  SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
                               DAG.getValueType(EltTy));

  return Result;
}

static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
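  //
  // For example, splatting an immediate lane index into v2i64 on a
  // little-endian target builds the v4i32 vector <LaneA, 0, LaneA, 0>, which
  // the code below arranges so that splati.d can be matched.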
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    if (BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}

static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
                                bool IsSigned = false) {
  auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
  return DAG.getConstant(
      APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
            IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
      SDLoc(Op), Op->getValueType(0));
}

static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}

static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
                                        unsigned Opc, SDValue Imm,
                                        bool BigEndian) {
  EVT VecTy = Op->getValueType(0);
  SDValue Exp2Imm;
  SDLoc DL(Op);

  // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
  // here for now.
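  //
  // For example, a bseti.d with immediate 33 folds to the v2i64 splat of
  // (1 << 33), built below as the v4i32 vector <0, 2, 0, 2> on little-endian
  // targets and bitcast back to v2i64.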
  if (VecTy == MVT::v2i64) {
    if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
      APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();

      SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
                                           MVT::i32);
      SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);

      if (BigEndian)
        std::swap(BitImmLoOp, BitImmHiOp);

      Exp2Imm = DAG.getNode(
          ISD::BITCAST, DL, MVT::v2i64,
          DAG.getBuildVector(MVT::v4i32, DL,
                             {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
    }
  }

  if (!Exp2Imm.getNode()) {
    // We couldn't constant fold, so do a vector shift instead.

    // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
    // only values 0-63 are valid.
    if (VecTy == MVT::v2i64)
      Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);

    Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);

    Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
                          Exp2Imm);
  }

  return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
}

static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  SDValue Vec = Op->getOperand(2);
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
  SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
                                       DL, ResEltTy);
  SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);

  return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
}

static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
  EVT ResTy = Op->getValueType(0);
  SDLoc DL(Op);
  SDValue One = DAG.getConstant(1, DL, ResTy);
  SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
                     DAG.getNOT(DL, Bit, ResTy));
}

static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT ResTy = Op->getValueType(0);
  APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
                 << Op->getConstantOperandAPInt(2);
  SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);

  return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
}

SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned Intrinsic = Op->getConstantOperandVal(0);
  switch (Intrinsic) {
  default:
    return SDValue();
  case Intrinsic::mips_shilo:
    return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
  case Intrinsic::mips_dpau_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
  case Intrinsic::mips_dpau_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
  case Intrinsic::mips_dpsu_h_qbl:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
  case Intrinsic::mips_dpsu_h_qbr:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
  case Intrinsic::mips_dpa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
  case Intrinsic::mips_dps_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
  case Intrinsic::mips_dpax_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
  case Intrinsic::mips_dpsx_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
  case Intrinsic::mips_mulsa_w_ph:
    return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
  case Intrinsic::mips_mult:
    return lowerDSPIntr(Op, DAG, MipsISD::Mult);
  case Intrinsic::mips_multu:
    return lowerDSPIntr(Op, DAG, MipsISD::Multu);
  case Intrinsic::mips_madd:
    return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
  case Intrinsic::mips_maddu:
    return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
  case Intrinsic::mips_msub:
    return lowerDSPIntr(Op, DAG, MipsISD::MSub);
  case Intrinsic::mips_msubu:
    return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
  case Intrinsic::mips_addv_b:
  case Intrinsic::mips_addv_h:
  case Intrinsic::mips_addv_w:
  case Intrinsic::mips_addv_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_addvi_b:
  case Intrinsic::mips_addvi_h:
  case Intrinsic::mips_addvi_w:
  case Intrinsic::mips_addvi_d:
    return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_and_v:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_andi_b:
    return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
                       lowerMSASplatImm(Op, 2, DAG));
  case Intrinsic::mips_bclr_b:
  case Intrinsic::mips_bclr_h:
  case Intrinsic::mips_bclr_w:
  case Intrinsic::mips_bclr_d:
    return lowerMSABitClear(Op, DAG);
  case Intrinsic::mips_bclri_b:
  case Intrinsic::mips_bclri_h:
  case Intrinsic::mips_bclri_w:
  case Intrinsic::mips_bclri_d:
    return lowerMSABitClearImm(Op, DAG);
  case Intrinsic::mips_binsli_b:
  case Intrinsic::mips_binsli_h:
  case Intrinsic::mips_binsli_w:
  case Intrinsic::mips_binsli_d: {
    // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(),
                                       Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_binsri_b:
  case Intrinsic::mips_binsri_h:
  case Intrinsic::mips_binsri_w:
  case Intrinsic::mips_binsri_d: {
    // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
    EVT VecTy = Op->getValueType(0);
    EVT EltTy = VecTy.getVectorElementType();
    if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
      report_fatal_error("Immediate out of range");
    APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
                                      Op->getConstantOperandVal(3) + 1);
    return DAG.getNode(ISD::VSELECT, DL, VecTy,
                       DAG.getConstant(Mask, DL, VecTy, true),
                       Op->getOperand(2), Op->getOperand(1));
  }
  case Intrinsic::mips_bmnz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(2), Op->getOperand(1));
  case Intrinsic::mips_bmnzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
                       Op->getOperand(1));
  case Intrinsic::mips_bmz_v:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
                       Op->getOperand(1), Op->getOperand(2));
  case Intrinsic::mips_bmzi_b:
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
                       Op->getOperand(2));
  case Intrinsic::mips_bneg_b:
  case Intrinsic::mips_bneg_h:
  case Intrinsic::mips_bneg_w:
  case Intrinsic::mips_bneg_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bnegi_b:
  case Intrinsic::mips_bnegi_h:
  case Intrinsic::mips_bnegi_w:
  case Intrinsic::mips_bnegi_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bnz_b:
  case Intrinsic::mips_bnz_h:
  case Intrinsic::mips_bnz_w:
  case Intrinsic::mips_bnz_d:
    return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bnz_v:
    return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bsel_v:
    // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), Op->getOperand(3),
                       Op->getOperand(2));
  case Intrinsic::mips_bseli_b:
    // bseli_b(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
    return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
                       Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
                       Op->getOperand(2));
  case Intrinsic::mips_bset_b:
  case Intrinsic::mips_bset_h:
  case Intrinsic::mips_bset_w:
  case Intrinsic::mips_bset_d: {
    EVT VecTy = Op->getValueType(0);
    SDValue One = DAG.getConstant(1, DL, VecTy);

    return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
                       DAG.getNode(ISD::SHL, DL, VecTy, One,
                                   truncateVecElts(Op, DAG)));
  }
  case Intrinsic::mips_bseti_b:
  case Intrinsic::mips_bseti_h:
  case Intrinsic::mips_bseti_w:
  case Intrinsic::mips_bseti_d:
    return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
                                    !Subtarget.isLittle());
  case Intrinsic::mips_bz_b:
  case Intrinsic::mips_bz_h:
  case Intrinsic::mips_bz_w:
  case Intrinsic::mips_bz_d:
    return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_bz_v:
    return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
                       Op->getOperand(1));
  case Intrinsic::mips_ceq_b:
  case Intrinsic::mips_ceq_h:
  case Intrinsic::mips_ceq_w:
  case Intrinsic::mips_ceq_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETEQ);
  case Intrinsic::mips_ceqi_b:
  case Intrinsic::mips_ceqi_h:
  case Intrinsic::mips_ceqi_w:
  case Intrinsic::mips_ceqi_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
  case Intrinsic::mips_cle_s_b:
  case Intrinsic::mips_cle_s_h:
  case Intrinsic::mips_cle_s_w:
  case Intrinsic::mips_cle_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLE);
  case Intrinsic::mips_clei_s_b:
  case Intrinsic::mips_clei_s_h:
  case Intrinsic::mips_clei_s_w:
  case Intrinsic::mips_clei_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
  case Intrinsic::mips_cle_u_b:
  case Intrinsic::mips_cle_u_h:
  case Intrinsic::mips_cle_u_w:
  case Intrinsic::mips_cle_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULE);
  case Intrinsic::mips_clei_u_b:
  case Intrinsic::mips_clei_u_h:
  case Intrinsic::mips_clei_u_w:
  case Intrinsic::mips_clei_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
  case Intrinsic::mips_clt_s_b:
  case Intrinsic::mips_clt_s_h:
  case Intrinsic::mips_clt_s_w:
  case Intrinsic::mips_clt_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETLT);
  case Intrinsic::mips_clti_s_b:
  case Intrinsic::mips_clti_s_h:
  case Intrinsic::mips_clti_s_w:
  case Intrinsic::mips_clti_s_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
  case Intrinsic::mips_clt_u_b:
  case Intrinsic::mips_clt_u_h:
  case Intrinsic::mips_clt_u_w:
  case Intrinsic::mips_clt_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        Op->getOperand(2), ISD::SETULT);
  case Intrinsic::mips_clti_u_b:
  case Intrinsic::mips_clti_u_h:
  case Intrinsic::mips_clti_u_w:
  case Intrinsic::mips_clti_u_d:
    return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
                        lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
  case Intrinsic::mips_copy_s_b:
  case Intrinsic::mips_copy_s_h:
  case Intrinsic::mips_copy_s_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
  case Intrinsic::mips_copy_s_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
  case Intrinsic::mips_copy_u_b:
  case Intrinsic::mips_copy_u_h:
  case Intrinsic::mips_copy_u_w:
    return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
  case Intrinsic::mips_copy_u_d:
    if (Subtarget.hasMips64())
      // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
      return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
    else {
      // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
      // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
      // Note: When i64 is illegal, this results in copy_s.w instructions
      // instead of copy_u.w instructions. This makes no difference to the
      // behaviour since i64 is only illegal when the register file is 32-bit.
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
                         Op->getValueType(0), Op->getOperand(1),
                         Op->getOperand(2));
    }
1856 case Intrinsic::mips_div_s_b:
1857 case Intrinsic::mips_div_s_h:
1858 case Intrinsic::mips_div_s_w:
1859 case Intrinsic::mips_div_s_d:
1860 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1861 Op->getOperand(2));
1862 case Intrinsic::mips_div_u_b:
1863 case Intrinsic::mips_div_u_h:
1864 case Intrinsic::mips_div_u_w:
1865 case Intrinsic::mips_div_u_d:
1866 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1867 Op->getOperand(2));
1868 case Intrinsic::mips_fadd_w:
1869 case Intrinsic::mips_fadd_d:
1870 // TODO: If intrinsics have fast-math-flags, propagate them.
1871 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1872 Op->getOperand(2));
1873 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1874 case Intrinsic::mips_fceq_w:
1875 case Intrinsic::mips_fceq_d:
1876 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1877 Op->getOperand(2), ISD::SETOEQ);
1878 case Intrinsic::mips_fcle_w:
1879 case Intrinsic::mips_fcle_d:
1880 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1881 Op->getOperand(2), ISD::SETOLE);
1882 case Intrinsic::mips_fclt_w:
1883 case Intrinsic::mips_fclt_d:
1884 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1885 Op->getOperand(2), ISD::SETOLT);
1886 case Intrinsic::mips_fcne_w:
1887 case Intrinsic::mips_fcne_d:
1888 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1889 Op->getOperand(2), ISD::SETONE);
1890 case Intrinsic::mips_fcor_w:
1891 case Intrinsic::mips_fcor_d:
1892 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1893 Op->getOperand(2), ISD::SETO);
1894 case Intrinsic::mips_fcueq_w:
1895 case Intrinsic::mips_fcueq_d:
1896 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1897 Op->getOperand(2), ISD::SETUEQ);
1898 case Intrinsic::mips_fcule_w:
1899 case Intrinsic::mips_fcule_d:
1900 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1901 Op->getOperand(2), ISD::SETULE);
1902 case Intrinsic::mips_fcult_w:
1903 case Intrinsic::mips_fcult_d:
1904 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1905 Op->getOperand(2), ISD::SETULT);
1906 case Intrinsic::mips_fcun_w:
1907 case Intrinsic::mips_fcun_d:
1908 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1909 Op->getOperand(2), ISD::SETUO);
1910 case Intrinsic::mips_fcune_w:
1911 case Intrinsic::mips_fcune_d:
1912 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1913 Op->getOperand(2), ISD::SETUNE);
1914 case Intrinsic::mips_fdiv_w:
1915 case Intrinsic::mips_fdiv_d:
1916 // TODO: If intrinsics have fast-math-flags, propagate them.
1917 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1918 Op->getOperand(2));
1919 case Intrinsic::mips_ffint_u_w:
1920 case Intrinsic::mips_ffint_u_d:
1921 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1922 Op->getOperand(1));
1923 case Intrinsic::mips_ffint_s_w:
1924 case Intrinsic::mips_ffint_s_d:
1925 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1926 Op->getOperand(1));
1927 case Intrinsic::mips_fill_b:
1928 case Intrinsic::mips_fill_h:
1929 case Intrinsic::mips_fill_w:
1930 case Intrinsic::mips_fill_d: {
1931 EVT ResTy = Op->getValueType(0);
1932 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(),
1933 Op->getOperand(1));
1934
1935 // If ResTy is v2i64 then the type legalizer will break this node down into
1936 // an equivalent v4i32.
1937 return DAG.getBuildVector(ResTy, DL, Ops);
1938 }
1939 case Intrinsic::mips_fexp2_w:
1940 case Intrinsic::mips_fexp2_d: {
1941 // TODO: If intrinsics have fast-math-flags, propagate them.
1942 EVT ResTy = Op->getValueType(0);
1943 return DAG.getNode(
1944 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
1945 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
1946 }
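// The FMUL/FEXP2 expansion above relies on the identity fexp2(x, y) == x * 2^y.
// A minimal scalar sketch of the same computation (illustrative only, not part
// of this file; requires <cmath>):
//   float fexp2_lane(float X, float Y) { return X * exp2f(Y); }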
1947 case Intrinsic::mips_flog2_w:
1948 case Intrinsic::mips_flog2_d:
1949 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
1950 case Intrinsic::mips_fmadd_w:
1951 case Intrinsic::mips_fmadd_d:
1952 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
1953 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1954 case Intrinsic::mips_fmul_w:
1955 case Intrinsic::mips_fmul_d:
1956 // TODO: If intrinsics have fast-math-flags, propagate them.
1957 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
1958 Op->getOperand(2));
1959 case Intrinsic::mips_fmsub_w:
1960 case Intrinsic::mips_fmsub_d: {
1961 // TODO: If intrinsics have fast-math-flags, propagate them.
1962 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
1963 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
1964 }
1965 case Intrinsic::mips_frint_w:
1966 case Intrinsic::mips_frint_d:
1967 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
1968 case Intrinsic::mips_fsqrt_w:
1969 case Intrinsic::mips_fsqrt_d:
1970 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
1971 case Intrinsic::mips_fsub_w:
1972 case Intrinsic::mips_fsub_d:
1973 // TODO: If intrinsics have fast-math-flags, propagate them.
1974 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
1975 Op->getOperand(2));
1976 case Intrinsic::mips_ftrunc_u_w:
1977 case Intrinsic::mips_ftrunc_u_d:
1978 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
1979 Op->getOperand(1));
1980 case Intrinsic::mips_ftrunc_s_w:
1981 case Intrinsic::mips_ftrunc_s_d:
1982 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
1983 Op->getOperand(1));
1984 case Intrinsic::mips_ilvev_b:
1985 case Intrinsic::mips_ilvev_h:
1986 case Intrinsic::mips_ilvev_w:
1987 case Intrinsic::mips_ilvev_d:
1988 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
1989 Op->getOperand(1), Op->getOperand(2));
1990 case Intrinsic::mips_ilvl_b:
1991 case Intrinsic::mips_ilvl_h:
1992 case Intrinsic::mips_ilvl_w:
1993 case Intrinsic::mips_ilvl_d:
1994 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
1995 Op->getOperand(1), Op->getOperand(2));
1996 case Intrinsic::mips_ilvod_b:
1997 case Intrinsic::mips_ilvod_h:
1998 case Intrinsic::mips_ilvod_w:
1999 case Intrinsic::mips_ilvod_d:
2000 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
2001 Op->getOperand(1), Op->getOperand(2));
2002 case Intrinsic::mips_ilvr_b:
2003 case Intrinsic::mips_ilvr_h:
2004 case Intrinsic::mips_ilvr_w:
2005 case Intrinsic::mips_ilvr_d:
2006 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
2007 Op->getOperand(1), Op->getOperand(2));
2008 case Intrinsic::mips_insert_b:
2009 case Intrinsic::mips_insert_h:
2010 case Intrinsic::mips_insert_w:
2011 case Intrinsic::mips_insert_d:
2012 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
2013 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
2014 case Intrinsic::mips_insve_b:
2015 case Intrinsic::mips_insve_h:
2016 case Intrinsic::mips_insve_w:
2017 case Intrinsic::mips_insve_d: {
2018 // Report an error for out of range values.
2019 int64_t Max;
2020 switch (Intrinsic) {
2021 case Intrinsic::mips_insve_b: Max = 15; break;
2022 case Intrinsic::mips_insve_h: Max = 7; break;
2023 case Intrinsic::mips_insve_w: Max = 3; break;
2024 case Intrinsic::mips_insve_d: Max = 1; break;
2025 default: llvm_unreachable("Unmatched intrinsic");
2026 }
2027 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2028 if (Value < 0 || Value > Max)
2029 report_fatal_error("Immediate out of range");
2030 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
2031 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
2032 DAG.getConstant(0, DL, MVT::i32));
2033 }
2034 case Intrinsic::mips_ldi_b:
2035 case Intrinsic::mips_ldi_h:
2036 case Intrinsic::mips_ldi_w:
2037 case Intrinsic::mips_ldi_d:
2038 return lowerMSASplatImm(Op, 1, DAG, true);
2039 case Intrinsic::mips_lsa:
2040 case Intrinsic::mips_dlsa: {
2041 EVT ResTy = Op->getValueType(0);
2042 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2043 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
2044 Op->getOperand(2), Op->getOperand(3)));
2045 }
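// lsa/dlsa perform a shifted add, so the node built above computes
// op1 + (op2 << op3). For example, __builtin_mips_lsa(Base, Idx, 2) becomes
// ADD(Base, SHL(Idx, 2)), i.e. Base + Idx * 4 -- the usual scaled-index
// address computation, done in a single instruction.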
2046 case Intrinsic::mips_maddv_b:
2047 case Intrinsic::mips_maddv_h:
2048 case Intrinsic::mips_maddv_w:
2049 case Intrinsic::mips_maddv_d: {
2050 EVT ResTy = Op->getValueType(0);
2051 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2052 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2053 Op->getOperand(2), Op->getOperand(3)));
2054 }
2055 case Intrinsic::mips_max_s_b:
2056 case Intrinsic::mips_max_s_h:
2057 case Intrinsic::mips_max_s_w:
2058 case Intrinsic::mips_max_s_d:
2059 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2060 Op->getOperand(1), Op->getOperand(2));
2061 case Intrinsic::mips_max_u_b:
2062 case Intrinsic::mips_max_u_h:
2063 case Intrinsic::mips_max_u_w:
2064 case Intrinsic::mips_max_u_d:
2065 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2066 Op->getOperand(1), Op->getOperand(2));
2067 case Intrinsic::mips_maxi_s_b:
2068 case Intrinsic::mips_maxi_s_h:
2069 case Intrinsic::mips_maxi_s_w:
2070 case Intrinsic::mips_maxi_s_d:
2071 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2072 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2073 case Intrinsic::mips_maxi_u_b:
2074 case Intrinsic::mips_maxi_u_h:
2075 case Intrinsic::mips_maxi_u_w:
2076 case Intrinsic::mips_maxi_u_d:
2077 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2078 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2079 case Intrinsic::mips_min_s_b:
2080 case Intrinsic::mips_min_s_h:
2081 case Intrinsic::mips_min_s_w:
2082 case Intrinsic::mips_min_s_d:
2083 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2084 Op->getOperand(1), Op->getOperand(2));
2085 case Intrinsic::mips_min_u_b:
2086 case Intrinsic::mips_min_u_h:
2087 case Intrinsic::mips_min_u_w:
2088 case Intrinsic::mips_min_u_d:
2089 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2090 Op->getOperand(1), Op->getOperand(2));
2091 case Intrinsic::mips_mini_s_b:
2092 case Intrinsic::mips_mini_s_h:
2093 case Intrinsic::mips_mini_s_w:
2094 case Intrinsic::mips_mini_s_d:
2095 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2096 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2097 case Intrinsic::mips_mini_u_b:
2098 case Intrinsic::mips_mini_u_h:
2099 case Intrinsic::mips_mini_u_w:
2100 case Intrinsic::mips_mini_u_d:
2101 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2102 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2103 case Intrinsic::mips_mod_s_b:
2104 case Intrinsic::mips_mod_s_h:
2105 case Intrinsic::mips_mod_s_w:
2106 case Intrinsic::mips_mod_s_d:
2107 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2108 Op->getOperand(2));
2109 case Intrinsic::mips_mod_u_b:
2110 case Intrinsic::mips_mod_u_h:
2111 case Intrinsic::mips_mod_u_w:
2112 case Intrinsic::mips_mod_u_d:
2113 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2114 Op->getOperand(2));
2115 case Intrinsic::mips_mulv_b:
2116 case Intrinsic::mips_mulv_h:
2117 case Intrinsic::mips_mulv_w:
2118 case Intrinsic::mips_mulv_d:
2119 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2120 Op->getOperand(2));
2121 case Intrinsic::mips_msubv_b:
2122 case Intrinsic::mips_msubv_h:
2123 case Intrinsic::mips_msubv_w:
2124 case Intrinsic::mips_msubv_d: {
2125 EVT ResTy = Op->getValueType(0);
2126 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2127 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2128 Op->getOperand(2), Op->getOperand(3)));
2129 }
2130 case Intrinsic::mips_nlzc_b:
2131 case Intrinsic::mips_nlzc_h:
2132 case Intrinsic::mips_nlzc_w:
2133 case Intrinsic::mips_nlzc_d:
2134 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2135 case Intrinsic::mips_nor_v: {
2136 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2137 Op->getOperand(1), Op->getOperand(2));
2138 return DAG.getNOT(DL, Res, Res->getValueType(0));
2139 }
2140 case Intrinsic::mips_nori_b: {
2141 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2142 Op->getOperand(1),
2143 lowerMSASplatImm(Op, 2, DAG));
2144 return DAG.getNOT(DL, Res, Res->getValueType(0));
2145 }
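// Both nor_v and nori_b above use the identity nor(a, b) == ~(a | b), built as
// getNOT of an ISD::OR; e.g. for bytes a = 0x0F and b = 0x30, a | b == 0x3F
// and the final result is 0xC0.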
2146 case Intrinsic::mips_or_v:
2147 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2148 Op->getOperand(2));
2149 case Intrinsic::mips_ori_b:
2150 return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2151 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2152 case Intrinsic::mips_pckev_b:
2153 case Intrinsic::mips_pckev_h:
2154 case Intrinsic::mips_pckev_w:
2155 case Intrinsic::mips_pckev_d:
2156 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2157 Op->getOperand(1), Op->getOperand(2));
2158 case Intrinsic::mips_pckod_b:
2159 case Intrinsic::mips_pckod_h:
2160 case Intrinsic::mips_pckod_w:
2161 case Intrinsic::mips_pckod_d:
2162 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2163 Op->getOperand(1), Op->getOperand(2));
2164 case Intrinsic::mips_pcnt_b:
2165 case Intrinsic::mips_pcnt_h:
2166 case Intrinsic::mips_pcnt_w:
2167 case Intrinsic::mips_pcnt_d:
2168 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2169 case Intrinsic::mips_sat_s_b:
2170 case Intrinsic::mips_sat_s_h:
2171 case Intrinsic::mips_sat_s_w:
2172 case Intrinsic::mips_sat_s_d:
2173 case Intrinsic::mips_sat_u_b:
2174 case Intrinsic::mips_sat_u_h:
2175 case Intrinsic::mips_sat_u_w:
2176 case Intrinsic::mips_sat_u_d: {
2177 // Report an error for out of range values.
2178 int64_t Max;
2179 switch (Intrinsic) {
2180 case Intrinsic::mips_sat_s_b:
2181 case Intrinsic::mips_sat_u_b: Max = 7; break;
2182 case Intrinsic::mips_sat_s_h:
2183 case Intrinsic::mips_sat_u_h: Max = 15; break;
2184 case Intrinsic::mips_sat_s_w:
2185 case Intrinsic::mips_sat_u_w: Max = 31; break;
2186 case Intrinsic::mips_sat_s_d:
2187 case Intrinsic::mips_sat_u_d: Max = 63; break;
2188 default: llvm_unreachable("Unmatched intrinsic");
2189 }
2190 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2191 if (Value < 0 || Value > Max)
2192 report_fatal_error("Immediate out of range");
2193 return SDValue();
2194 }
2195 case Intrinsic::mips_shf_b:
2196 case Intrinsic::mips_shf_h:
2197 case Intrinsic::mips_shf_w: {
2198 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2199 if (Value < 0 || Value > 255)
2200 report_fatal_error("Immediate out of range");
2201 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2202 Op->getOperand(2), Op->getOperand(1));
2203 }
2204 case Intrinsic::mips_sldi_b:
2205 case Intrinsic::mips_sldi_h:
2206 case Intrinsic::mips_sldi_w:
2207 case Intrinsic::mips_sldi_d: {
2208 // Report an error for out of range values.
2209 int64_t Max;
2210 switch (Intrinsic) {
2211 case Intrinsic::mips_sldi_b: Max = 15; break;
2212 case Intrinsic::mips_sldi_h: Max = 7; break;
2213 case Intrinsic::mips_sldi_w: Max = 3; break;
2214 case Intrinsic::mips_sldi_d: Max = 1; break;
2215 default: llvm_unreachable("Unmatched intrinsic");
2216 }
2217 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2218 if (Value < 0 || Value > Max)
2219 report_fatal_error("Immediate out of range");
2220 return SDValue();
2221 }
2222 case Intrinsic::mips_sll_b:
2223 case Intrinsic::mips_sll_h:
2224 case Intrinsic::mips_sll_w:
2225 case Intrinsic::mips_sll_d:
2226 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2227 truncateVecElts(Op, DAG));
2228 case Intrinsic::mips_slli_b:
2229 case Intrinsic::mips_slli_h:
2230 case Intrinsic::mips_slli_w:
2231 case Intrinsic::mips_slli_d:
2232 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2233 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2234 case Intrinsic::mips_splat_b:
2235 case Intrinsic::mips_splat_h:
2236 case Intrinsic::mips_splat_w:
2237 case Intrinsic::mips_splat_d:
2238 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2239 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2240 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2241 // Instead we lower to MipsISD::VSHF and match from there.
2242 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2243 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2244 Op->getOperand(1));
2245 case Intrinsic::mips_splati_b:
2246 case Intrinsic::mips_splati_h:
2247 case Intrinsic::mips_splati_w:
2248 case Intrinsic::mips_splati_d:
2249 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2250 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2251 Op->getOperand(1));
2252 case Intrinsic::mips_sra_b:
2253 case Intrinsic::mips_sra_h:
2254 case Intrinsic::mips_sra_w:
2255 case Intrinsic::mips_sra_d:
2256 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2257 truncateVecElts(Op, DAG));
2258 case Intrinsic::mips_srai_b:
2259 case Intrinsic::mips_srai_h:
2260 case Intrinsic::mips_srai_w:
2261 case Intrinsic::mips_srai_d:
2262 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2263 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2264 case Intrinsic::mips_srari_b:
2265 case Intrinsic::mips_srari_h:
2266 case Intrinsic::mips_srari_w:
2267 case Intrinsic::mips_srari_d: {
2268 // Report an error for out of range values.
2269 int64_t Max;
2270 switch (Intrinsic) {
2271 case Intrinsic::mips_srari_b: Max = 7; break;
2272 case Intrinsic::mips_srari_h: Max = 15; break;
2273 case Intrinsic::mips_srari_w: Max = 31; break;
2274 case Intrinsic::mips_srari_d: Max = 63; break;
2275 default: llvm_unreachable("Unmatched intrinsic");
2276 }
2277 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2278 if (Value < 0 || Value > Max)
2279 report_fatal_error("Immediate out of range");
2280 return SDValue();
2281 }
2282 case Intrinsic::mips_srl_b:
2283 case Intrinsic::mips_srl_h:
2284 case Intrinsic::mips_srl_w:
2285 case Intrinsic::mips_srl_d:
2286 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2287 truncateVecElts(Op, DAG));
2288 case Intrinsic::mips_srli_b:
2289 case Intrinsic::mips_srli_h:
2290 case Intrinsic::mips_srli_w:
2291 case Intrinsic::mips_srli_d:
2292 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2293 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2294 case Intrinsic::mips_srlri_b:
2295 case Intrinsic::mips_srlri_h:
2296 case Intrinsic::mips_srlri_w:
2297 case Intrinsic::mips_srlri_d: {
2298 // Report an error for out of range values.
2299 int64_t Max;
2300 switch (Intrinsic) {
2301 case Intrinsic::mips_srlri_b: Max = 7; break;
2302 case Intrinsic::mips_srlri_h: Max = 15; break;
2303 case Intrinsic::mips_srlri_w: Max = 31; break;
2304 case Intrinsic::mips_srlri_d: Max = 63; break;
2305 default: llvm_unreachable("Unmatched intrinsic");
2306 }
2307 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2308 if (Value < 0 || Value > Max)
2309 report_fatal_error("Immediate out of range");
2310 return SDValue();
2311 }
2312 case Intrinsic::mips_subv_b:
2313 case Intrinsic::mips_subv_h:
2314 case Intrinsic::mips_subv_w:
2315 case Intrinsic::mips_subv_d:
2316 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2317 Op->getOperand(2));
2318 case Intrinsic::mips_subvi_b:
2319 case Intrinsic::mips_subvi_h:
2320 case Intrinsic::mips_subvi_w:
2321 case Intrinsic::mips_subvi_d:
2322 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2323 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2324 case Intrinsic::mips_vshf_b:
2325 case Intrinsic::mips_vshf_h:
2326 case Intrinsic::mips_vshf_w:
2327 case Intrinsic::mips_vshf_d:
2328 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2329 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2330 case Intrinsic::mips_xor_v:
2331 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2332 Op->getOperand(2));
2333 case Intrinsic::mips_xori_b:
2334 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2335 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2336 case Intrinsic::thread_pointer: {
2337 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2338 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2339 }
2340 }
2341}
2342
2343static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2344 const MipsSubtarget &Subtarget) {
2345 SDLoc DL(Op);
2346 SDValue ChainIn = Op->getOperand(0);
2347 SDValue Address = Op->getOperand(2);
2348 SDValue Offset = Op->getOperand(3);
2349 EVT ResTy = Op->getValueType(0);
2350 EVT PtrTy = Address->getValueType(0);
2351
2352 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2353 // however, takes an i32 signed constant offset. The actual type of the
2354 // offset in the underlying instruction is a scaled signed i10.
2355 if (Subtarget.isABI_N64())
2356 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2357
2358 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2359 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2360 Align(16));
2361}
2362
2363SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2364 SelectionDAG &DAG) const {
2365 unsigned Intr = Op->getConstantOperandVal(1);
2366 switch (Intr) {
2367 default:
2368 return SDValue();
2369 case Intrinsic::mips_extp:
2370 return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2371 case Intrinsic::mips_extpdp:
2372 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2373 case Intrinsic::mips_extr_w:
2374 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2375 case Intrinsic::mips_extr_r_w:
2376 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2377 case Intrinsic::mips_extr_rs_w:
2378 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2379 case Intrinsic::mips_extr_s_h:
2380 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2381 case Intrinsic::mips_mthlip:
2382 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2383 case Intrinsic::mips_mulsaq_s_w_ph:
2384 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2385 case Intrinsic::mips_maq_s_w_phl:
2386 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2387 case Intrinsic::mips_maq_s_w_phr:
2388 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2389 case Intrinsic::mips_maq_sa_w_phl:
2390 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2391 case Intrinsic::mips_maq_sa_w_phr:
2392 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2393 case Intrinsic::mips_dpaq_s_w_ph:
2394 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2395 case Intrinsic::mips_dpsq_s_w_ph:
2396 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2397 case Intrinsic::mips_dpaq_sa_l_w:
2398 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2399 case Intrinsic::mips_dpsq_sa_l_w:
2400 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2401 case Intrinsic::mips_dpaqx_s_w_ph:
2402 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2403 case Intrinsic::mips_dpaqx_sa_w_ph:
2404 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2405 case Intrinsic::mips_dpsqx_s_w_ph:
2406 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2407 case Intrinsic::mips_dpsqx_sa_w_ph:
2408 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2409 case Intrinsic::mips_ld_b:
2410 case Intrinsic::mips_ld_h:
2411 case Intrinsic::mips_ld_w:
2412 case Intrinsic::mips_ld_d:
2413 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2414 }
2415}
2416
2417 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2418 const MipsSubtarget &Subtarget) {
2419 SDLoc DL(Op);
2420 SDValue ChainIn = Op->getOperand(0);
2421 SDValue Value = Op->getOperand(2);
2422 SDValue Address = Op->getOperand(3);
2423 SDValue Offset = Op->getOperand(4);
2424 EVT PtrTy = Address->getValueType(0);
2425
2426 // For N64, addresses have the underlying type MVT::i64. This intrinsic,
2427 // however, takes an i32 signed constant offset. The actual type of the
2428 // offset in the underlying instruction is a scaled signed i10.
2429 if (Subtarget.isABI_N64())
2430 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2431
2432 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2433
2434 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2435 Align(16));
2436}
2437
2438SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2439 SelectionDAG &DAG) const {
2440 unsigned Intr = Op->getConstantOperandVal(1);
2441 switch (Intr) {
2442 default:
2443 return SDValue();
2444 case Intrinsic::mips_st_b:
2445 case Intrinsic::mips_st_h:
2446 case Intrinsic::mips_st_w:
2447 case Intrinsic::mips_st_d:
2448 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2449 }
2450}
2451
2452// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2453//
2454// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2455// choose to sign-extend but we could have equally chosen zero-extend. The
2456// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2457// result into this node later (possibly changing it to a zero-extend in the
2458// process).
2459SDValue MipsSETargetLowering::
2460lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2461 SDLoc DL(Op);
2462 EVT ResTy = Op->getValueType(0);
2463 SDValue Op0 = Op->getOperand(0);
2464 EVT VecTy = Op0->getValueType(0);
2465
2466 if (!VecTy.is128BitVector())
2467 return SDValue();
2468
2469 if (ResTy.isInteger()) {
2470 SDValue Op1 = Op->getOperand(1);
2471 EVT EltTy = VecTy.getVectorElementType();
2472 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2473 DAG.getValueType(EltTy));
2474 }
2475
2476 return Op;
2477}
2478
2479static bool isConstantOrUndef(const SDValue Op) {
2480 if (Op->isUndef())
2481 return true;
2482 if (isa<ConstantSDNode>(Op))
2483 return true;
2484 if (isa<ConstantFPSDNode>(Op))
2485 return true;
2486 return false;
2487}
2488
2489 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
2490 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2491 if (isConstantOrUndef(Op->getOperand(i)))
2492 return true;
2493 return false;
2494}
2495
2496// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2497// backend.
2498//
2499// Lowers according to the following rules:
2500// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2501// 2 less than or equal to 64 and the value fits into a signed 10-bit
2502// immediate
2503// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2504// is a power of 2 less than or equal to 64 and the value does not fit into a
2505// signed 10-bit immediate
2506// - Non-constant splats are legal as-is.
2507// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2508// - All others are illegal and must be expanded.
2509SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2510 SelectionDAG &DAG) const {
2511 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2512 EVT ResTy = Op->getValueType(0);
2513 SDLoc DL(Op);
2514 APInt SplatValue, SplatUndef;
2515 unsigned SplatBitSize;
2516 bool HasAnyUndefs;
2517
2518 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2519 return SDValue();
2520
2521 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2522 HasAnyUndefs, 8,
2523 !Subtarget.isLittle()) && SplatBitSize <= 64) {
2524 // We can only cope with 8, 16, 32, or 64-bit elements
2525 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2526 SplatBitSize != 64)
2527 return SDValue();
2528
2529 // If the value isn't an integer type we will have to bitcast
2530 // from an integer type first. Also, if there are any undefs, we must
2531 // lower them to defined values first.
2532 if (ResTy.isInteger() && !HasAnyUndefs)
2533 return Op;
2534
2535 EVT ViaVecTy;
2536
2537 switch (SplatBitSize) {
2538 default:
2539 return SDValue();
2540 case 8:
2541 ViaVecTy = MVT::v16i8;
2542 break;
2543 case 16:
2544 ViaVecTy = MVT::v8i16;
2545 break;
2546 case 32:
2547 ViaVecTy = MVT::v4i32;
2548 break;
2549 case 64:
2550 // There's no fill.d to fall back on for 64-bit values
2551 return SDValue();
2552 }
2553
2554 // SelectionDAG::getConstant will promote SplatValue appropriately.
2555 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2556
2557 // Bitcast to the type we originally wanted
2558 if (ViaVecTy != ResTy)
2559 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2560
2561 return Result;
2562 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2563 return Op;
2564 else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2565 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2566 // The resulting code is the same length as the expansion, but it doesn't
2567 // use memory operations
2568 EVT ResTy = Node->getValueType(0);
2569
2570 assert(ResTy.isVector());
2571
2572 unsigned NumElts = ResTy.getVectorNumElements();
2573 SDValue Vector = DAG.getUNDEF(ResTy);
2574 for (unsigned i = 0; i < NumElts; ++i) {
2575 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
2576 Node->getOperand(i),
2577 DAG.getConstant(i, DL, MVT::i32));
2578 }
2579 return Vector;
2580 }
2581
2582 return SDValue();
2583}
2584
2585// Lower VECTOR_SHUFFLE into SHF (if possible).
2586//
2587// SHF splits the vector into blocks of four elements, then shuffles these
2588// elements according to a <4 x i2> constant (encoded as an integer immediate).
2589//
2590// It is therefore possible to lower into SHF when the mask takes the form:
2591// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2592// When undef's appear they are treated as if they were whatever value is
2593// necessary in order to fit the above forms.
2594//
2595// For example:
2596// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2597// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2598// i32 7, i32 6, i32 5, i32 4>
2599// is lowered to:
2600// (SHF_H $w0, $w1, 27)
2601// where the 27 comes from:
2602// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2604 SmallVector<int, 16> Indices,
2605 SelectionDAG &DAG) {
2606 int SHFIndices[4] = { -1, -1, -1, -1 };
2607
2608 if (Indices.size() < 4)
2609 return SDValue();
2610
2611 for (unsigned i = 0; i < 4; ++i) {
2612 for (unsigned j = i; j < Indices.size(); j += 4) {
2613 int Idx = Indices[j];
2614
2615 // Convert from vector index to 4-element subvector index
2616 // If an index refers to an element outside of the subvector then give up
2617 if (Idx != -1) {
2618 Idx -= 4 * (j / 4);
2619 if (Idx < 0 || Idx >= 4)
2620 return SDValue();
2621 }
2622
2623 // If the mask has an undef, replace it with the current index.
2624 // Note that it might still be undef if the current index is also undef
2625 if (SHFIndices[i] == -1)
2626 SHFIndices[i] = Idx;
2627
2628 // Check that non-undef values are the same as in the mask. If they
2629 // aren't then give up
2630 if (!(Idx == -1 || Idx == SHFIndices[i]))
2631 return SDValue();
2632 }
2633 }
2634
2635 // Calculate the immediate. Replace any remaining undefs with zero
2636 APInt Imm(32, 0);
2637 for (int i = 3; i >= 0; --i) {
2638 int Idx = SHFIndices[i];
2639
2640 if (Idx == -1)
2641 Idx = 0;
2642
2643 Imm <<= 2;
2644 Imm |= Idx & 0x3;
2645 }
2646
2647 SDLoc DL(Op);
2648 return DAG.getNode(MipsISD::SHF, DL, ResTy,
2649 DAG.getTargetConstant(Imm, DL, MVT::i32),
2650 Op->getOperand(0));
2651}
2652
2653/// Determine whether a range fits a regular pattern of values.
2654/// This function accounts for the possibility of jumping over the End iterator.
2655template <typename ValType>
2656static bool
2657 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
2658 unsigned CheckStride,
2659 typename SmallVectorImpl<ValType>::const_iterator End,
2660 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2661 auto &I = Begin;
2662
2663 while (I != End) {
2664 if (*I != -1 && *I != ExpectedIndex)
2665 return false;
2666 ExpectedIndex += ExpectedIndexStride;
2667
2668 // Incrementing past End is undefined behaviour so we must increment one
2669 // step at a time and check for End at each step.
2670 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2671 ; // Empty loop body.
2672 }
2673 return true;
2674}
2675
2676// Determine whether VECTOR_SHUFFLE is a SPLATI.
2677//
2678// It is a SPLATI when the mask is:
2679// <x, x, x, ...>
2680// where x is any valid index.
2681//
2682// When undef's appear in the mask they are treated as if they were whatever
2683// value is necessary in order to fit the above form.
2685 SmallVector<int, 16> Indices,
2686 SelectionDAG &DAG) {
2687 assert((Indices.size() % 2) == 0);
2688
2689 int SplatIndex = -1;
2690 for (const auto &V : Indices) {
2691 if (V != -1) {
2692 SplatIndex = V;
2693 break;
2694 }
2695 }
2696
2697 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2698 0);
2699}
2700
2701// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2702//
2703// ILVEV interleaves the even elements from each vector.
2704//
2705// It is possible to lower into ILVEV when the mask consists of two of the
2706// following forms interleaved:
2707// <0, 2, 4, ...>
2708// <n, n+2, n+4, ...>
2709// where n is the number of elements in the vector.
2710// For example:
2711// <0, 0, 2, 2, 4, 4, ...>
2712// <0, n, 2, n+2, 4, n+4, ...>
2713//
2714// When undef's appear in the mask they are treated as if they were whatever
2715// value is necessary in order to fit the above forms.
2717 SmallVector<int, 16> Indices,
2718 SelectionDAG &DAG) {
2719 assert((Indices.size() % 2) == 0);
2720
2721 SDValue Wt;
2722 SDValue Ws;
2723 const auto &Begin = Indices.begin();
2724 const auto &End = Indices.end();
2725
2726 // Check even elements are taken from the even elements of one half or the
2727 // other and pick an operand accordingly.
2728 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2729 Wt = Op->getOperand(0);
2730 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2731 Wt = Op->getOperand(1);
2732 else
2733 return SDValue();
2734
2735 // Check odd elements are taken from the even elements of one half or the
2736 // other and pick an operand accordingly.
2737 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2738 Ws = Op->getOperand(0);
2739 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2740 Ws = Op->getOperand(1);
2741 else
2742 return SDValue();
2743
2744 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2745}
2746
2747// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2748//
2749// ILVOD interleaves the odd elements from each vector.
2750//
2751// It is possible to lower into ILVOD when the mask consists of two of the
2752// following forms interleaved:
2753// <1, 3, 5, ...>
2754// <n+1, n+3, n+5, ...>
2755// where n is the number of elements in the vector.
2756// For example:
2757// <1, 1, 3, 3, 5, 5, ...>
2758// <1, n+1, 3, n+3, 5, n+5, ...>
2759//
2760// When undef's appear in the mask they are treated as if they were whatever
2761// value is necessary in order to fit the above forms.
2763 SmallVector<int, 16> Indices,
2764 SelectionDAG &DAG) {
2765 assert((Indices.size() % 2) == 0);
2766
2767 SDValue Wt;
2768 SDValue Ws;
2769 const auto &Begin = Indices.begin();
2770 const auto &End = Indices.end();
2771
2772 // Check even elements are taken from the odd elements of one half or the
2773 // other and pick an operand accordingly.
2774 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2775 Wt = Op->getOperand(0);
2776 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2777 Wt = Op->getOperand(1);
2778 else
2779 return SDValue();
2780
2781 // Check odd elements are taken from the odd elements of one half or the
2782 // other and pick an operand accordingly.
2783 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2784 Ws = Op->getOperand(0);
2785 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2786 Ws = Op->getOperand(1);
2787 else
2788 return SDValue();
2789
2790 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Ws, Wt);
2791}
2792
2793// Lower VECTOR_SHUFFLE into ILVR (if possible).
2794//
2795// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2796// each vector.
2797//
2798// It is possible to lower into ILVR when the mask consists of two of the
2799// following forms interleaved:
2800// <0, 1, 2, ...>
2801// <n, n+1, n+2, ...>
2802// where n is the number of elements in the vector.
2803// For example:
2804// <0, 0, 1, 1, 2, 2, ...>
2805// <0, n, 1, n+1, 2, n+2, ...>
2806//
2807// When undef's appear in the mask they are treated as if they were whatever
2808// value is necessary in order to fit the above forms.
2810 SmallVector<int, 16> Indices,
2811 SelectionDAG &DAG) {
2812 assert((Indices.size() % 2) == 0);
2813
2814 SDValue Wt;
2815 SDValue Ws;
2816 const auto &Begin = Indices.begin();
2817 const auto &End = Indices.end();
2818
2819 // Check even elements are taken from the right (lowest-indexed) elements of
2820 // one half or the other and pick an operand accordingly.
2821 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2822 Wt = Op->getOperand(0);
2823 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2824 Wt = Op->getOperand(1);
2825 else
2826 return SDValue();
2827
2828 // Check odd elements are taken from the right (lowest-indexed) elements of
2829 // one half or the other and pick an operand accordingly.
2830 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2831 Ws = Op->getOperand(0);
2832 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2833 Ws = Op->getOperand(1);
2834 else
2835 return SDValue();
2836
2837 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2838}
2839
2840// Lower VECTOR_SHUFFLE into ILVL (if possible).
2841//
2842// ILVL interleaves consecutive elements from the left (highest-indexed) half
2843// of each vector.
2844//
2845// It is possible to lower into ILVL when the mask consists of two of the
2846// following forms interleaved:
2847// <x, x+1, x+2, ...>
2848// <n+x, n+x+1, n+x+2, ...>
2849// where n is the number of elements in the vector and x is half n.
2850// For example:
2851// <x, x, x+1, x+1, x+2, x+2, ...>
2852// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2853//
2854// When undef's appear in the mask they are treated as if they were whatever
2855// value is necessary in order to fit the above forms.
2857 SmallVector<int, 16> Indices,
2858 SelectionDAG &DAG) {
2859 assert((Indices.size() % 2) == 0);
2860
2861 unsigned HalfSize = Indices.size() / 2;
2862 SDValue Wt;
2863 SDValue Ws;
2864 const auto &Begin = Indices.begin();
2865 const auto &End = Indices.end();
2866
2867 // Check even elements are taken from the left (highest-indexed) elements of
2868 // one half or the other and pick an operand accordingly.
2869 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2870 Wt = Op->getOperand(0);
2871 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
2872 Wt = Op->getOperand(1);
2873 else
2874 return SDValue();
2875
2876 // Check odd elements are taken from the left (highest-indexed) elements of
2877 // one half or the other and pick an operand accordingly.
2878 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2879 Ws = Op->getOperand(0);
2880 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
2881 1))
2882 Ws = Op->getOperand(1);
2883 else
2884 return SDValue();
2885
2886 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
2887}
2888
2889// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2890//
2891// PCKEV copies the even elements of each vector into the result vector.
2892//
2893// It is possible to lower into PCKEV when the mask consists of two of the
2894// following forms concatenated:
2895// <0, 2, 4, ...>
2896// <n, n+2, n+4, ...>
2897// where n is the number of elements in the vector.
2898// For example:
2899// <0, 2, 4, ..., 0, 2, 4, ...>
2900// <0, 2, 4, ..., n, n+2, n+4, ...>
2901//
2902// When undef's appear in the mask they are treated as if they were whatever
2903// value is necessary in order to fit the above forms.
2905 SmallVector<int, 16> Indices,
2906 SelectionDAG &DAG) {
2907 assert((Indices.size() % 2) == 0);
2908
2909 SDValue Wt;
2910 SDValue Ws;
2911 const auto &Begin = Indices.begin();
2912 const auto &Mid = Indices.begin() + Indices.size() / 2;
2913 const auto &End = Indices.end();
2914
2915 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2916 Wt = Op->getOperand(0);
2917 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
2918 Wt = Op->getOperand(1);
2919 else
2920 return SDValue();
2921
2922 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2923 Ws = Op->getOperand(0);
2924 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
2925 Ws = Op->getOperand(1);
2926 else
2927 return SDValue();
2928
2929 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
2930}
2931
2932// Lower VECTOR_SHUFFLE into PCKOD (if possible).
2933//
2934// PCKOD copies the odd elements of each vector into the result vector.
2935//
2936// It is possible to lower into PCKOD when the mask consists of two of the
2937// following forms concatenated:
2938// <1, 3, 5, ...>
2939// <n+1, n+3, n+5, ...>
2940// where n is the number of elements in the vector.
2941// For example:
2942// <1, 3, 5, ..., 1, 3, 5, ...>
2943// <1, 3, 5, ..., n+1, n+3, n+5, ...>
2944//
2945// When undef's appear in the mask they are treated as if they were whatever
2946// value is necessary in order to fit the above forms.
2948 SmallVector<int, 16> Indices,
2949 SelectionDAG &DAG) {
2950 assert((Indices.size() % 2) == 0);
2951
2952 SDValue Wt;
2953 SDValue Ws;
2954 const auto &Begin = Indices.begin();
2955 const auto &Mid = Indices.begin() + Indices.size() / 2;
2956 const auto &End = Indices.end();
2957
2958 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
2959 Wt = Op->getOperand(0);
2960 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
2961 Wt = Op->getOperand(1);
2962 else
2963 return SDValue();
2964
2965 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
2966 Ws = Op->getOperand(0);
2967 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
2968 Ws = Op->getOperand(1);
2969 else
2970 return SDValue();
2971
2972 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
2973}
2974
2975// Lower VECTOR_SHUFFLE into VSHF.
2976//
2977// This mostly consists of converting the shuffle indices in Indices into a
2978// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
2979// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
2980// if the type is v8i16 and all the indices are less than 8 then the second
2981// operand is unused and can be replaced with anything. We choose to replace it
2982// with the used operand since this reduces the number of instructions overall.
2983//
2984// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
2985// UNDEFs as same as SPLATI index.
2986// For other instances we use the last valid index if UNDEF is
2987// encountered.
2989 const SmallVector<int, 16> &Indices,
2990 const bool isSPLATI,
2991 SelectionDAG &DAG) {
2992 SmallVector<SDValue, 16> Ops;
2993 SDValue Op0;
2994 SDValue Op1;
2995 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
2996 EVT MaskEltTy = MaskVecTy.getVectorElementType();
2997 bool Using1stVec = false;
2998 bool Using2ndVec = false;
2999 SDLoc DL(Op);
3000 int ResTyNumElts = ResTy.getVectorNumElements();
3001
3002 assert(Indices[0] >= 0 &&
3003 "shuffle mask starts with an UNDEF, which is not expected");
3004
3005 for (int i = 0; i < ResTyNumElts; ++i) {
3006 // Idx == -1 means UNDEF
3007 int Idx = Indices[i];
3008
3009 if (0 <= Idx && Idx < ResTyNumElts)
3010 Using1stVec = true;
3011 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
3012 Using2ndVec = true;
3013 }
3014 int LastValidIndex = 0;
3015 for (size_t i = 0; i < Indices.size(); i++) {
3016 int Idx = Indices[i];
3017 if (Idx < 0) {
3018 // Continue using splati index or use the last valid index.
3019 Idx = isSPLATI ? Indices[0] : LastValidIndex;
3020 } else {
3021 LastValidIndex = Idx;
3022 }
3023 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
3024 }
3025
3026 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
3027
3028 if (Using1stVec && Using2ndVec) {
3029 Op0 = Op->getOperand(0);
3030 Op1 = Op->getOperand(1);
3031 } else if (Using1stVec)
3032 Op0 = Op1 = Op->getOperand(0);
3033 else if (Using2ndVec)
3034 Op0 = Op1 = Op->getOperand(1);
3035 else
3036 llvm_unreachable("shuffle vector mask references neither vector operand?");
3037
3038 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
3039 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
3040 // VSHF concatenates the vectors in a bitwise fashion:
3041 // <0b00, 0b01> + <0b10, 0b11> ->
3042 // 0b0100 + 0b1110 -> 0b01001110
3043 // <0b10, 0b11, 0b00, 0b01>
3044 // We must therefore swap the operands to get the correct result.
3045 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
3046}
3047
3048// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3049// indices in the shuffle.
3050SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3051 SelectionDAG &DAG) const {
3052 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
3053 EVT ResTy = Op->getValueType(0);
3054
3055 if (!ResTy.is128BitVector())
3056 return SDValue();
3057
3058 int ResTyNumElts = ResTy.getVectorNumElements();
3059 SmallVector<int, 16> Indices;
3060
3061 for (int i = 0; i < ResTyNumElts; ++i)
3062 Indices.push_back(Node->getMaskElt(i));
3063
3064 // splati.[bhwd] is preferable to the others but is matched from
3065 // MipsISD::VSHF.
3066 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3067 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
3068 SDValue Result;
3069 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
3070 return Result;
3071 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3072 return Result;
3073 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3074 return Result;
3075 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3076 return Result;
3077 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3078 return Result;
3079 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3080 return Result;
3081 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3082 return Result;
3083 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
3084}
3085
3086 MachineBasicBlock *
3087MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3088 MachineBasicBlock *BB) const {
3089 // $bb:
3090 // bposge32_pseudo $vr0
3091 // =>
3092 // $bb:
3093 // bposge32 $tbb
3094 // $fbb:
3095 // li $vr2, 0
3096 // b $sink
3097 // $tbb:
3098 // li $vr1, 1
3099 // $sink:
3100 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3101
3102 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3103 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3104 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3105 DebugLoc DL = MI.getDebugLoc();
3106 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3107 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3108 MachineFunction *F = BB->getParent();
3109 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3110 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3111 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3112 F->insert(It, FBB);
3113 F->insert(It, TBB);
3114 F->insert(It, Sink);
3115
3116 // Transfer the remainder of BB and its successor edges to Sink.
3117 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3118 BB->end());
3119 Sink->transferSuccessorsAndUpdatePHIs(BB);
3120
3121 // Add successors.
3122 BB->addSuccessor(FBB);
3123 BB->addSuccessor(TBB);
3124 FBB->addSuccessor(Sink);
3125 TBB->addSuccessor(Sink);
3126
3127 // Insert the real bposge32/bposge32c instruction to $BB.
3128 if (Subtarget.hasMips32r6())
3129 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
3130 else
3131 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3132 // Fill $FBB.
3133 Register VR2 = RegInfo.createVirtualRegister(RC);
3134 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3135 .addReg(Mips::ZERO).addImm(0);
3136 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3137
3138 // Fill $TBB.
3139 Register VR1 = RegInfo.createVirtualRegister(RC);
3140 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3141 .addReg(Mips::ZERO).addImm(1);
3142
3143 // Insert phi function to $Sink.
3144 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3145 MI.getOperand(0).getReg())
3146 .addReg(VR2)
3147 .addMBB(FBB)
3148 .addReg(VR1)
3149 .addMBB(TBB);
3150
3151 MI.eraseFromParent(); // The pseudo instruction is gone now.
3152 return Sink;
3153}
3154
3155MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3156 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3157 // $bb:
3158 // vany_nonzero $rd, $ws
3159 // =>
3160 // $bb:
3161 // bnz.b $ws, $tbb
3162 // b $fbb
3163 // $fbb:
3164 // li $rd1, 0
3165 // b $sink
3166 // $tbb:
3167 // li $rd2, 1
3168 // $sink:
3169 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3170
3171 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3172 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3173 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3174 DebugLoc DL = MI.getDebugLoc();
3175 const BasicBlock *LLVM_BB = BB->getBasicBlock();
3176 MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
3177 MachineFunction *F = BB->getParent();
3178 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3179 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3180 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3181 F->insert(It, FBB);
3182 F->insert(It, TBB);
3183 F->insert(It, Sink);
3184
3185 // Transfer the remainder of BB and its successor edges to Sink.
3186 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3187 BB->end());
3188 Sink->transferSuccessorsAndUpdatePHIs(BB);
3189
3190 // Add successors.
3191 BB->addSuccessor(FBB);
3192 BB->addSuccessor(TBB);
3193 FBB->addSuccessor(Sink);
3194 TBB->addSuccessor(Sink);
3195
3196 // Insert the real bnz.b instruction to $BB.
3197 BuildMI(BB, DL, TII->get(BranchOp))
3198 .addReg(MI.getOperand(1).getReg())
3199 .addMBB(TBB);
3200
3201 // Fill $FBB.
3202 Register RD1 = RegInfo.createVirtualRegister(RC);
3203 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3204 .addReg(Mips::ZERO).addImm(0);
3205 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3206
3207 // Fill $TBB.
3208 Register RD2 = RegInfo.createVirtualRegister(RC);
3209 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3210 .addReg(Mips::ZERO).addImm(1);
3211
3212 // Insert phi function to $Sink.
3213 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3214 MI.getOperand(0).getReg())
3215 .addReg(RD1)
3216 .addMBB(FBB)
3217 .addReg(RD2)
3218 .addMBB(TBB);
3219
3220 MI.eraseFromParent(); // The pseudo instruction is gone now.
3221 return Sink;
3222}
3223
3224// Emit the COPY_FW pseudo instruction.
3225//
3226// copy_fw_pseudo $fd, $ws, n
3227// =>
3228// copy_u_w $rt, $ws, $n
3229// mtc1 $rt, $fd
3230//
3231// When n is zero, the equivalent operation can be performed with (potentially)
3232// zero instructions due to register overlaps. This optimization is never valid
3233// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3235MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3236 MachineBasicBlock *BB) const {
3237 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3238 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3239 DebugLoc DL = MI.getDebugLoc();
3240 Register Fd = MI.getOperand(0).getReg();
3241 Register Ws = MI.getOperand(1).getReg();
3242 unsigned Lane = MI.getOperand(2).getImm();
3243
3244 if (Lane == 0) {
3245 unsigned Wt = Ws;
3246 if (!Subtarget.useOddSPReg()) {
3247 // We must copy to an even-numbered MSA register so that the
3248 // single-precision sub-register is also guaranteed to be even-numbered.
3249 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3250
3251 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3252 }
3253
3254 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3255 } else {
3256 Register Wt = RegInfo.createVirtualRegister(
3257 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3258 : &Mips::MSA128WEvensRegClass);
3259
3260 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3261 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
3262 }
3263
3264 MI.eraseFromParent(); // The pseudo instruction is gone now.
3265 return BB;
3266}
3267
3268// Emit the COPY_FD pseudo instruction.
3269//
3270// copy_fd_pseudo $fd, $ws, n
3271// =>
3272// splati.d $wt, $ws, $n
3273// copy $fd, $wt:sub_64
3274//
3275// When n is zero, the equivalent operation can be performed with (potentially)
3276// zero instructions due to register overlaps. This optimization is always
3277// valid because FR=1 mode which is the only supported mode in MSA.
3279MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3280 MachineBasicBlock *BB) const {
3281 assert(Subtarget.isFP64bit());
3282
3283 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3284 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3285 Register Fd = MI.getOperand(0).getReg();
3286 Register Ws = MI.getOperand(1).getReg();
3287 unsigned Lane = MI.getOperand(2).getImm() * 2;
3288 DebugLoc DL = MI.getDebugLoc();
3289
3290 if (Lane == 0)
3291 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
3292 else {
3293 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3294
3295 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3296 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
3297 }
3298
3299 MI.eraseFromParent(); // The pseudo instruction is gone now.
3300 return BB;
3301}
3302
3303// Emit the INSERT_FW pseudo instruction.
3304//
3305// insert_fw_pseudo $wd, $wd_in, $n, $fs
3306// =>
3307// subreg_to_reg $wt:sub_lo, $fs
3308// insve_w $wd[$n], $wd_in, $wt[0]
3310MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3311 MachineBasicBlock *BB) const {
3312 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3313 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3314 DebugLoc DL = MI.getDebugLoc();
3315 Register Wd = MI.getOperand(0).getReg();
3316 Register Wd_in = MI.getOperand(1).getReg();
3317 unsigned Lane = MI.getOperand(2).getImm();
3318 Register Fs = MI.getOperand(3).getReg();
3319 Register Wt = RegInfo.createVirtualRegister(
3320 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3321 : &Mips::MSA128WEvensRegClass);
3322
3323 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3324 .addImm(0)
3325 .addReg(Fs)
3326 .addImm(Mips::sub_lo);
3327 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3328 .addReg(Wd_in)
3329 .addImm(Lane)
3330 .addReg(Wt)
3331 .addImm(0);
3332
3333 MI.eraseFromParent(); // The pseudo instruction is gone now.
3334 return BB;
3335}
3336
3337// Emit the INSERT_FD pseudo instruction.
3338//
3339// insert_fd_pseudo $wd, $fs, n
3340// =>
3341// subreg_to_reg $wt:sub_64, $fs
3342// insve_d $wd[$n], $wd_in, $wt[0]
3344MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3345 MachineBasicBlock *BB) const {
3346 assert(Subtarget.isFP64bit());
3347
3348 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3349 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3350 DebugLoc DL = MI.getDebugLoc();
3351 Register Wd = MI.getOperand(0).getReg();
3352 Register Wd_in = MI.getOperand(1).getReg();
3353 unsigned Lane = MI.getOperand(2).getImm();
3354 Register Fs = MI.getOperand(3).getReg();
3355 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3356
3357 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3358 .addImm(0)
3359 .addReg(Fs)
3360 .addImm(Mips::sub_64);
3361 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3362 .addReg(Wd_in)
3363 .addImm(Lane)
3364 .addReg(Wt)
3365 .addImm(0);
3366
3367 MI.eraseFromParent(); // The pseudo instruction is gone now.
3368 return BB;
3369}
3370
3371// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3372//
3373// For integer:
3374// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3375// =>
3376// (SLL $lanetmp1, $lane, <log2size)
3377// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3378// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3379// (NEG $lanetmp2, $lanetmp1)
3380// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3381//
3382// For floating point:
3383// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3384// =>
3385// (SUBREG_TO_REG $wt, $fs, <subreg>)
3386// (SLL $lanetmp1, $lane, <log2size)
3387// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3388// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3389// (NEG $lanetmp2, $lanetmp1)
3390// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3391MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3392 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3393 bool IsFP) const {
3394 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3395 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3396 DebugLoc DL = MI.getDebugLoc();
3397 Register Wd = MI.getOperand(0).getReg();
3398 Register SrcVecReg = MI.getOperand(1).getReg();
3399 Register LaneReg = MI.getOperand(2).getReg();
3400 Register SrcValReg = MI.getOperand(3).getReg();
3401
3402 const TargetRegisterClass *VecRC = nullptr;
3403 // FIXME: This should be true for N32 too.
3404 const TargetRegisterClass *GPRRC =
3405 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3406 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3407 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3408 unsigned EltLog2Size;
3409 unsigned InsertOp = 0;
3410 unsigned InsveOp = 0;
3411 switch (EltSizeInBytes) {
3412 default:
3413 llvm_unreachable("Unexpected size");
3414 case 1:
3415 EltLog2Size = 0;
3416 InsertOp = Mips::INSERT_B;
3417 InsveOp = Mips::INSVE_B;
3418 VecRC = &Mips::MSA128BRegClass;
3419 break;
3420 case 2:
3421 EltLog2Size = 1;
3422 InsertOp = Mips::INSERT_H;
3423 InsveOp = Mips::INSVE_H;
3424 VecRC = &Mips::MSA128HRegClass;
3425 break;
3426 case 4:
3427 EltLog2Size = 2;
3428 InsertOp = Mips::INSERT_W;
3429 InsveOp = Mips::INSVE_W;
3430 VecRC = &Mips::MSA128WRegClass;
3431 break;
3432 case 8:
3433 EltLog2Size = 3;
3434 InsertOp = Mips::INSERT_D;
3435 InsveOp = Mips::INSVE_D;
3436 VecRC = &Mips::MSA128DRegClass;
3437 break;
3438 }
3439
3440 if (IsFP) {
3441 Register Wt = RegInfo.createVirtualRegister(VecRC);
3442 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3443 .addImm(0)
3444 .addReg(SrcValReg)
3445 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3446 SrcValReg = Wt;
3447 }
3448
3449 // Convert the lane index into a byte index
3450 if (EltSizeInBytes != 1) {
3451 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3452 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3453 .addReg(LaneReg)
3454 .addImm(EltLog2Size);
3455 LaneReg = LaneTmp1;
3456 }
3457
3458 // Rotate bytes around so that the desired lane is element zero
3459 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3460 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3461 .addReg(SrcVecReg)
3462 .addReg(SrcVecReg)
3463 .addReg(LaneReg, 0, SubRegIdx);
3464
3465 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3466 if (IsFP) {
3467 // Use insve.df to insert to element zero
3468 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3469 .addReg(WdTmp1)
3470 .addImm(0)
3471 .addReg(SrcValReg)
3472 .addImm(0);
3473 } else {
3474 // Use insert.df to insert to element zero
3475 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3476 .addReg(WdTmp1)
3477 .addReg(SrcValReg)
3478 .addImm(0);
3479 }
3480
3481 // Rotate elements the rest of the way for a full rotation.
3482// sld.df interprets $rt modulo the number of columns so we only need to negate
3483 // the lane index to do this.
3484 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3485 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3486 LaneTmp2)
3487 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3488 .addReg(LaneReg);
3489 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3490 .addReg(WdTmp2)
3491 .addReg(WdTmp2)
3492 .addReg(LaneTmp2, 0, SubRegIdx);
3493
3494 MI.eraseFromParent(); // The pseudo instruction is gone now.
3495 return BB;
3496}
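// For illustration, a plausible expansion for inserting a float held in
// $w2 (via subreg_to_reg from an FPU register) at a runtime lane index in
// GPR $4 of a v4f32 in $w0. Registers are invented for the example; note
// that sld.b's destination is also a tied source, so using the same vector
// for both inputs turns the slide into a rotation:
//
//   sll     $1, $4, 2         # lane index -> byte index (log2size = 2)
//   sld.b   $w1, $w0[$1]      # rotate desired lane down to element 0
//   insve.w $w1[0], $w2[0]    # insert the new value at element 0
//   sub     $1, $zero, $1     # negate the byte index
//   sld.b   $w0, $w1[$1]      # rotate back ($rt is taken modulo 16)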
3497
3498// Emit the FILL_FW pseudo instruction.
3499//
3500// fill_fw_pseudo $wd, $fs
3501// =>
3502// implicit_def $wt1
3503// insert_subreg $wt2:subreg_lo, $wt1, $fs
3504// splati.w $wd, $wt2[0]
3505MachineBasicBlock *
3506MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3507 MachineBasicBlock *BB) const {
3508 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3509 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3510 DebugLoc DL = MI.getDebugLoc();
3511 Register Wd = MI.getOperand(0).getReg();
3512 Register Fs = MI.getOperand(1).getReg();
3513 Register Wt1 = RegInfo.createVirtualRegister(
3514 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3515 : &Mips::MSA128WEvensRegClass);
3516 Register Wt2 = RegInfo.createVirtualRegister(
3517 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3518 : &Mips::MSA128WEvensRegClass);
3519
3520 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3521 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3522 .addReg(Wt1)
3523 .addReg(Fs)
3524 .addImm(Mips::sub_lo);
3525 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3526
3527 MI.eraseFromParent(); // The pseudo instruction is gone now.
3528 return BB;
3529}
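// For illustration, the kind of IR that reaches this pseudo (names invented
// for the example): a non-constant f32 splat such as
//
//   %v = insertelement <4 x float> undef, float %f, i32 0
//   %s = shufflevector <4 x float> %v, <4 x float> undef,
//                      <4 x i32> zeroinitializer
//
// which, once %f sits in the sub_lo lane of some $wN, reduces to a single
//
//   splati.w $w0, $wN[0]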
3530
3531// Emit the FILL_FD pseudo instruction.
3532//
3533// fill_fd_pseudo $wd, $fs
3534// =>
3535// implicit_def $wt1
3536// insert_subreg $wt2:subreg_64, $wt1, $fs
3537// splati.d $wd, $wt2[0]
3538MachineBasicBlock *
3539MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3540 MachineBasicBlock *BB) const {
3541 assert(Subtarget.isFP64bit());
3542
3543 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3544 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3545 DebugLoc DL = MI.getDebugLoc();
3546 Register Wd = MI.getOperand(0).getReg();
3547 Register Fs = MI.getOperand(1).getReg();
3548 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3549 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3550
3551 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3552 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3553 .addReg(Wt1)
3554 .addReg(Fs)
3555 .addImm(Mips::sub_64);
3556 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3557
3558 MI.eraseFromParent(); // The pseudo instruction is gone now.
3559 return BB;
3560}
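// The insert_subreg above is typically free: the architecture defines each
// 64-bit FPU register as the low (sub_64) slice of the corresponding MSA
// register, so after register coalescing the fill usually reduces to one
// splat, e.g. (registers invented for the example):
//
//   splati.d $w0, $w13[0]     # $f13 lives in the low half of $w13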
3561
3562// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
3563// register.
3564//
3565// STF16 MSA128F16:$wd, mem_simm10:$addr
3566// =>
3567// copy_u.h $rtemp,$wd[0]
3568// sh $rtemp, $addr
3569//
3570// Safety: We can't use st.h & co as they would overwrite the memory after
3571// the destination. It would require half floats to be allocated 16 bytes(!)
3572// of space.
3573MachineBasicBlock *
3574MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3575 MachineBasicBlock *BB) const {
3576
3577 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3578 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3579 DebugLoc DL = MI.getDebugLoc();
3580 Register Ws = MI.getOperand(0).getReg();
3581 Register Rt = MI.getOperand(1).getReg();
3582 const MachineMemOperand &MMO = **MI.memoperands_begin();
3583 unsigned Imm = MMO.getOffset();
3584
3585 // Caution: A load via the GOT can expand to a GPR32 operand, while a load
3586 // via spill and reload can expand to a GPR64 operand. Examine the
3587 // operand in detail and default to the ABI's register class.
3588 const TargetRegisterClass *RC =
3589 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3590 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3591 : &Mips::GPR64RegClass);
3592 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3593 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3594
3595 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
3596 if (!UsingMips32) {
3597 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
3598 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3599 .addImm(0)
3600 .addReg(Rs)
3601 .addImm(Mips::sub_32);
3602 Rs = Tmp;
3603 }
3604 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3605 .addReg(Rs)
3606 .addReg(Rt)
3607 .addImm(Imm)
3608 .addMemOperand(BB->getParent()->getMachineMemOperand(
3609 &MMO, MMO.getOffset(), MMO.getSize()));
3610
3611 MI.eraseFromParent();
3612 return BB;
3613}
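// For illustration, a plausible O32 expansion for storing the f16 held in
// $w0 to 8($4) (registers invented for the example):
//
//   copy_u.h $1, $w0[0]       # move the 16-bit payload to a GPR
//   sh       $1, 8($4)        # plain halfword store; nothing past the f16
//                             # is written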
3614
3615// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
3616//
3617// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3618// =>
3619// lh $rtemp, $addr
3620// fill.h $wd, $rtemp
3621//
3622// Safety: We can't use ld.h & co as they over-read from the source.
3623// Additionally, if the address is not 16-byte aligned, two cases can occur:
3624// a) Segmentation fault as the load instruction reads from a memory page
3625// it's not supposed to.
3626// b) The load crosses an implementation specific boundary, requiring OS
3627// intervention.
3628MachineBasicBlock *
3629MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3630 MachineBasicBlock *BB) const {
3631
3632 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3633 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3634 DebugLoc DL = MI.getDebugLoc();
3635 Register Wd = MI.getOperand(0).getReg();
3636
3637 // Caution: A load via the GOT can expand to a GPR32 operand, while a load
3638 // via spill and reload can expand to a GPR64 operand. Examine the
3639 // operand in detail and default to the ABI's register class.
3640 const TargetRegisterClass *RC =
3641 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3642 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3643 : &Mips::GPR64RegClass);
3644
3645 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3646 Register Rt = RegInfo.createVirtualRegister(RC);
3647
3648 MachineInstrBuilder MIB =
3649 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
3650 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3651 MIB.add(MO);
3652
3653 if (!UsingMips32) {
3654 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3655 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32);
3656 Rt = Tmp;
3657 }
3658
3659 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3660
3661 MI.eraseFromParent();
3662 return BB;
3663}
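// For illustration, the matching O32 load expansion (registers invented for
// the example):
//
//   lh     $1, 8($4)          # halfword load; nothing past the f16 is read
//   fill.h $w0, $1            # replicate the payload into all eight lanes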
3664
3665// Emit the FPROUND_PSEUDO instruction.
3666//
3667// Round an FGR64Opnd, FGR32Opnd to an f16.
3668//
3669// Safety: Cycle the operand through the GPRs so the result always ends up
3670// the correct MSA register.
3671//
3672// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3673// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3674// (which they can be, as the MSA registers are defined to alias the
3675// FPU's 64 bit and 32 bit registers) the result can be accessed using
3676// the correct register class. That requires operands be tie-able across
3677// register classes which have a sub/super register class relationship.
3678//
3679// For FGR32Opnd:
3680//
3681// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3682// =>
3683// mfc1 $rtemp, $fs
3684// fill.w $wtemp, $rtemp
3685// fexdo.h $wd, $wtemp, $wtemp
3686//
3687// For FGR64Opnd on mips32r2+:
3688//
3689// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3690// =>
3691// mfc1 $rtemp, $fs
3692// fill.w $wtemp, $rtemp
3693// mfhc1 $rtemp2, $fs
3694// insert.w $wtemp[1], $rtemp2
3695// insert.w $wtemp[3], $rtemp2
3696// fexdo.w $wtemp2, $wtemp, $wtemp
3697// fexdo.h $wd, $wtemp2, $wtemp2
3698//
3699// For FGR64Opnd on mips64r2+:
3700//
3701// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3702// =>
3703// dmfc1 $rtemp, $fs
3704// fill.d $wtemp, $rtemp
3705// fexdo.w $wtemp2, $wtemp, $wtemp
3706// fexdo.h $wd, $wtemp2, $wtemp2
3707//
3708// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3709// undef bits are "just right" and the exception enable bits are
3710// set. By using fill.w to replicate $fs into all elements rather
3711// than insert.w for one element, we avoid that potential case. If
3712// fexdo.[hw] causes an exception, the exception is valid and it
3713// occurs for all elements.
3714MachineBasicBlock *
3715MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3716 MachineBasicBlock *BB,
3717 bool IsFGR64) const {
3718
3719 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3720 // here and accept MIPS32R2: supporting plain MIPS32 would be technically
3721 // doable, but the ISA forbids it.
3722 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3723
3724 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3725 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3726
3727 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3728 DebugLoc DL = MI.getDebugLoc();
3729 Register Wd = MI.getOperand(0).getReg();
3730 Register Fs = MI.getOperand(1).getReg();
3731
3732 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3733 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3734 const TargetRegisterClass *GPRRC =
3735 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3736 unsigned MFC1Opc = IsFGR64onMips64
3737 ? Mips::DMFC1
3738 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3739 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3740
3741 // Perform the register class copy as mentioned above.
3742 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3743 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3744 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
3745 Register WPHI = Wtemp;
3746
3747 if (IsFGR64onMips32) {
3748 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3749 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3750 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3751 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3752 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3753 .addReg(Wtemp)
3754 .addReg(Rtemp2)
3755 .addImm(1);
3756 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3757 .addReg(Wtemp2)
3758 .addReg(Rtemp2)
3759 .addImm(3);
3760 WPHI = Wtemp3;
3761 }
3762
3763 if (IsFGR64) {
3764 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3765 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3766 .addReg(WPHI)
3767 .addReg(WPHI);
3768 WPHI = Wtemp2;
3769 }
3770
3771 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3772
3773 MI.eraseFromParent();
3774 return BB;
3775}
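// For illustration, the FGR64-on-mips64r2+ path above as it might come out
// of the compiler (registers invented for the example):
//
//   dmfc1   $1, $f12
//   fill.d  $w0, $1
//   fexdo.w $w0, $w0, $w0     # round f64 -> f32 in every element
//   fexdo.h $w0, $w0, $w0     # round f32 -> f16 in every element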
3776
3777// Emit the FPEXTEND_PSEUDO instruction.
3778//
3779// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3780//
3781// Safety: Cycle the result through the GPRs so the result always ends up
3782// the correct floating point register.
3783//
3784// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3785// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3786// (which they can be, as the MSA registers are defined to alias the
3787// FPU's 64 bit and 32 bit registers) the result can be accessed using
3788// the correct register class. That requires operands be tie-able across
3789// register classes which have a sub/super register class relationship. I
3790// haven't checked.
3791//
3792// For FGR32Opnd:
3793//
3794// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3795// =>
3796// fexupr.w $wtemp, $ws
3797// copy_s.w $rtemp, $wtemp[0]
3798// mtc1 $rtemp, $fd
3799//
3800// For FGR64Opnd on Mips64:
3801//
3802// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3803// =>
3804// fexupr.w $wtemp, $ws
3805// fexupr.d $wtemp2, $wtemp
3806// copy_s.d $rtemp, $wtemp2[0]
3807// dmtc1 $rtemp, $fd
3808//
3809// For FGR64Opnd on Mips32:
3810//
3811// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3812// =>
3813// fexupr.w $wtemp, $ws
3814// fexupr.d $wtemp2, $wtemp
3815// copy_s.w $rtemp, $wtemp2[0]
3816// mtc1 $rtemp, $ftemp
3817// copy_s.w $rtemp2, $wtemp2[1]
3818// $fd = mthc1 $rtemp2, $ftemp
3819MachineBasicBlock *
3820MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3821 MachineBasicBlock *BB,
3822 bool IsFGR64) const {
3823
3824 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3825 // here and accept MIPS32R2: supporting plain MIPS32 would be technically
3826 // doable, but the ISA forbids it.
3827 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3828
3829 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3830 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3831
3832 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3833 DebugLoc DL = MI.getDebugLoc();
3834 Register Fd = MI.getOperand(0).getReg();
3835 Register Ws = MI.getOperand(1).getReg();
3836
3837 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3838 const TargetRegisterClass *GPRRC =
3839 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3840 unsigned MTC1Opc = IsFGR64onMips64
3841 ? Mips::DMTC1
3842 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3843 unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3844
3845 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3846 Register WPHI = Wtemp;
3847
3848 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
3849 if (IsFGR64) {
3850 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3851 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3852 }
3853
3854 // Perform the safety regclass copy mentioned above.
3855 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3856 Register FPRPHI = IsFGR64onMips32
3857 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3858 : Fd;
3859 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3860 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3861
3862 if (IsFGR64onMips32) {
3863 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3864 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3865 .addReg(WPHI)
3866 .addImm(1);
3867 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3868 .addReg(FPRPHI)
3869 .addReg(Rtemp2);
3870 }
3871
3872 MI.eraseFromParent();
3873 return BB;
3874}
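// For illustration, the trickiest case above (FGR64 on MIPS32) spelled out
// with invented registers:
//
//   fexupr.w $w1, $w0         # f16 -> f32
//   fexupr.d $w1, $w1         # f32 -> f64
//   copy_s.w $1, $w1[0]
//   mtc1     $1, $f0          # write the low 32 bits of the result
//   copy_s.w $2, $w1[1]
//   mthc1    $2, $f0          # write the high 32 bits, completing the f64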
3875
3876// Emit the FEXP2_W_1 pseudo instruction.
3877//
3878// fexp2_w_1_pseudo $wd, $wt
3879// =>
3880// ldi.w $ws, 1
3881// fexp2.w $wd, $ws, $wt
3882MachineBasicBlock *
3883MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3884 MachineBasicBlock *BB) const {
3885 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3886 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3887 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3888 Register Ws1 = RegInfo.createVirtualRegister(RC);
3889 Register Ws2 = RegInfo.createVirtualRegister(RC);
3890 DebugLoc DL = MI.getDebugLoc();
3891
3892 // Splat 1.0 into a vector
3893 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
3894 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
3895
3896 // Emit 1.0 * fexp2(Wt)
3897 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
3898 .addReg(Ws2)
3899 .addReg(MI.getOperand(1).getReg());
3900
3901 MI.eraseFromParent(); // The pseudo instruction is gone now.
3902 return BB;
3903}
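// For illustration: fexp2.w computes wd = ws * 2^wt elementwise, so pinning
// ws to 1.0 evaluates a plain exp2. A plausible result (registers invented
// for the example):
//
//   ldi.w     $w1, 1
//   ffint_u.w $w1, $w1        # integer 1 -> 1.0f in every lane
//   fexp2.w   $w0, $w1, $w2   # $w0 = 1.0 * 2^$w2, elementwise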
3904
3905// Emit the FEXP2_D_1 pseudo instruction.
3906//
3907// fexp2_d_1_pseudo $wd, $wt
3908// =>
3909// ldi.d $ws, 1
3910// fexp2.d $wd, $ws, $wt
3911MachineBasicBlock *
3912MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3913 MachineBasicBlock *BB) const {
3914 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3915 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3916 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3917 Register Ws1 = RegInfo.createVirtualRegister(RC);
3918 Register Ws2 = RegInfo.createVirtualRegister(RC);
3919 DebugLoc DL = MI.getDebugLoc();
3920
3921 // Splat 1.0 into a vector
3922 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
3923 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
3924
3925 // Emit 1.0 * fexp2(Wt)
3926 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
3927 .addReg(Ws2)
3928 .addReg(MI.getOperand(1).getReg());
3929
3930 MI.eraseFromParent(); // The pseudo instruction is gone now.
3931 return BB;
3932}
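// For orientation, the emitters above are reached from
// EmitInstrWithCustomInserter, which switches on the pseudo's opcode. A
// condensed sketch, not the verbatim dispatch; the case names are assumed
// to follow the *_PSEUDO convention used for these pseudos:
//
//   switch (MI.getOpcode()) {
//   case Mips::FILL_FW_PSEUDO:   return emitFILL_FW(MI, BB);
//   case Mips::FILL_FD_PSEUDO:   return emitFILL_FD(MI, BB);
//   case Mips::FEXP2_W_1_PSEUDO: return emitFEXP2_W_1(MI, BB);
//   case Mips::FEXP2_D_1_PSEUDO: return emitFEXP2_D_1(MI, BB);
//   ...
//   }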