LLVM 23.0.0git
MipsSEISelLowering.cpp
Go to the documentation of this file.
1//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Subclass of MipsTargetLowering specialized for mips32/64.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MipsSEISelLowering.h"
14#include "MipsMachineFunction.h"
15#include "MipsRegisterInfo.h"
16#include "MipsSubtarget.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/STLExtras.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/IntrinsicsMips.h"
40#include "llvm/Support/Debug.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49#include <utility>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "mips-isel"
54
55static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
56 cl::desc("Expand double precision loads and "
57 "stores to their single precision "
58 "counterparts"));
59
60// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
61// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
62// INST.h for v16, INST.w for v32, INST.d for v64.
65 if (this->Subtarget.hasMSA()) {
66 switch (VT.SimpleTy) {
67 // Leave v2i1 vectors to be promoted to larger ones.
68 // Other i1 types will be promoted by default.
69 case MVT::v2i1:
70 return TypePromoteInteger;
71 break;
72 // 16-bit vector types (v2 and longer)
73 case MVT::v2i8:
74 // 32-bit vector types (v2 and longer)
75 case MVT::v2i16:
76 case MVT::v4i8:
77 // 64-bit vector types (v2 and longer)
78 case MVT::v2i32:
79 case MVT::v4i16:
80 case MVT::v8i8:
81 return TypeWidenVector;
82 break;
83 // Only word (.w) and doubleword (.d) are available for floating point
84 // vectors. That means floating point vectors should be either v2f64
85 // or v4f32.
86 // Here we only explicitly widen the f32 types - f16 will be promoted
87 // by default.
88 case MVT::v2f32:
89 case MVT::v3f32:
90 return TypeWidenVector;
91 // v2i64 is already 128-bit wide.
92 default:
93 break;
94 }
95 }
97}
98
100 const MipsSubtarget &STI)
101 : MipsTargetLowering(TM, STI) {
102 // Set up the register classes
103 addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
104
105 if (Subtarget.isGP64bit())
106 addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
107
108 if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
109 // Expand all truncating stores and extending loads.
112 setTruncStoreAction(VT0, VT1, Expand);
116 }
117 }
118 }
119
120 if (Subtarget.hasDSP()) {
121 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
122
123 for (const auto &VecTy : VecTys) {
124 addRegisterClass(VecTy, &Mips::DSPRRegClass);
125
126 // Expand all builtin opcodes.
127 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
129
135 }
136
139
140 if (Subtarget.hasMips32r2()) {
143 }
144 }
145
146 if (Subtarget.hasDSPR2())
147 setOperationAction(ISD::MUL, MVT::v2i16, Legal);
148
149 if (Subtarget.hasMSA()) {
150 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
151 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
152 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
153 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
154 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
155 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
156 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
157
158 // f16 is a storage-only type, always promote it to f32.
159 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
195
196 // Integer <-> Float conversions are keyed on the integer type. Make these
197 // custom so that we can handle the f16 case. Other float types use their
198 // default expansion.
200 if (Subtarget.isGP64bit())
202
209
211 }
212
213 if (!Subtarget.useSoftFloat()) {
214 addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
215
216 // When dealing with single precision only, use libcalls
217 if (!Subtarget.isSingleFloat()) {
218 if (Subtarget.isFP64bit())
219 addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
220 else
221 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
222 }
223
226 setOperationAction(Op, MVT::f32, Legal);
227 setOperationAction(Op, MVT::f64, Legal);
228 }
229 }
230
231 // Targets with 64bits integer registers, but no 64bit floating point register
232 // do not support conversion between them
233 if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
234 !Subtarget.useSoftFloat()) {
239 }
240
245
246 if (Subtarget.hasCnMips())
248 else if (Subtarget.isR5900()) {
249 // R5900 doesn't have DMULT/DMULTU/DDIV/DDIVU - expand to 32-bit ops
257 } else if (Subtarget.isGP64bit())
259
260 if (Subtarget.isGP64bit() && !Subtarget.isR5900()) {
267 }
268
271
275 if (Subtarget.hasMips32r6()) {
278 } else {
281 }
282
284
288
289 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
290 !Subtarget.hasMips64()) {
292 }
293
294 if (NoDPLoadStore) {
297 }
298
299 if (Subtarget.hasMips32r6()) {
300 // MIPS32r6 replaces the accumulator-based multiplies with a three register
301 // instruction
307
308 // MIPS32r6 replaces the accumulator-based division/remainder with separate
309 // three register division and remainder instructions.
316
317 // MIPS32r6 replaces conditional moves with an equivalent that removes the
318 // need for three GPR read ports.
322
326
327 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
331
333
334 // Floating point > and >= are supported via < and <=
343
352 }
353
354 if (Subtarget.hasMips64r6()) {
355 // MIPS64r6 replaces the accumulator-based multiplies with a three register
356 // instruction
362
363 // MIPS64r6 replaces the accumulator-based division/remainder with separate
364 // three register division and remainder instructions.
371
372 // MIPS64r6 replaces conditional moves with an equivalent that removes the
373 // need for three GPR read ports.
377 }
378
379 if (Subtarget.isR5900()) {
380 // R5900 FPU only supports 4 compare conditions: C.F, C.EQ, C.OLT, C.OLE
381 // (and their inversions via bc1t/bc1f). Expand all conditions that would
382 // require C.UN, C.UEQ, C.ULT, or C.ULE instructions (not available on
383 // R5900). The legalizer resolves these via operand swapping, condition
384 // inversion, and decomposition into supported conditions.
396
397 // R5900 FPU does not support IEEE 754 special values (NaN, infinity). Use
398 // custom lowering to decide per-instruction: hardware when nnan+ninf flags
399 // guarantee no NaN or infinity, software libcall otherwise.
405 }
406
407 computeRegisterProperties(Subtarget.getRegisterInfo());
408}
409
410const MipsTargetLowering *
412 const MipsSubtarget &STI) {
413 return new MipsSETargetLowering(TM, STI);
414}
415
418 if (VT == MVT::Untyped)
419 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
420
422}
423
424// Enable MSA support for the given integer type and Register class.
427 addRegisterClass(Ty, RC);
428
429 // Expand all builtin opcodes.
430 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
432
440
462
463 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
468 }
469
476}
477
478// Enable MSA support for the given floating-point type and Register class.
516
// Lower a select-style node to MipsISD::FSELECT.
//
// Operand 0 of Op is moved into an f64 value with MipsISD::MTC1_D64, and the
// resulting node selects between operands 1 and 2 of Op, keeping Op's
// original result type.
//
// NOTE(review): the embedded listing numbers jump from 517 to 520, so one or
// two statements at the top of this body are missing from this copy (possibly
// an early-out guard) - confirm against the upstream file.
517SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
520
521 EVT ResTy = Op->getValueType(0);
522 SDLoc DL(Op);
523
524 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
525 // floating point register are undefined. Not really an issue as sel.d, which
526 // is produced from an FSELECT node, only looks at bit 0.
527 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
528 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
529 Op->getOperand(2));
530}
531
532SDValue MipsSETargetLowering::lowerINT_TO_FP(SDValue Op,
533 SelectionDAG &DAG) const {
534 // The f32/f64 case is already legal.
535 if (Op.getValueType() != MVT::f16)
536 return Op;
537
538 // For f16, first convert the integer to f32, then convert to f16.
539 SDLoc DL(Op);
540 SDValue FP = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0));
541 return DAG.getFPExtendOrRound(FP, DL, MVT::f16);
542}
543
544SDValue MipsSETargetLowering::lowerFP_TO_INT(SDValue Op,
545 SelectionDAG &DAG) const {
546 SDValue InOp = Op.getOperand(0);
547
548 // For f16, first convert to f32 and go from there.
549 if (InOp.getValueType() == MVT::f16) {
550 EVT VT = Op.getValueType();
551
552 assert((VT == MVT::i32 || VT == MVT::i64 || VT == MVT::i128) &&
553 "Unexpected result type for f16 -> integer conversion");
554
555 SDLoc DL(Op);
556 SDValue FP = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
557
558 // Use a trick from TargetLowering::expandFP_TO_UINT: we know that every
559 // integer value that can be represented by f16 is representable by i32, so
560 // fptoui and fptosi are equivalent.
561 //
562 // NOTE: the result of fptoui is poison when the value does not fit in the
563 // destination type (e.g. because it is negative).
564 return DAG.getNode(ISD::FP_TO_SINT, DL, VT, FP);
565 }
566
567 // Use the default lowering for f32/f64.
568 if (!isTypeLegal(Op.getValueType()))
569 return SDValue();
571}
572
574 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
576
577 if (Subtarget.systemSupportsUnalignedAccess()) {
578 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
579 // implementation defined whether this is handled by hardware, software, or
580 // a hybrid of the two but it's expected that most implementations will
581 // handle the majority of cases in hardware.
582 if (Fast)
583 *Fast = 1;
584 return true;
585 } else if (Subtarget.hasMips32r6()) {
586 return false;
587 }
588
589 switch (SVT) {
590 case MVT::i64:
591 case MVT::i32:
592 if (Fast)
593 *Fast = 1;
594 return true;
595 default:
596 return false;
597 }
598}
599
601 SelectionDAG &DAG) const {
602 switch(Op.getOpcode()) {
603 case ISD::LOAD: return lowerLOAD(Op, DAG);
604 case ISD::STORE: return lowerSTORE(Op, DAG);
605 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
606 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
607 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
608 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
609 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
610 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
611 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
612 DAG);
613 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
614 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
615 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
616 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
617 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
618 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
619 case ISD::SELECT: return lowerSELECT(Op, DAG);
620 case ISD::SINT_TO_FP:
621 return lowerINT_TO_FP(Op, DAG);
622 case ISD::FP_TO_SINT:
623 case ISD::FP_TO_UINT:
624 return lowerFP_TO_INT(Op, DAG);
625 case ISD::BITCAST: return lowerBITCAST(Op, DAG);
626 case ISD::FADD:
627 return lowerR5900FPOp(Op, DAG, RTLIB::ADD_F32);
628 case ISD::FSUB:
629 return lowerR5900FPOp(Op, DAG, RTLIB::SUB_F32);
630 case ISD::FMUL:
631 return lowerR5900FPOp(Op, DAG, RTLIB::MUL_F32);
632 case ISD::FDIV:
633 return lowerR5900FPOp(Op, DAG, RTLIB::DIV_F32);
634 case ISD::FSQRT:
635 return lowerR5900FPOp(Op, DAG, RTLIB::SQRT_F32);
636 }
637
639}
640
// Choose between the hardware instruction and a runtime-library call for a
// floating-point node.
//
// Op - the node being lowered; all of its operands become the call arguments.
// LC - the runtime-library routine to call when the node cannot stay as is.
//
// Returns Op unchanged when it carries both the no-NaNs and no-infs flags;
// otherwise returns the value produced by the library call (the call's chain
// result is not used).
//
// NOTE(review): the embedded listing numbers skip 643 and 656, so two lines of
// this body are missing from this copy. `CallOptions` below has no visible
// declaration - presumably it is declared on one of the dropped lines.
641SDValue MipsSETargetLowering::lowerR5900FPOp(SDValue Op, SelectionDAG &DAG,
642 RTLIB::Libcall LC) const {
644 SDNodeFlags Flags = Op->getFlags();
645
646 if (Flags.hasNoNaNs() && Flags.hasNoInfs()) {
647 // Use the hardware FPU instruction if the operation is guaranteed to have
648 // no NaN or infinity inputs/outputs (nnan+ninf flags).
649 return Op;
650 }
651
652 // Fall back to a software libcall for IEEE correctness.
653 SDLoc DL(Op);
654 MVT VT = Op.getSimpleValueType();
// Forward every operand of the original node to the library routine.
655 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
657 auto [Result, Chain] = makeLibCall(DAG, LC, VT, Ops, CallOptions, DL);
658 return Result;
659}
660
661// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
662//
663// Performs the following transformations:
664// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
665// sign/zero-extension is completely overwritten by the new one performed by
666// the ISD::AND.
667// - Removes redundant zero extensions performed by an ISD::AND.
670 const MipsSubtarget &Subtarget) {
671 if (!Subtarget.hasMSA())
672 return SDValue();
673
674 SDValue Op0 = N->getOperand(0);
675 SDValue Op1 = N->getOperand(1);
676 unsigned Op0Opcode = Op0->getOpcode();
677
678 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
679 // where $d + 1 == 2^n and n == 32
680 // or $d + 1 == 2^n and n <= 32 and ZExt
681 // -> (MipsVExtractZExt $a, $b, $c)
682 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
683 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
685
686 if (!Mask)
687 return SDValue();
688
689 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
690
691 if (Log2IfPositive <= 0)
692 return SDValue(); // Mask+1 is not a power of 2
693
694 SDValue Op0Op2 = Op0->getOperand(2);
695 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
696 unsigned ExtendTySize = ExtendTy.getSizeInBits();
697 unsigned Log2 = Log2IfPositive;
698
699 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
700 Log2 == ExtendTySize) {
701 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
702 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
703 Op0->getVTList(),
704 ArrayRef(Ops, Op0->getNumOperands()));
705 }
706 }
707
708 return SDValue();
709}
710
711// Determine if the specified node is a constant vector splat.
712//
713// Returns true and sets Imm if:
714// * N is a ISD::BUILD_VECTOR representing a constant splat
715//
716// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
717// differences are that it assumes the MSA has already been checked and the
718// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
719// must not be in order for binsri.d to be selectable).
720static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
722
723 if (!Node)
724 return false;
725
726 APInt SplatValue, SplatUndef;
727 unsigned SplatBitSize;
728 bool HasAnyUndefs;
729
730 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
731 8, !IsLittleEndian))
732 return false;
733
734 Imm = SplatValue;
735
736 return true;
737}
738
739// Test whether the given node is an all-ones build_vector.
741 // Look through bitcasts. Endianness doesn't matter because we are looking
742 // for an all-ones value.
743 if (N->getOpcode() == ISD::BITCAST)
744 N = N->getOperand(0);
745
747
748 if (!BVN)
749 return false;
750
751 APInt SplatValue, SplatUndef;
752 unsigned SplatBitSize;
753 bool HasAnyUndefs;
754
755 // Endianness doesn't matter in this context because we are looking for
756 // an all-ones value.
757 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
758 return SplatValue.isAllOnes();
759
760 return false;
761}
762
763// Test whether N is the bitwise inverse of OfNode.
764static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
765 if (N->getOpcode() != ISD::XOR)
766 return false;
767
768 if (isVectorAllOnes(N->getOperand(0)))
769 return N->getOperand(1) == OfNode;
770
771 if (isVectorAllOnes(N->getOperand(1)))
772 return N->getOperand(0) == OfNode;
773
774 return false;
775}
776
777// Perform combines where ISD::OR is the root node.
778//
779// Performs the following transformations:
780// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
781// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
782// vector type.
//
// An empty SDValue is returned when MSA is unavailable, when the result is
// not a 128-bit vector, or when no mask/inverse-mask pair is recognised.
// NOTE(review): the opening lines of the signature (listing lines 783-784)
// are missing from this copy of the file.
785 const MipsSubtarget &Subtarget) {
786 if (!Subtarget.hasMSA())
787 return SDValue();
788
789 EVT Ty = N->getValueType(0);
790
791 if (!Ty.is128BitVector())
792 return SDValue();
793
794 SDValue Op0 = N->getOperand(0);
795 SDValue Op1 = N->getOperand(1);
796
797 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
798 SDValue Op0Op0 = Op0->getOperand(0);
799 SDValue Op0Op1 = Op0->getOperand(1);
800 SDValue Op1Op0 = Op1->getOperand(0);
801 SDValue Op1Op1 = Op1->getOperand(1);
// NOTE(review): despite its name, IsLittleEndian holds the negation of
// Subtarget.isLittle(). isVSplat() forwards !IsLittleEndian as the final
// isConstantSplat() argument, so that argument equals Subtarget.isLittle()
// here, whereas performDSPShiftCombine passes !Subtarget.isLittle() in the
// same position. Confirm which of the two is intended.
802 bool IsLittleEndian = !Subtarget.isLittle();
803
804 SDValue IfSet, IfClr, Cond;
805 bool IsConstantMask = false;
806 APInt Mask, InvMask;
807
808 // If Op0Op0 is an appropriate mask, try to find its inverse in either
809 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
810 // looking.
811 // IfClr will be set if we find a valid match.
812 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
813 Cond = Op0Op0;
814 IfSet = Op0Op1;
815
816 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
817 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
818 IfClr = Op1Op1;
819 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
820 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
821 IfClr = Op1Op0;
822
// Recorded as soon as a splat operand is seen, whether or not its inverse
// was found above.
823 IsConstantMask = true;
824 }
825
826 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
827 // thing again using this mask.
828 // IfClr will be set if we find a valid match.
829 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
830 Cond = Op0Op1;
831 IfSet = Op0Op0;
832
833 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
834 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
835 IfClr = Op1Op1;
836 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
837 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
838 IfClr = Op1Op0;
839
840 IsConstantMask = true;
841 }
842
843 // If IfClr is not yet set, try looking for a non-constant match.
844 // IfClr will be set if we find a valid match amongst the eight
845 // possibilities.
// In each case the operand that is being inverted becomes Cond, its partner
// in the same AND becomes IfSet, and the partner of the inverted operand
// becomes IfClr.
846 if (!IfClr.getNode()) {
847 if (isBitwiseInverse(Op0Op0, Op1Op0)) {
848 Cond = Op1Op0;
849 IfSet = Op1Op1;
850 IfClr = Op0Op1;
851 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
852 Cond = Op1Op0;
853 IfSet = Op1Op1;
854 IfClr = Op0Op0;
855 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
856 Cond = Op1Op1;
857 IfSet = Op1Op0;
858 IfClr = Op0Op1;
859 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
860 Cond = Op1Op1;
861 IfSet = Op1Op0;
862 IfClr = Op0Op0;
863 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
864 Cond = Op0Op0;
865 IfSet = Op0Op1;
866 IfClr = Op1Op1;
867 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
868 Cond = Op0Op0;
869 IfSet = Op0Op1;
870 IfClr = Op1Op0;
871 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
872 Cond = Op0Op1;
873 IfSet = Op0Op0;
874 IfClr = Op1Op1;
875 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
876 Cond = Op0Op1;
877 IfSet = Op0Op0;
878 IfClr = Op1Op0;
879 }
880 }
881
882 // At this point, IfClr will be set if we have a valid match.
883 if (!IfClr.getNode())
884 return SDValue();
885
886 assert(Cond.getNode() && IfSet.getNode());
887
888 // Fold degenerate cases.
// An all-ones mask selects IfSet everywhere; an all-zero mask selects IfClr.
889 if (IsConstantMask) {
890 if (Mask.isAllOnes())
891 return IfSet;
892 else if (Mask == 0)
893 return IfClr;
894 }
895
896 // Transform the DAG into an equivalent VSELECT.
897 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
898 }
899
900 return SDValue();
901}
902
904 SelectionDAG &DAG,
905 const MipsSubtarget &Subtarget) {
906 // Estimate the number of operations the below transform will turn a
907 // constant multiply into. The number is approximately equal to the minimal
908 // number of powers of two that constant can be broken down to by adding
909 // or subtracting them.
910 //
911 // If we have taken more than 12[1] / 8[2] steps to attempt the
912 // optimization for a native sized value, it is more than likely that this
913 // optimization will make things worse.
914 //
915 // [1] MIPS64 requires 6 instructions at most to materialize any constant,
916 // multiplication requires at least 4 cycles, but another cycle (or two)
917 // to retrieve the result from the HI/LO registers.
918 //
919 // [2] For MIPS32, more than 8 steps is expensive as the constant could be
920 // materialized in 2 instructions, multiplication requires at least 4
921 // cycles, but another cycle (or two) to retrieve the result from the
922 // HI/LO registers.
923 //
924 // TODO:
925 // - MaxSteps needs to consider the `VT` of the constant for the current
926 // target.
927 // - Consider to perform this optimization after type legalization.
928 // That allows to remove a workaround for types not supported natively.
929 // - Take in account `-Os, -Oz` flags because this optimization
930 // increases code size.
931 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
932
933 SmallVector<APInt, 16> WorkStack(1, C);
934 unsigned Steps = 0;
935 unsigned BitWidth = C.getBitWidth();
936
937 while (!WorkStack.empty()) {
938 APInt Val = WorkStack.pop_back_val();
939
940 if (Val == 0 || Val == 1)
941 continue;
942
943 if (Steps >= MaxSteps)
944 return false;
945
946 if (Val.isPowerOf2()) {
947 ++Steps;
948 continue;
949 }
950
951 APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
952 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
953 : APInt(BitWidth, 1) << C.ceilLogBase2();
954 if ((Val - Floor).ule(Ceil - Val)) {
955 WorkStack.push_back(Floor);
956 WorkStack.push_back(Val - Floor);
957 } else {
958 WorkStack.push_back(Ceil);
959 WorkStack.push_back(Ceil - Val);
960 }
961
962 ++Steps;
963 }
964
965 // If the value being multiplied is not supported natively, we have to pay
966 // an additional legalization cost, conservatively assume an increase in the
967 // cost of 3 instructions per step. This values for this heuristic were
968 // determined experimentally.
969 unsigned RegisterSize = DAG.getTargetLoweringInfo()
970 .getRegisterType(*DAG.getContext(), VT)
971 .getSizeInBits();
972 Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
973 if (Steps > 27)
974 return false;
975
976 return true;
977}
978
980 EVT ShiftTy, SelectionDAG &DAG) {
// Recursively builds nodes computing X * C out of shifts, adds and subtracts.
// C is split around its neighbouring powers of two: whichever of floor_c and
// ceil_c is closer is multiplied first, and the remainder is handled by a
// recursive call. Shift amounts are created as constants of type ShiftTy;
// every other node has type VT.
// NOTE(review): the first line of the signature (listing line 979) is missing
// from this copy of the file.
981 // Return 0.
982 if (C == 0)
983 return DAG.getConstant(0, DL, VT);
984
985 // Return x.
986 if (C == 1)
987 return X;
988
989 // If c is power of 2, return (shl x, log2(c)).
990 if (C.isPowerOf2())
991 return DAG.getNode(ISD::SHL, DL, VT, X,
992 DAG.getConstant(C.logBase2(), DL, ShiftTy));
993
994 unsigned BitWidth = C.getBitWidth();
995 APInt Floor = APInt(BitWidth, 1) << C.logBase2();
// For a negative C the ceiling is taken to be 0 - presumably standing in for
// 2^BitWidth, which wraps to 0 at this width (TODO confirm).
996 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
997 APInt(BitWidth, 1) << C.ceilLogBase2();
998
999 // If |c - floor_c| <= |c - ceil_c|,
1000 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
1001 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
1002 if ((C - Floor).ule(Ceil - C)) {
1003 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
1004 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
1005 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
1006 }
1007
1008 // If |c - floor_c| > |c - ceil_c|,
1009 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
1010 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
1011 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
1012 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
1013}
1014
1017 const MipsSETargetLowering *TL,
1018 const MipsSubtarget &Subtarget) {
1019 EVT VT = N->getValueType(0);
1020
1021 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
1023 C->getAPIntValue(), VT, DAG, Subtarget))
1024 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
1026 DAG);
1027
1028 return SDValue(N, 0);
1029}
1030
1032 SelectionDAG &DAG,
1033 const MipsSubtarget &Subtarget) {
// Rewrites a vector shift whose amount (operand 1 of N) is a constant splat
// into the DSP node Opc taking operand 0 of N and the shift amount as an i32
// constant. Returns an empty SDValue when DSP is unavailable, when operand 1
// is not a constant splat of exactly one element width, or when the amount is
// not smaller than the element width.
// NOTE(review): the first line of the signature (listing line 1031) is
// missing from this copy of the file.
1034 // See if this is a vector splat immediate node.
1035 APInt SplatValue, SplatUndef;
1036 unsigned SplatBitSize;
1037 bool HasAnyUndefs;
1038 unsigned EltSize = Ty.getScalarSizeInBits();
1039 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
1040
1041 if (!Subtarget.hasDSP())
1042 return SDValue();
1043
// The checks run in order and short-circuit, so SplatValue is only read once
// isConstantSplat has succeeded.
1044 if (!BV ||
1045 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1046 EltSize, !Subtarget.isLittle()) ||
1047 (SplatBitSize != EltSize) ||
1048 (SplatValue.getZExtValue() >= EltSize))
1049 return SDValue();
1050
1051 SDLoc DL(N);
1052 return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
1053 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
1054}
1055
1058 const MipsSubtarget &Subtarget) {
// Only v2i16 and v4i8 results are considered; anything else yields an empty
// SDValue. Accepted nodes are handed to performDSPShiftCombine with
// MipsISD::SHLL_DSP as the replacement opcode.
// NOTE(review): the opening lines of the signature (listing lines 1056-1057)
// are missing from this copy of the file.
1059 EVT Ty = N->getValueType(0);
1060
1061 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1062 return SDValue();
1063
1064 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
1065}
1066
1067// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
1068// constant splats into MipsISD::SHRA_DSP for DSPr2.
1069//
1070// Performs the following transformations:
1071// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
1072// sign/zero-extension is completely overwritten by the new one performed by
1073// the ISD::SRA and ISD::SHL nodes.
1074// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
1075// sequence.
1076//
1077// See performDSPShiftCombine for more information about the transformation
1078// used for DSPr2.
1081 const MipsSubtarget &Subtarget) {
1082 EVT Ty = N->getValueType(0);
1083
1084 if (Subtarget.hasMSA()) {
1085 SDValue Op0 = N->getOperand(0);
1086 SDValue Op1 = N->getOperand(1);
1087
1088 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
1089 // where $d + sizeof($c) == 32
1090 // or $d + sizeof($c) <= 32 and SExt
1091 // -> (MipsVExtractSExt $a, $b, $c)
1092 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
1093 SDValue Op0Op0 = Op0->getOperand(0);
1095
1096 if (!ShAmount)
1097 return SDValue();
1098
1099 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
1100 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
1101 return SDValue();
1102
1103 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
1104 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
1105
1106 if (TotalBits == 32 ||
1107 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
1108 TotalBits <= 32)) {
1109 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
1110 Op0Op0->getOperand(2) };
1111 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
1112 Op0Op0->getVTList(),
1113 ArrayRef(Ops, Op0Op0->getNumOperands()));
1114 }
1115 }
1116 }
1117
1118 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
1119 return SDValue();
1120
1121 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
1122}
1123
1124
1127 const MipsSubtarget &Subtarget) {
// Accepts v4i8 unconditionally and v2i16 only when the subtarget reports
// DSPR2; every other case yields an empty SDValue. Accepted nodes are handed
// to performDSPShiftCombine with MipsISD::SHRL_DSP as the replacement opcode.
// NOTE(review): the opening lines of the signature (listing lines 1125-1126)
// are missing from this copy of the file.
1128 EVT Ty = N->getValueType(0);
1129
1130 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
1131 return SDValue();
1132
1133 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
1134}
1135
// Reports whether condition code CC is accepted for the DSP vector type Ty:
// equality/inequality always, signed orderings only when Ty is v2i16, and
// unsigned orderings only when Ty is not v2i16 (the caller visible in this
// file passes only v2i16 or v4i8). Every other condition code is rejected.
// NOTE(review): the signature line (listing line 1136) is missing from this
// copy of the file.
1137 bool IsV216 = (Ty == MVT::v2i16);
1138
1139 switch (CC) {
1140 case ISD::SETEQ:
1141 case ISD::SETNE: return true;
1142 case ISD::SETLT:
1143 case ISD::SETLE:
1144 case ISD::SETGT:
1145 case ISD::SETGE: return IsV216;
1146 case ISD::SETULT:
1147 case ISD::SETULE:
1148 case ISD::SETUGT:
1149 case ISD::SETUGE: return !IsV216;
1150 default: return false;
1151 }
1152}
1153
// Replaces a comparison node producing v2i16 or v4i8 with MipsISD::SETCC_DSP
// built from the same three operands, provided the condition code held in
// operand 2 passes isLegalDSPCondCode. Otherwise an empty SDValue is returned.
// NOTE(review): the signature line (listing line 1154) is missing from this
// copy of the file.
1155 EVT Ty = N->getValueType(0);
1156
1157 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1158 return SDValue();
1159
1160 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
1161 return SDValue();
1162
1163 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
1164 N->getOperand(1), N->getOperand(2));
1165}
1166
// For v2i16/v4i8 results whose operand 0 is a MipsISD::SETCC_DSP node, folds
// the comparison into a single MipsISD::SELECT_CC_DSP node. Its operands are,
// in order: the two compared values, operands 1 and 2 of N, and the
// comparison's third operand. Every other case yields an empty SDValue.
// NOTE(review): the signature line (listing line 1167) is missing from this
// copy of the file.
1168 EVT Ty = N->getValueType(0);
1169
1170 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
1171 SDValue SetCC = N->getOperand(0);
1172
1173 if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
1174 return SDValue();
1175
1176 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
1177 SetCC.getOperand(0), SetCC.getOperand(1),
1178 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
1179 }
1180
1181 return SDValue();
1182}
1183
1185 const MipsSubtarget &Subtarget) {
1186 EVT Ty = N->getValueType(0);
1187
1188 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
1189 // Try the following combines:
1190 // (xor (or $a, $b), (build_vector allones))
1191 // (xor (or $a, $b), (bitcast (build_vector allones)))
1192 SDValue Op0 = N->getOperand(0);
1193 SDValue Op1 = N->getOperand(1);
1194 SDValue NotOp;
1195
1197 NotOp = Op1;
1198 else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
1199 NotOp = Op0;
1200 else
1201 return SDValue();
1202
1203 if (NotOp->getOpcode() == ISD::OR)
1204 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
1205 NotOp->getOperand(1));
1206 }
1207
1208 return SDValue();
1209}
1210
1211SDValue
1213 SelectionDAG &DAG = DCI.DAG;
1214 SDValue Val;
1215
1216 switch (N->getOpcode()) {
1217 case ISD::AND:
1218 Val = performANDCombine(N, DAG, DCI, Subtarget);
1219 break;
1220 case ISD::OR:
1221 Val = performORCombine(N, DAG, DCI, Subtarget);
1222 break;
1223 case ISD::MUL:
1224 return performMULCombine(N, DAG, DCI, this, Subtarget);
1225 case ISD::SHL:
1226 Val = performSHLCombine(N, DAG, DCI, Subtarget);
1227 break;
1228 case ISD::SRA:
1229 return performSRACombine(N, DAG, DCI, Subtarget);
1230 case ISD::SRL:
1231 return performSRLCombine(N, DAG, DCI, Subtarget);
1232 case ISD::VSELECT:
1233 return performVSELECTCombine(N, DAG);
1234 case ISD::XOR:
1235 Val = performXORCombine(N, DAG, Subtarget);
1236 break;
1237 case ISD::SETCC:
1238 Val = performSETCCCombine(N, DAG);
1239 break;
1240 }
1241
1242 if (Val.getNode()) {
1243 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1244 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
1245 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
1246 return Val;
1247 }
1248
1250}
1251
1254 MachineBasicBlock *BB) const {
// NOTE(review): the listing lines carrying the start of this signature
// (1252-1253) and the body of the default case (1257) are absent from this
// excerpt; consult the upstream file for them.
//
// Dispatches on the opcode of MI and returns whatever the matching emit*
// helper returns for (MI, BB).
1255 switch (MI.getOpcode()) {
1256 default:
1258 case Mips::BPOSGE32_PSEUDO:
1259 return emitBPOSGE32(MI, BB);
// SNZ_* pseudos are expanded with the corresponding BNZ_* branch opcode.
1260 case Mips::SNZ_B_PSEUDO:
1261 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
1262 case Mips::SNZ_H_PSEUDO:
1263 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
1264 case Mips::SNZ_W_PSEUDO:
1265 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
1266 case Mips::SNZ_D_PSEUDO:
1267 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
1268 case Mips::SNZ_V_PSEUDO:
1269 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
// SZ_* pseudos are expanded with the corresponding BZ_* branch opcode.
1270 case Mips::SZ_B_PSEUDO:
1271 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
1272 case Mips::SZ_H_PSEUDO:
1273 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
1274 case Mips::SZ_W_PSEUDO:
1275 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
1276 case Mips::SZ_D_PSEUDO:
1277 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
1278 case Mips::SZ_V_PSEUDO:
1279 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
// Floating-point copy/insert pseudos each have a dedicated helper.
1280 case Mips::COPY_FW_PSEUDO:
1281 return emitCOPY_FW(MI, BB);
1282 case Mips::COPY_FD_PSEUDO:
1283 return emitCOPY_FD(MI, BB);
1284 case Mips::INSERT_FW_PSEUDO:
1285 return emitINSERT_FW(MI, BB);
1286 case Mips::INSERT_FD_PSEUDO:
1287 return emitINSERT_FD(MI, BB);
// Variable-index inserts: the *_VIDX and *_VIDX64 forms share one helper call.
// NOTE(review): the third argument (1/2/4/8) presumably is the element size in
// bytes and the fourth presumably flags a floating-point element -- confirm
// against emitINSERT_DF_VIDX.
1288 case Mips::INSERT_B_VIDX_PSEUDO:
1289 case Mips::INSERT_B_VIDX64_PSEUDO:
1290 return emitINSERT_DF_VIDX(MI, BB, 1, false);
1291 case Mips::INSERT_H_VIDX_PSEUDO:
1292 case Mips::INSERT_H_VIDX64_PSEUDO:
1293 return emitINSERT_DF_VIDX(MI, BB, 2, false);
1294 case Mips::INSERT_W_VIDX_PSEUDO:
1295 case Mips::INSERT_W_VIDX64_PSEUDO:
1296 return emitINSERT_DF_VIDX(MI, BB, 4, false);
1297 case Mips::INSERT_D_VIDX_PSEUDO:
1298 case Mips::INSERT_D_VIDX64_PSEUDO:
1299 return emitINSERT_DF_VIDX(MI, BB, 8, false);
1300 case Mips::INSERT_FW_VIDX_PSEUDO:
1301 case Mips::INSERT_FW_VIDX64_PSEUDO:
1302 return emitINSERT_DF_VIDX(MI, BB, 4, true);
1303 case Mips::INSERT_FD_VIDX_PSEUDO:
1304 case Mips::INSERT_FD_VIDX64_PSEUDO:
1305 return emitINSERT_DF_VIDX(MI, BB, 8, true);
1306 case Mips::FILL_FW_PSEUDO:
1307 return emitFILL_FW(MI, BB);
1308 case Mips::FILL_FD_PSEUDO:
1309 return emitFILL_FD(MI, BB);
1310 case Mips::FEXP2_W_1_PSEUDO:
1311 return emitFEXP2_W_1(MI, BB);
1312 case Mips::FEXP2_D_1_PSEUDO:
1313 return emitFEXP2_D_1(MI, BB);
1314 case Mips::ST_F16:
1315 return emitST_F16_PSEUDO(MI, BB);
1316 case Mips::LD_F16:
1317 return emitLD_F16_PSEUDO(MI, BB);
// NOTE(review): the trailing bool is false for the *_W_* pseudos and true for
// the *_D_* pseudos; presumably it selects the 64-bit variant -- confirm
// against emitFPEXTEND_PSEUDO / emitFPROUND_PSEUDO.
1318 case Mips::MSA_FP_EXTEND_W_PSEUDO:
1319 return emitFPEXTEND_PSEUDO(MI, BB, false);
1320 case Mips::MSA_FP_ROUND_W_PSEUDO:
1321 return emitFPROUND_PSEUDO(MI, BB, false);
1322 case Mips::MSA_FP_EXTEND_D_PSEUDO:
1323 return emitFPEXTEND_PSEUDO(MI, BB, true);
1324 case Mips::MSA_FP_ROUND_D_PSEUDO:
1325 return emitFPROUND_PSEUDO(MI, BB, true);
1326 }
1327}
1328
1329bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1330 const CCState &CCInfo, unsigned NextStackOffset,
1331 const MipsFunctionInfo &FI) const {
1332 // Exception has to be cleared with eret.
1333 if (FI.isISR())
1334 return false;
1335
1336 // Return false if either the callee or caller has a byval argument.
1337 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1338 return false;
1339
1340 // Return true if the callee's argument area is no larger than the caller's.
1341 return NextStackOffset <= FI.getIncomingArgSize();
1342}
1343
1344void MipsSETargetLowering::
1345getOpndList(SmallVectorImpl<SDValue> &Ops,
1346 std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
1347 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
1348 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
1349 SDValue Chain) const {
1350 Ops.push_back(Callee);
1351 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
1352 InternalLinkage, IsCallReloc, CLI, Callee,
1353 Chain);
1354}
1355
// Custom lowering for loads.
//
// Unless the memory type is f64 and the NoDPLoadStore option (-mno-ldc1-sdc1)
// is set, the load is handed to MipsTargetLowering::lowerLOAD. Otherwise the
// f64 load is rewritten as two i32 loads, the second chained on the first,
// whose results are combined with MipsISD::BuildPairF64. The return value
// merges the rebuilt f64 with a chain.
1356SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1357 LoadSDNode &Nd = *cast<LoadSDNode>(Op);
1358
1359 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1360 return MipsTargetLowering::lowerLOAD(Op, DAG);
1361
1362 // Replace a double precision load with two i32 loads and a buildpair64.
1363 SDLoc DL(Op);
1364 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1365 EVT PtrVT = Ptr.getValueType();
1366
1367 // i32 load from lower address.
1368 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
1369 Nd.getAlign(), Nd.getMemOperand()->getFlags());
1370
1371 // i32 load from higher address.
// The second word lives 4 bytes past the base pointer, and this load takes the
// first load's chain result (value 1) as its input chain.
1372 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1373 SDValue Hi = DAG.getLoad(
1374 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
// NOTE(review): listing line 1375, which holds the remaining arguments of this
// getLoad call, is absent from this excerpt.
1376
// On a big-endian subtarget the two loaded words trade roles before the pair
// is built.
1377 if (!Subtarget.isLittle())
1378 std::swap(Lo, Hi);
1379
1380 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
// NOTE(review): after the swap above, Hi names the first (lower-address) load
// on big-endian subtargets, so the chain returned here is that load's chain
// rather than the second load's -- confirm this is intended.
1381 SDValue Ops[2] = {BP, Hi.getValue(1)};
1382 return DAG.getMergeValues(Ops, DL);
1383}
1384
// Custom lowering for stores.
//
// When the memory type is f64 and the NoDPLoadStore option (-mno-ldc1-sdc1) is
// set, the stored f64 is taken apart with two MipsISD::ExtractElementF64 nodes
// and written as two chained i32 stores; the second store is returned.
1385SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1386 StoreSDNode &Nd = *cast<StoreSDNode>(Op);
1387
1388 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
// NOTE(review): listing line 1389, the statement guarded by the condition
// above, is absent from this excerpt.
1390
1391 // Replace a double precision store with two extractelement64s and i32 stores.
1392 SDLoc DL(Op);
1393 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1394 EVT PtrVT = Ptr.getValueType();
// Element 0 and element 1 of the f64 are extracted as i32 values.
1395 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1396 Val, DAG.getConstant(0, DL, MVT::i32));
1397 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1398 Val, DAG.getConstant(1, DL, MVT::i32));
1399
// On a big-endian subtarget the two halves trade places, so element 1 is the
// one written to the lower address.
1400 if (!Subtarget.isLittle())
1401 std::swap(Lo, Hi);
1402
1403 // i32 store to lower address.
1404 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
1405 Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1406
1407 // i32 store to higher address.
// The second word goes 4 bytes past the base pointer; its alignment is the
// common alignment of the original alignment and that 4-byte offset, and it is
// chained on the first store.
1408 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1409 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
1410 commonAlignment(Nd.getAlign(), 4),
1411 Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1412}
1413
// Custom lowering for bitcasts between i64 and f64.
//
// i64 -> f64 splits the integer into two i32 halves and joins them with
// MipsISD::BuildPairF64. f64 -> i64 extracts elements 0 and 1 with
// MipsISD::ExtractElementF64 and joins them with ISD::BUILD_PAIR. Every other
// combination returns an empty SDValue so that default lowering is used.
1414SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
1415 SelectionDAG &DAG) const {
1416 SDLoc DL(Op);
1417 MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
1418 MVT Dest = Op.getValueType().getSimpleVT();
1419
1420 // Bitcast i64 to double.
1421 if (Src == MVT::i64 && Dest == MVT::f64) {
1422 SDValue Lo, Hi;
1423 std::tie(Lo, Hi) =
1424 DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
1425 return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1426 }
1427
1428 // Bitcast double to i64.
1429 if (Src == MVT::f64 && Dest == MVT::i64) {
1430 // Skip lower bitcast when operand0 has converted float results to integer
1431 // which was done by function SoftenFloatResult.
1432 if (getTypeAction(*DAG.getContext(), Op.getOperand(0).getValueType()) ==
// NOTE(review): listing line 1433, the right-hand side of this comparison, is
// absent from this excerpt.
1434 return SDValue();
1435 SDValue Lo =
1436 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
1437 DAG.getConstant(0, DL, MVT::i32));
1438 SDValue Hi =
1439 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
1440 DAG.getConstant(1, DL, MVT::i32));
1441 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1442 }
1443
1444 // Skip other cases of bitcast and use default lowering.
1445 return SDValue();
1446}
1447
1448SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1449 bool HasLo, bool HasHi,
1450 SelectionDAG &DAG) const {
1451 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1452 assert(!Subtarget.hasMips32r6());
1453
1454 EVT Ty = Op.getOperand(0).getValueType();
1455 SDLoc DL(Op);
1456 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
1457 Op.getOperand(0), Op.getOperand(1));
1458 SDValue Lo, Hi;
1459
1460 if (HasLo)
1461 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
1462 if (HasHi)
1463 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
1464
1465 if (!HasLo || !HasHi)
1466 return HasLo ? Lo : Hi;
1467
1468 SDValue Vals[] = { Lo, Hi };
1469 return DAG.getMergeValues(Vals, DL);
1470}
1471
// NOTE(review): the signature line (listing line 1472) is absent from this
// excerpt; the body uses In, DL and DAG.
// Splits In into two i32 halves and feeds them to a MipsISD::MTLOHI node whose
// result type is MVT::Untyped.
1473 SDValue InLo, InHi;
1474 std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
1475 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
1476}
1477
// NOTE(review): the signature line (listing line 1478) is absent from this
// excerpt; the body uses Op, DL and DAG.
// Reads Op back as two i32 values through MipsISD::MFLO and MipsISD::MFHI and
// joins them into an i64 with ISD::BUILD_PAIR (Lo first, Hi second).
1479 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
1480 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
1481 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1482}
1483
1484// This function expands mips intrinsic nodes which have 64-bit input operands
1485// or output values.
1486//
1487// out64 = intrinsic-node in64
1488// =>
1489// lo = copy (extract-element (in64, 0))
1490// hi = copy (extract-element (in64, 1))
1491// mips-specific-node
1492// v0 = copy lo
1493// v1 = copy hi
1494// out64 = merge-values (v0, v1)
1495//
// NOTE(review): the signature line (listing line 1496) and the declaration of
// Ops (listing line 1499) are absent from this excerpt; the body uses Op, DAG
// and Opc.
1497 SDLoc DL(Op);
// A first operand of type MVT::Other means the intrinsic node carries a chain.
1498 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
1500 unsigned OpNo = 0;
1501
1502 // See if Op has a chain input.
1503 if (HasChainIn)
1504 Ops.push_back(Op->getOperand(OpNo++));
1505
1506 // The next operand is the intrinsic opcode.
1507 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
1508
1509 // See if the next operand has type i64.
// The pre-increment steps past the intrinsic opcode operand, which is not
// copied into Ops.
1510 SDValue Opnd = Op->getOperand(++OpNo), In64;
1511
// An i64 operand is converted by initAccumulator and held back in In64;
// any other type is forwarded as is.
1512 if (Opnd.getValueType() == MVT::i64)
1513 In64 = initAccumulator(Opnd, DL, DAG);
1514 else
1515 Ops.push_back(Opnd);
1516
1517 // Push the remaining operands.
1518 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
1519 Ops.push_back(Op->getOperand(OpNo));
1520
1521 // Add In64 to the end of the list.
1522 if (In64.getNode())
1523 Ops.push_back(In64);
1524
1525 // Scan output.
1526 SmallVector<EVT, 2> ResTys;
1527
// Every i64 result type is replaced by MVT::Untyped; other result types are
// kept.
1528 for (EVT Ty : Op->values())
1529 ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);
1530
1531 // Create node.
1532 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
// An untyped first result is turned back into an i64 by extractLOHI.
1533 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
1534
1535 if (!HasChainIn)
1536 return Out;
1537
// With a chain input, result 1 of the new node must be the chain; return it
// merged with the value.
1538 assert(Val->getValueType(1) == MVT::Other);
1539 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
1540 return DAG.getMergeValues(Vals, DL);
1541}
1542
1543// Lower an MSA copy intrinsic into the specified SelectionDAG node
// NOTE(review): the signature line (listing line 1544) is absent from this
// excerpt; the body uses Op, DAG and Opc.
1545 SDLoc DL(Op);
// Operand 1 is the source vector and operand 2 the element index.
1546 SDValue Vec = Op->getOperand(1);
1547 SDValue Idx = Op->getOperand(2);
1548 EVT ResTy = Op->getValueType(0);
1549 EVT EltTy = Vec->getValueType(0).getVectorElementType();
1550
// The vector's element type is passed to the new node as a value-type operand.
1551 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
1552 DAG.getValueType(EltTy));
1553
1554 return Result;
1555}
1556
1557static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
1558 EVT ResVecTy = Op->getValueType(0);
1559 EVT ViaVecTy = ResVecTy;
1560 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1561 SDLoc DL(Op);
1562
1563 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
1564 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
1565 // lanes.
1566 SDValue LaneA = Op->getOperand(OpNr);
1567 SDValue LaneB;
1568
1569 if (ResVecTy == MVT::v2i64) {
1570 // In case of the index being passed as an immediate value, set the upper
1571 // lane to 0 so that the splati.d instruction can be matched.
1572 if (isa<ConstantSDNode>(LaneA))
1573 LaneB = DAG.getConstant(0, DL, MVT::i32);
1574 // Having the index passed in a register, set the upper lane to the same
1575 // value as the lower - this results in the BUILD_VECTOR node not being
1576 // expanded through stack. This way we are able to pattern match the set of
1577 // nodes created here to splat.d.
1578 else
1579 LaneB = LaneA;
1580 ViaVecTy = MVT::v4i32;
1581 if(BigEndian)
1582 std::swap(LaneA, LaneB);
1583 } else
1584 LaneB = LaneA;
1585
1586 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
1587 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
1588
1589 SDValue Result = DAG.getBuildVector(
1590 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1591
1592 if (ViaVecTy != ResVecTy) {
1593 SDValue One = DAG.getConstant(1, DL, ViaVecTy);
1594 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
1595 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
1596 }
1597
1598 return Result;
1599}
1600
1601static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1602 bool IsSigned = false) {
1603 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1604 return DAG.getConstant(
1605 APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
1606 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
1607 SDLoc(Op), Op->getValueType(0));
1608}
1609
1610static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
1611 bool BigEndian, SelectionDAG &DAG) {
1612 EVT ViaVecTy = VecTy;
1613 SDValue SplatValueA = SplatValue;
1614 SDValue SplatValueB = SplatValue;
1615 SDLoc DL(SplatValue);
1616
1617 if (VecTy == MVT::v2i64) {
1618 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
1619 ViaVecTy = MVT::v4i32;
1620
1621 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
1622 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
1623 DAG.getConstant(32, DL, MVT::i32));
1624 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
1625 }
1626
1627 // We currently hold the parts in little endian order. Swap them if
1628 // necessary.
1629 if (BigEndian)
1630 std::swap(SplatValueA, SplatValueB);
1631
1632 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1633 SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1634 SplatValueA, SplatValueB, SplatValueA, SplatValueB,
1635 SplatValueA, SplatValueB, SplatValueA, SplatValueB };
1636
1637 SDValue Result = DAG.getBuildVector(
1638 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));
1639
1640 if (VecTy != ViaVecTy)
1641 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
1642
1643 return Result;
1644}
1645
1647 unsigned Opc, SDValue Imm,
1648 bool BigEndian) {
// NOTE(review): the first line of this signature (listing line 1646) is absent
// from this excerpt; the body also uses Op and DAG.
//
// Computes the vector (1 << Imm) and returns the node Opc(operand 1 of Op,
// that vector), typed as the result type of Op.
1649 EVT VecTy = Op->getValueType(0);
1650 SDValue Exp2Imm;
1651 SDLoc DL(Op);
1652
1653 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1654 // here for now.
1655 if (VecTy == MVT::v2i64) {
1656 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
1657 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1658
// Split the 64-bit constant into two i32 constants, high word and low word.
1659 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
1660 MVT::i32);
1661 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);
1662
1663 if (BigEndian)
1664 std::swap(BitImmLoOp, BitImmHiOp);
1665
// Build the constant as v4i32 and reinterpret it as v2i64.
1666 Exp2Imm = DAG.getNode(
1667 ISD::BITCAST, DL, MVT::v2i64,
1668 DAG.getBuildVector(MVT::v4i32, DL,
1669 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
1670 }
1671 }
1672
// Reached for every non-v2i64 type, and for v2i64 when Imm is not a constant.
1673 if (!Exp2Imm.getNode()) {
1674 // We couldnt constant fold, do a vector shift instead
1675
1676 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1677 // only values 0-63 are valid.
1678 if (VecTy == MVT::v2i64)
1679 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
1680
1681 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
1682
// Shift the constant 1 of type VecTy left by the splatted amount.
1683 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
1684 Exp2Imm);
1685 }
1686
1687 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
1688}
1689
// NOTE(review): the signature line (listing line 1690) is absent from this
// excerpt; the body uses Op and DAG.
// Returns operand 2 of Op AND-ed with a splat of (element bit width - 1).
1691 SDLoc DL(Op);
1692 EVT ResTy = Op->getValueType(0);
1693 SDValue Vec = Op->getOperand(2);
1694 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
// The scalar constant is i64 for a v2i64 result and i32 for everything else.
1695 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1696 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
1697 DL, ResEltTy);
1698 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);
1699
1700 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
1701}
1702
// NOTE(review): the signature line (listing line 1703) is absent from this
// excerpt; the body uses Op and DAG.
// Returns operand 1 of Op AND-ed with NOT(1 << truncateVecElts(Op, DAG)).
1704 EVT ResTy = Op->getValueType(0);
1705 SDLoc DL(Op);
1706 SDValue One = DAG.getConstant(1, DL, ResTy);
// The shift amount comes from truncateVecElts.
1707 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));
1708
1709 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
1710 DAG.getNOT(DL, Bit, ResTy));
1711}
1712
// NOTE(review): the signature line (listing line 1713) is absent from this
// excerpt; the body uses Op and DAG.
// Returns operand 1 of Op AND-ed with a constant of the result type.
1714 SDLoc DL(Op);
1715 EVT ResTy = Op->getValueType(0);
// BitImm is 1 shifted left by constant operand 2, at the element bit width;
// the mask is its complement.
1716 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1717 << Op->getConstantOperandAPInt(2);
1718 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
1719
1720 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
1721}
1722
1723SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1724 SelectionDAG &DAG) const {
1725 SDLoc DL(Op);
1726 unsigned Intrinsic = Op->getConstantOperandVal(0);
1727 switch (Intrinsic) {
1728 default:
1729 return SDValue();
1730 case Intrinsic::mips_shilo:
1731 return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1732 case Intrinsic::mips_dpau_h_qbl:
1733 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1734 case Intrinsic::mips_dpau_h_qbr:
1735 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1736 case Intrinsic::mips_dpsu_h_qbl:
1737 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1738 case Intrinsic::mips_dpsu_h_qbr:
1739 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1740 case Intrinsic::mips_dpa_w_ph:
1741 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1742 case Intrinsic::mips_dps_w_ph:
1743 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1744 case Intrinsic::mips_dpax_w_ph:
1745 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1746 case Intrinsic::mips_dpsx_w_ph:
1747 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1748 case Intrinsic::mips_mulsa_w_ph:
1749 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1750 case Intrinsic::mips_mult:
1751 return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1752 case Intrinsic::mips_multu:
1753 return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1754 case Intrinsic::mips_madd:
1755 return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1756 case Intrinsic::mips_maddu:
1757 return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1758 case Intrinsic::mips_msub:
1759 return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1760 case Intrinsic::mips_msubu:
1761 return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1762 case Intrinsic::mips_addv_b:
1763 case Intrinsic::mips_addv_h:
1764 case Intrinsic::mips_addv_w:
1765 case Intrinsic::mips_addv_d:
1766 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1767 Op->getOperand(2));
1768 case Intrinsic::mips_addvi_b:
1769 case Intrinsic::mips_addvi_h:
1770 case Intrinsic::mips_addvi_w:
1771 case Intrinsic::mips_addvi_d:
1772 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1773 lowerMSASplatImm(Op, 2, DAG));
1774 case Intrinsic::mips_and_v:
1775 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1776 Op->getOperand(2));
1777 case Intrinsic::mips_andi_b:
1778 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1779 lowerMSASplatImm(Op, 2, DAG));
1780 case Intrinsic::mips_bclr_b:
1781 case Intrinsic::mips_bclr_h:
1782 case Intrinsic::mips_bclr_w:
1783 case Intrinsic::mips_bclr_d:
1784 return lowerMSABitClear(Op, DAG);
1785 case Intrinsic::mips_bclri_b:
1786 case Intrinsic::mips_bclri_h:
1787 case Intrinsic::mips_bclri_w:
1788 case Intrinsic::mips_bclri_d:
1789 return lowerMSABitClearImm(Op, DAG);
1790 case Intrinsic::mips_binsli_b:
1791 case Intrinsic::mips_binsli_h:
1792 case Intrinsic::mips_binsli_w:
1793 case Intrinsic::mips_binsli_d: {
1794 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1795 EVT VecTy = Op->getValueType(0);
1796 EVT EltTy = VecTy.getVectorElementType();
1797 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1798 report_fatal_error("Immediate out of range");
1800 Op->getConstantOperandVal(3) + 1);
1801 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1802 DAG.getConstant(Mask, DL, VecTy, true),
1803 Op->getOperand(2), Op->getOperand(1));
1804 }
1805 case Intrinsic::mips_binsri_b:
1806 case Intrinsic::mips_binsri_h:
1807 case Intrinsic::mips_binsri_w:
1808 case Intrinsic::mips_binsri_d: {
1809 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1810 EVT VecTy = Op->getValueType(0);
1811 EVT EltTy = VecTy.getVectorElementType();
1812 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1813 report_fatal_error("Immediate out of range");
1814 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
1815 Op->getConstantOperandVal(3) + 1);
1816 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1817 DAG.getConstant(Mask, DL, VecTy, true),
1818 Op->getOperand(2), Op->getOperand(1));
1819 }
1820 case Intrinsic::mips_bmnz_v:
1821 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1822 Op->getOperand(2), Op->getOperand(1));
1823 case Intrinsic::mips_bmnzi_b:
1824 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1825 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1826 Op->getOperand(1));
1827 case Intrinsic::mips_bmz_v:
1828 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1829 Op->getOperand(1), Op->getOperand(2));
1830 case Intrinsic::mips_bmzi_b:
1831 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1832 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1833 Op->getOperand(2));
1834 case Intrinsic::mips_bneg_b:
1835 case Intrinsic::mips_bneg_h:
1836 case Intrinsic::mips_bneg_w:
1837 case Intrinsic::mips_bneg_d: {
1838 EVT VecTy = Op->getValueType(0);
1839 SDValue One = DAG.getConstant(1, DL, VecTy);
1840
1841 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1842 DAG.getNode(ISD::SHL, DL, VecTy, One,
1843 truncateVecElts(Op, DAG)));
1844 }
1845 case Intrinsic::mips_bnegi_b:
1846 case Intrinsic::mips_bnegi_h:
1847 case Intrinsic::mips_bnegi_w:
1848 case Intrinsic::mips_bnegi_d:
1849 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1850 !Subtarget.isLittle());
1851 case Intrinsic::mips_bnz_b:
1852 case Intrinsic::mips_bnz_h:
1853 case Intrinsic::mips_bnz_w:
1854 case Intrinsic::mips_bnz_d:
1855 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1856 Op->getOperand(1));
1857 case Intrinsic::mips_bnz_v:
1858 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1859 Op->getOperand(1));
1860 case Intrinsic::mips_bsel_v:
1861 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1862 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1863 Op->getOperand(1), Op->getOperand(3),
1864 Op->getOperand(2));
1865 case Intrinsic::mips_bseli_b:
1866 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1867 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1868 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1869 Op->getOperand(2));
1870 case Intrinsic::mips_bset_b:
1871 case Intrinsic::mips_bset_h:
1872 case Intrinsic::mips_bset_w:
1873 case Intrinsic::mips_bset_d: {
1874 EVT VecTy = Op->getValueType(0);
1875 SDValue One = DAG.getConstant(1, DL, VecTy);
1876
1877 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1878 DAG.getNode(ISD::SHL, DL, VecTy, One,
1879 truncateVecElts(Op, DAG)));
1880 }
1881 case Intrinsic::mips_bseti_b:
1882 case Intrinsic::mips_bseti_h:
1883 case Intrinsic::mips_bseti_w:
1884 case Intrinsic::mips_bseti_d:
1885 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1886 !Subtarget.isLittle());
1887 case Intrinsic::mips_bz_b:
1888 case Intrinsic::mips_bz_h:
1889 case Intrinsic::mips_bz_w:
1890 case Intrinsic::mips_bz_d:
1891 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1892 Op->getOperand(1));
1893 case Intrinsic::mips_bz_v:
1894 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1895 Op->getOperand(1));
1896 case Intrinsic::mips_ceq_b:
1897 case Intrinsic::mips_ceq_h:
1898 case Intrinsic::mips_ceq_w:
1899 case Intrinsic::mips_ceq_d:
1900 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1901 Op->getOperand(2), ISD::SETEQ);
1902 case Intrinsic::mips_ceqi_b:
1903 case Intrinsic::mips_ceqi_h:
1904 case Intrinsic::mips_ceqi_w:
1905 case Intrinsic::mips_ceqi_d:
1906 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1907 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
1908 case Intrinsic::mips_cle_s_b:
1909 case Intrinsic::mips_cle_s_h:
1910 case Intrinsic::mips_cle_s_w:
1911 case Intrinsic::mips_cle_s_d:
1912 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1913 Op->getOperand(2), ISD::SETLE);
1914 case Intrinsic::mips_clei_s_b:
1915 case Intrinsic::mips_clei_s_h:
1916 case Intrinsic::mips_clei_s_w:
1917 case Intrinsic::mips_clei_s_d:
1918 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1919 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
1920 case Intrinsic::mips_cle_u_b:
1921 case Intrinsic::mips_cle_u_h:
1922 case Intrinsic::mips_cle_u_w:
1923 case Intrinsic::mips_cle_u_d:
1924 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1925 Op->getOperand(2), ISD::SETULE);
1926 case Intrinsic::mips_clei_u_b:
1927 case Intrinsic::mips_clei_u_h:
1928 case Intrinsic::mips_clei_u_w:
1929 case Intrinsic::mips_clei_u_d:
1930 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1931 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1932 case Intrinsic::mips_clt_s_b:
1933 case Intrinsic::mips_clt_s_h:
1934 case Intrinsic::mips_clt_s_w:
1935 case Intrinsic::mips_clt_s_d:
1936 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1937 Op->getOperand(2), ISD::SETLT);
1938 case Intrinsic::mips_clti_s_b:
1939 case Intrinsic::mips_clti_s_h:
1940 case Intrinsic::mips_clti_s_w:
1941 case Intrinsic::mips_clti_s_d:
1942 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1943 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
1944 case Intrinsic::mips_clt_u_b:
1945 case Intrinsic::mips_clt_u_h:
1946 case Intrinsic::mips_clt_u_w:
1947 case Intrinsic::mips_clt_u_d:
1948 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1949 Op->getOperand(2), ISD::SETULT);
1950 case Intrinsic::mips_clti_u_b:
1951 case Intrinsic::mips_clti_u_h:
1952 case Intrinsic::mips_clti_u_w:
1953 case Intrinsic::mips_clti_u_d:
1954 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1955 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1956 case Intrinsic::mips_copy_s_b:
1957 case Intrinsic::mips_copy_s_h:
1958 case Intrinsic::mips_copy_s_w:
1959 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1960 case Intrinsic::mips_copy_s_d:
1961 if (Subtarget.hasMips64())
1962 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1963 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1964 else {
1965 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1966 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1967 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1968 Op->getValueType(0), Op->getOperand(1),
1969 Op->getOperand(2));
1970 }
1971 case Intrinsic::mips_copy_u_b:
1972 case Intrinsic::mips_copy_u_h:
1973 case Intrinsic::mips_copy_u_w:
1974 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1975 case Intrinsic::mips_copy_u_d:
1976 if (Subtarget.hasMips64())
1977 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1978 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1979 else {
1980 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1981 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1982 // Note: When i64 is illegal, this results in copy_s.w instructions
1983 // instead of copy_u.w instructions. This makes no difference to the
1984 // behaviour since i64 is only illegal when the register file is 32-bit.
1985 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1986 Op->getValueType(0), Op->getOperand(1),
1987 Op->getOperand(2));
1988 }
1989 case Intrinsic::mips_div_s_b:
1990 case Intrinsic::mips_div_s_h:
1991 case Intrinsic::mips_div_s_w:
1992 case Intrinsic::mips_div_s_d:
1993 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1994 Op->getOperand(2));
1995 case Intrinsic::mips_div_u_b:
1996 case Intrinsic::mips_div_u_h:
1997 case Intrinsic::mips_div_u_w:
1998 case Intrinsic::mips_div_u_d:
1999 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
2000 Op->getOperand(2));
2001 case Intrinsic::mips_fadd_w:
2002 case Intrinsic::mips_fadd_d:
2003 // TODO: If intrinsics have fast-math-flags, propagate them.
2004 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
2005 Op->getOperand(2));
2006 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
2007 case Intrinsic::mips_fceq_w:
2008 case Intrinsic::mips_fceq_d:
2009 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2010 Op->getOperand(2), ISD::SETOEQ);
2011 case Intrinsic::mips_fcle_w:
2012 case Intrinsic::mips_fcle_d:
2013 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2014 Op->getOperand(2), ISD::SETOLE);
2015 case Intrinsic::mips_fclt_w:
2016 case Intrinsic::mips_fclt_d:
2017 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2018 Op->getOperand(2), ISD::SETOLT);
2019 case Intrinsic::mips_fcne_w:
2020 case Intrinsic::mips_fcne_d:
2021 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2022 Op->getOperand(2), ISD::SETONE);
2023 case Intrinsic::mips_fcor_w:
2024 case Intrinsic::mips_fcor_d:
2025 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2026 Op->getOperand(2), ISD::SETO);
2027 case Intrinsic::mips_fcueq_w:
2028 case Intrinsic::mips_fcueq_d:
2029 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2030 Op->getOperand(2), ISD::SETUEQ);
2031 case Intrinsic::mips_fcule_w:
2032 case Intrinsic::mips_fcule_d:
2033 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2034 Op->getOperand(2), ISD::SETULE);
2035 case Intrinsic::mips_fcult_w:
2036 case Intrinsic::mips_fcult_d:
2037 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2038 Op->getOperand(2), ISD::SETULT);
2039 case Intrinsic::mips_fcun_w:
2040 case Intrinsic::mips_fcun_d:
2041 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2042 Op->getOperand(2), ISD::SETUO);
2043 case Intrinsic::mips_fcune_w:
2044 case Intrinsic::mips_fcune_d:
2045 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
2046 Op->getOperand(2), ISD::SETUNE);
2047 case Intrinsic::mips_fdiv_w:
2048 case Intrinsic::mips_fdiv_d:
2049 // TODO: If intrinsics have fast-math-flags, propagate them.
2050 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
2051 Op->getOperand(2));
2052 case Intrinsic::mips_ffint_u_w:
2053 case Intrinsic::mips_ffint_u_d:
2054 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
2055 Op->getOperand(1));
2056 case Intrinsic::mips_ffint_s_w:
2057 case Intrinsic::mips_ffint_s_d:
2058 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
2059 Op->getOperand(1));
2060 case Intrinsic::mips_fill_b:
2061 case Intrinsic::mips_fill_h:
2062 case Intrinsic::mips_fill_w:
2063 case Intrinsic::mips_fill_d: {
2064 EVT ResTy = Op->getValueType(0);
2066 Op->getOperand(1));
2067
2068 // If ResTy is v2i64 then the type legalizer will break this node down into
2069 // an equivalent v4i32.
2070 return DAG.getBuildVector(ResTy, DL, Ops);
2071 }
2072 case Intrinsic::mips_fexp2_w:
2073 case Intrinsic::mips_fexp2_d: {
2074 // TODO: If intrinsics have fast-math-flags, propagate them.
2075 EVT ResTy = Op->getValueType(0);
2076 return DAG.getNode(
2077 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
2078 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
2079 }
2080 case Intrinsic::mips_flog2_w:
2081 case Intrinsic::mips_flog2_d:
2082 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
2083 case Intrinsic::mips_fmadd_w:
2084 case Intrinsic::mips_fmadd_d:
2085 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
2086 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2087 case Intrinsic::mips_fmul_w:
2088 case Intrinsic::mips_fmul_d:
2089 // TODO: If intrinsics have fast-math-flags, propagate them.
2090 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
2091 Op->getOperand(2));
2092 case Intrinsic::mips_fmsub_w:
2093 case Intrinsic::mips_fmsub_d: {
2094 // TODO: If intrinsics have fast-math-flags, propagate them.
2095 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
2096 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2097 }
2098 case Intrinsic::mips_frint_w:
2099 case Intrinsic::mips_frint_d:
2100 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
2101 case Intrinsic::mips_fsqrt_w:
2102 case Intrinsic::mips_fsqrt_d:
2103 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
2104 case Intrinsic::mips_fsub_w:
2105 case Intrinsic::mips_fsub_d:
2106 // TODO: If intrinsics have fast-math-flags, propagate them.
2107 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
2108 Op->getOperand(2));
2109 case Intrinsic::mips_ftrunc_u_w:
2110 case Intrinsic::mips_ftrunc_u_d:
2111 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
2112 Op->getOperand(1));
2113 case Intrinsic::mips_ftrunc_s_w:
2114 case Intrinsic::mips_ftrunc_s_d:
2115 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
2116 Op->getOperand(1));
2117 case Intrinsic::mips_ilvev_b:
2118 case Intrinsic::mips_ilvev_h:
2119 case Intrinsic::mips_ilvev_w:
2120 case Intrinsic::mips_ilvev_d:
2121 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
2122 Op->getOperand(1), Op->getOperand(2));
2123 case Intrinsic::mips_ilvl_b:
2124 case Intrinsic::mips_ilvl_h:
2125 case Intrinsic::mips_ilvl_w:
2126 case Intrinsic::mips_ilvl_d:
2127 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
2128 Op->getOperand(1), Op->getOperand(2));
2129 case Intrinsic::mips_ilvod_b:
2130 case Intrinsic::mips_ilvod_h:
2131 case Intrinsic::mips_ilvod_w:
2132 case Intrinsic::mips_ilvod_d:
2133 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
2134 Op->getOperand(1), Op->getOperand(2));
2135 case Intrinsic::mips_ilvr_b:
2136 case Intrinsic::mips_ilvr_h:
2137 case Intrinsic::mips_ilvr_w:
2138 case Intrinsic::mips_ilvr_d:
2139 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
2140 Op->getOperand(1), Op->getOperand(2));
2141 case Intrinsic::mips_insert_b:
2142 case Intrinsic::mips_insert_h:
2143 case Intrinsic::mips_insert_w:
2144 case Intrinsic::mips_insert_d:
2145 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
2146 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
2147 case Intrinsic::mips_insve_b:
2148 case Intrinsic::mips_insve_h:
2149 case Intrinsic::mips_insve_w:
2150 case Intrinsic::mips_insve_d: {
2151 // Report an error for out of range values.
2152 int64_t Max;
2153 switch (Intrinsic) {
2154 case Intrinsic::mips_insve_b: Max = 15; break;
2155 case Intrinsic::mips_insve_h: Max = 7; break;
2156 case Intrinsic::mips_insve_w: Max = 3; break;
2157 case Intrinsic::mips_insve_d: Max = 1; break;
2158 default: llvm_unreachable("Unmatched intrinsic");
2159 }
2160 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2161 if (Value < 0 || Value > Max)
2162 report_fatal_error("Immediate out of range");
2163 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
2164 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
2165 DAG.getConstant(0, DL, MVT::i32));
2166 }
2167 case Intrinsic::mips_ldi_b:
2168 case Intrinsic::mips_ldi_h:
2169 case Intrinsic::mips_ldi_w:
2170 case Intrinsic::mips_ldi_d:
2171 return lowerMSASplatImm(Op, 1, DAG, true);
2172 case Intrinsic::mips_lsa:
2173 case Intrinsic::mips_dlsa: {
2174 EVT ResTy = Op->getValueType(0);
2175 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2176 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
2177 Op->getOperand(2), Op->getOperand(3)));
2178 }
2179 case Intrinsic::mips_maddv_b:
2180 case Intrinsic::mips_maddv_h:
2181 case Intrinsic::mips_maddv_w:
2182 case Intrinsic::mips_maddv_d: {
2183 EVT ResTy = Op->getValueType(0);
2184 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2185 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2186 Op->getOperand(2), Op->getOperand(3)));
2187 }
2188 case Intrinsic::mips_max_s_b:
2189 case Intrinsic::mips_max_s_h:
2190 case Intrinsic::mips_max_s_w:
2191 case Intrinsic::mips_max_s_d:
2192 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2193 Op->getOperand(1), Op->getOperand(2));
2194 case Intrinsic::mips_max_u_b:
2195 case Intrinsic::mips_max_u_h:
2196 case Intrinsic::mips_max_u_w:
2197 case Intrinsic::mips_max_u_d:
2198 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2199 Op->getOperand(1), Op->getOperand(2));
2200 case Intrinsic::mips_maxi_s_b:
2201 case Intrinsic::mips_maxi_s_h:
2202 case Intrinsic::mips_maxi_s_w:
2203 case Intrinsic::mips_maxi_s_d:
2204 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2205 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2206 case Intrinsic::mips_maxi_u_b:
2207 case Intrinsic::mips_maxi_u_h:
2208 case Intrinsic::mips_maxi_u_w:
2209 case Intrinsic::mips_maxi_u_d:
2210 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2211 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2212 case Intrinsic::mips_min_s_b:
2213 case Intrinsic::mips_min_s_h:
2214 case Intrinsic::mips_min_s_w:
2215 case Intrinsic::mips_min_s_d:
2216 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2217 Op->getOperand(1), Op->getOperand(2));
2218 case Intrinsic::mips_min_u_b:
2219 case Intrinsic::mips_min_u_h:
2220 case Intrinsic::mips_min_u_w:
2221 case Intrinsic::mips_min_u_d:
2222 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2223 Op->getOperand(1), Op->getOperand(2));
2224 case Intrinsic::mips_mini_s_b:
2225 case Intrinsic::mips_mini_s_h:
2226 case Intrinsic::mips_mini_s_w:
2227 case Intrinsic::mips_mini_s_d:
2228 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2229 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2230 case Intrinsic::mips_mini_u_b:
2231 case Intrinsic::mips_mini_u_h:
2232 case Intrinsic::mips_mini_u_w:
2233 case Intrinsic::mips_mini_u_d:
2234 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2235 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2236 case Intrinsic::mips_mod_s_b:
2237 case Intrinsic::mips_mod_s_h:
2238 case Intrinsic::mips_mod_s_w:
2239 case Intrinsic::mips_mod_s_d:
2240 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2241 Op->getOperand(2));
2242 case Intrinsic::mips_mod_u_b:
2243 case Intrinsic::mips_mod_u_h:
2244 case Intrinsic::mips_mod_u_w:
2245 case Intrinsic::mips_mod_u_d:
2246 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2247 Op->getOperand(2));
2248 case Intrinsic::mips_mulv_b:
2249 case Intrinsic::mips_mulv_h:
2250 case Intrinsic::mips_mulv_w:
2251 case Intrinsic::mips_mulv_d:
2252 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2253 Op->getOperand(2));
2254 case Intrinsic::mips_msubv_b:
2255 case Intrinsic::mips_msubv_h:
2256 case Intrinsic::mips_msubv_w:
2257 case Intrinsic::mips_msubv_d: {
2258 EVT ResTy = Op->getValueType(0);
2259 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2260 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2261 Op->getOperand(2), Op->getOperand(3)));
2262 }
2263 case Intrinsic::mips_nlzc_b:
2264 case Intrinsic::mips_nlzc_h:
2265 case Intrinsic::mips_nlzc_w:
2266 case Intrinsic::mips_nlzc_d:
2267 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2268 case Intrinsic::mips_nor_v: {
2269 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2270 Op->getOperand(1), Op->getOperand(2));
2271 return DAG.getNOT(DL, Res, Res->getValueType(0));
2272 }
2273 case Intrinsic::mips_nori_b: {
2274 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2275 Op->getOperand(1),
2276 lowerMSASplatImm(Op, 2, DAG));
2277 return DAG.getNOT(DL, Res, Res->getValueType(0));
2278 }
2279 case Intrinsic::mips_or_v:
2280 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2281 Op->getOperand(2));
2282 case Intrinsic::mips_ori_b:
2283 return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2284 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2285 case Intrinsic::mips_pckev_b:
2286 case Intrinsic::mips_pckev_h:
2287 case Intrinsic::mips_pckev_w:
2288 case Intrinsic::mips_pckev_d:
2289 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2290 Op->getOperand(1), Op->getOperand(2));
2291 case Intrinsic::mips_pckod_b:
2292 case Intrinsic::mips_pckod_h:
2293 case Intrinsic::mips_pckod_w:
2294 case Intrinsic::mips_pckod_d:
2295 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2296 Op->getOperand(1), Op->getOperand(2));
2297 case Intrinsic::mips_pcnt_b:
2298 case Intrinsic::mips_pcnt_h:
2299 case Intrinsic::mips_pcnt_w:
2300 case Intrinsic::mips_pcnt_d:
2301 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2302 case Intrinsic::mips_sat_s_b:
2303 case Intrinsic::mips_sat_s_h:
2304 case Intrinsic::mips_sat_s_w:
2305 case Intrinsic::mips_sat_s_d:
2306 case Intrinsic::mips_sat_u_b:
2307 case Intrinsic::mips_sat_u_h:
2308 case Intrinsic::mips_sat_u_w:
2309 case Intrinsic::mips_sat_u_d: {
2310 // Report an error for out of range values.
2311 int64_t Max;
2312 switch (Intrinsic) {
2313 case Intrinsic::mips_sat_s_b:
2314 case Intrinsic::mips_sat_u_b: Max = 7; break;
2315 case Intrinsic::mips_sat_s_h:
2316 case Intrinsic::mips_sat_u_h: Max = 15; break;
2317 case Intrinsic::mips_sat_s_w:
2318 case Intrinsic::mips_sat_u_w: Max = 31; break;
2319 case Intrinsic::mips_sat_s_d:
2320 case Intrinsic::mips_sat_u_d: Max = 63; break;
2321 default: llvm_unreachable("Unmatched intrinsic");
2322 }
2323 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2324 if (Value < 0 || Value > Max)
2325 report_fatal_error("Immediate out of range");
2326 return SDValue();
2327 }
2328 case Intrinsic::mips_shf_b:
2329 case Intrinsic::mips_shf_h:
2330 case Intrinsic::mips_shf_w: {
2331 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2332 if (Value < 0 || Value > 255)
2333 report_fatal_error("Immediate out of range");
2334 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2335 Op->getOperand(2), Op->getOperand(1));
2336 }
2337 case Intrinsic::mips_sldi_b:
2338 case Intrinsic::mips_sldi_h:
2339 case Intrinsic::mips_sldi_w:
2340 case Intrinsic::mips_sldi_d: {
2341 // Report an error for out of range values.
2342 int64_t Max;
2343 switch (Intrinsic) {
2344 case Intrinsic::mips_sldi_b: Max = 15; break;
2345 case Intrinsic::mips_sldi_h: Max = 7; break;
2346 case Intrinsic::mips_sldi_w: Max = 3; break;
2347 case Intrinsic::mips_sldi_d: Max = 1; break;
2348 default: llvm_unreachable("Unmatched intrinsic");
2349 }
2350 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2351 if (Value < 0 || Value > Max)
2352 report_fatal_error("Immediate out of range");
2353 return SDValue();
2354 }
2355 case Intrinsic::mips_sll_b:
2356 case Intrinsic::mips_sll_h:
2357 case Intrinsic::mips_sll_w:
2358 case Intrinsic::mips_sll_d:
2359 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2360 truncateVecElts(Op, DAG));
2361 case Intrinsic::mips_slli_b:
2362 case Intrinsic::mips_slli_h:
2363 case Intrinsic::mips_slli_w:
2364 case Intrinsic::mips_slli_d:
2365 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2366 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2367 case Intrinsic::mips_splat_b:
2368 case Intrinsic::mips_splat_h:
2369 case Intrinsic::mips_splat_w:
2370 case Intrinsic::mips_splat_d:
2371 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2372 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2373 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2374 // Instead we lower to MipsISD::VSHF and match from there.
2375 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2376 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2377 Op->getOperand(1));
2378 case Intrinsic::mips_splati_b:
2379 case Intrinsic::mips_splati_h:
2380 case Intrinsic::mips_splati_w:
2381 case Intrinsic::mips_splati_d:
2382 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2383 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2384 Op->getOperand(1));
2385 case Intrinsic::mips_sra_b:
2386 case Intrinsic::mips_sra_h:
2387 case Intrinsic::mips_sra_w:
2388 case Intrinsic::mips_sra_d:
2389 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2390 truncateVecElts(Op, DAG));
2391 case Intrinsic::mips_srai_b:
2392 case Intrinsic::mips_srai_h:
2393 case Intrinsic::mips_srai_w:
2394 case Intrinsic::mips_srai_d:
2395 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2396 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2397 case Intrinsic::mips_srari_b:
2398 case Intrinsic::mips_srari_h:
2399 case Intrinsic::mips_srari_w:
2400 case Intrinsic::mips_srari_d: {
2401 // Report an error for out of range values.
2402 int64_t Max;
2403 switch (Intrinsic) {
2404 case Intrinsic::mips_srari_b: Max = 7; break;
2405 case Intrinsic::mips_srari_h: Max = 15; break;
2406 case Intrinsic::mips_srari_w: Max = 31; break;
2407 case Intrinsic::mips_srari_d: Max = 63; break;
2408 default: llvm_unreachable("Unmatched intrinsic");
2409 }
2410 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2411 if (Value < 0 || Value > Max)
2412 report_fatal_error("Immediate out of range");
2413 return SDValue();
2414 }
2415 case Intrinsic::mips_srl_b:
2416 case Intrinsic::mips_srl_h:
2417 case Intrinsic::mips_srl_w:
2418 case Intrinsic::mips_srl_d:
2419 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2420 truncateVecElts(Op, DAG));
2421 case Intrinsic::mips_srli_b:
2422 case Intrinsic::mips_srli_h:
2423 case Intrinsic::mips_srli_w:
2424 case Intrinsic::mips_srli_d:
2425 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2426 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2427 case Intrinsic::mips_srlri_b:
2428 case Intrinsic::mips_srlri_h:
2429 case Intrinsic::mips_srlri_w:
2430 case Intrinsic::mips_srlri_d: {
2431 // Report an error for out of range values.
2432 int64_t Max;
2433 switch (Intrinsic) {
2434 case Intrinsic::mips_srlri_b: Max = 7; break;
2435 case Intrinsic::mips_srlri_h: Max = 15; break;
2436 case Intrinsic::mips_srlri_w: Max = 31; break;
2437 case Intrinsic::mips_srlri_d: Max = 63; break;
2438 default: llvm_unreachable("Unmatched intrinsic");
2439 }
2440 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2441 if (Value < 0 || Value > Max)
2442 report_fatal_error("Immediate out of range");
2443 return SDValue();
2444 }
2445 case Intrinsic::mips_subv_b:
2446 case Intrinsic::mips_subv_h:
2447 case Intrinsic::mips_subv_w:
2448 case Intrinsic::mips_subv_d:
2449 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2450 Op->getOperand(2));
2451 case Intrinsic::mips_subvi_b:
2452 case Intrinsic::mips_subvi_h:
2453 case Intrinsic::mips_subvi_w:
2454 case Intrinsic::mips_subvi_d:
2455 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2456 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2457 case Intrinsic::mips_vshf_b:
2458 case Intrinsic::mips_vshf_h:
2459 case Intrinsic::mips_vshf_w:
2460 case Intrinsic::mips_vshf_d:
2461 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2462 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2463 case Intrinsic::mips_xor_v:
2464 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2465 Op->getOperand(2));
2466 case Intrinsic::mips_xori_b:
2467 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2468 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2469 case Intrinsic::thread_pointer: {
2470 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2471 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2472 }
2473 }
2474}
2475
2476static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2477 const MipsSubtarget &Subtarget) {
2478 SDLoc DL(Op);
2479 SDValue ChainIn = Op->getOperand(0);
2480 SDValue Address = Op->getOperand(2);
2481 SDValue Offset = Op->getOperand(3);
2482 EVT ResTy = Op->getValueType(0);
2483 EVT PtrTy = Address->getValueType(0);
2484
2485 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2486 // however takes an i32 signed constant offset. The actual type of the
2487 // intrinsic is a scaled signed i10.
2488 if (Subtarget.isABI_N64())
2489 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2490
2491 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2492 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2493 Align(16));
2494}
2495
2496SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2497 SelectionDAG &DAG) const {
2498 unsigned Intr = Op->getConstantOperandVal(1);
2499 switch (Intr) {
2500 default:
2501 return SDValue();
2502 case Intrinsic::mips_extp:
2503 return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2504 case Intrinsic::mips_extpdp:
2505 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2506 case Intrinsic::mips_extr_w:
2507 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2508 case Intrinsic::mips_extr_r_w:
2509 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2510 case Intrinsic::mips_extr_rs_w:
2511 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2512 case Intrinsic::mips_extr_s_h:
2513 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2514 case Intrinsic::mips_mthlip:
2515 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2516 case Intrinsic::mips_mulsaq_s_w_ph:
2517 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2518 case Intrinsic::mips_maq_s_w_phl:
2519 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2520 case Intrinsic::mips_maq_s_w_phr:
2521 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2522 case Intrinsic::mips_maq_sa_w_phl:
2523 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2524 case Intrinsic::mips_maq_sa_w_phr:
2525 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2526 case Intrinsic::mips_dpaq_s_w_ph:
2527 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2528 case Intrinsic::mips_dpsq_s_w_ph:
2529 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2530 case Intrinsic::mips_dpaq_sa_l_w:
2531 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2532 case Intrinsic::mips_dpsq_sa_l_w:
2533 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2534 case Intrinsic::mips_dpaqx_s_w_ph:
2535 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2536 case Intrinsic::mips_dpaqx_sa_w_ph:
2537 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2538 case Intrinsic::mips_dpsqx_s_w_ph:
2539 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2540 case Intrinsic::mips_dpsqx_sa_w_ph:
2541 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2542 case Intrinsic::mips_ld_b:
2543 case Intrinsic::mips_ld_h:
2544 case Intrinsic::mips_ld_w:
2545 case Intrinsic::mips_ld_d:
2546 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2547 }
2548}
2549
2551 const MipsSubtarget &Subtarget) {
2552 SDLoc DL(Op);
2553 SDValue ChainIn = Op->getOperand(0);
2554 SDValue Value = Op->getOperand(2);
2555 SDValue Address = Op->getOperand(3);
2556 SDValue Offset = Op->getOperand(4);
2557 EVT PtrTy = Address->getValueType(0);
2558
2559 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2560 // however takes an i32 signed constant offset. The actual type of the
2561 // intrinsic is a scaled signed i10.
2562 if (Subtarget.isABI_N64())
2563 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2564
2565 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2566
2567 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2568 Align(16));
2569}
2570
2571SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2572 SelectionDAG &DAG) const {
2573 unsigned Intr = Op->getConstantOperandVal(1);
2574 switch (Intr) {
2575 default:
2576 return SDValue();
2577 case Intrinsic::mips_st_b:
2578 case Intrinsic::mips_st_h:
2579 case Intrinsic::mips_st_w:
2580 case Intrinsic::mips_st_d:
2581 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2582 }
2583}
2584
2585// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2586//
2587// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2588// choose to sign-extend but we could have equally chosen zero-extend. The
2589// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2590// result into this node later (possibly changing it to a zero-extend in the
2591// process).
2592SDValue MipsSETargetLowering::
2593lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2594 SDLoc DL(Op);
2595 EVT ResTy = Op->getValueType(0);
2596 SDValue Op0 = Op->getOperand(0);
2597 EVT VecTy = Op0->getValueType(0);
2598
2599 if (!VecTy.is128BitVector())
2600 return SDValue();
2601
2602 if (ResTy.isInteger()) {
2603 SDValue Op1 = Op->getOperand(1);
2604 EVT EltTy = VecTy.getVectorElementType();
2605 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2606 DAG.getValueType(EltTy));
2607 }
2608
2609 return Op;
2610}
2611
2612static bool isConstantOrUndef(const SDValue Op) {
2613 if (Op->isUndef())
2614 return true;
2616 return true;
2618 return true;
2619 return false;
2620}
2621
2623 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2624 if (isConstantOrUndef(Op->getOperand(i)))
2625 return true;
2626 return false;
2627}
2628
2629// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2630// backend.
2631//
2632// Lowers according to the following rules:
2633// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2634// 2 less than or equal to 64 and the value fits into a signed 10-bit
2635// immediate
2636// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2637// is a power of 2 less than or equal to 64 and the value does not fit into a
2638// signed 10-bit immediate
2639// - Non-constant splats are legal as-is.
2640// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2641// - All others are illegal and must be expanded.
2642SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2643 SelectionDAG &DAG) const {
2644 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2645 EVT ResTy = Op->getValueType(0);
2646 SDLoc DL(Op);
2647 APInt SplatValue, SplatUndef;
2648 unsigned SplatBitSize;
2649 bool HasAnyUndefs;
2650
2651 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2652 return SDValue();
2653
2654 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2655 HasAnyUndefs, 8,
2656 !Subtarget.isLittle()) && SplatBitSize <= 64) {
2657 // We can only cope with 8, 16, 32, or 64-bit elements
2658 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2659 SplatBitSize != 64)
2660 return SDValue();
2661
2662 // If the value isn't an integer type we will have to bitcast
2663 // from an integer type first. Also, if there are any undefs, we must
2664 // lower them to defined values first.
2665 if (ResTy.isInteger() && !HasAnyUndefs)
2666 return Op;
2667
2668 EVT ViaVecTy;
2669
2670 switch (SplatBitSize) {
2671 default:
2672 return SDValue();
2673 case 8:
2674 ViaVecTy = MVT::v16i8;
2675 break;
2676 case 16:
2677 ViaVecTy = MVT::v8i16;
2678 break;
2679 case 32:
2680 ViaVecTy = MVT::v4i32;
2681 break;
2682 case 64:
2683 // There's no fill.d to fall back on for 64-bit values
2684 return SDValue();
2685 }
2686
2687 // SelectionDAG::getConstant will promote SplatValue appropriately.
2688 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2689
2690 // Bitcast to the type we originally wanted
2691 if (ViaVecTy != ResTy)
2692 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2693
2694 return Result;
2695 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2696 return Op;
2697 else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2698 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2699 // The resulting code is the same length as the expansion, but it doesn't
2700 // use memory operations
2701 EVT ResTy = Node->getValueType(0);
2702
2703 assert(ResTy.isVector());
2704
2705 unsigned NumElts = ResTy.getVectorNumElements();
2706 SDValue Vector = DAG.getUNDEF(ResTy);
2707 for (unsigned i = 0; i < NumElts; ++i) {
2709 Node->getOperand(i),
2710 DAG.getConstant(i, DL, MVT::i32));
2711 }
2712 return Vector;
2713 }
2714
2715 return SDValue();
2716}
2717
2718// Lower VECTOR_SHUFFLE into SHF (if possible).
2719//
2720// SHF splits the vector into blocks of four elements, then shuffles these
2721// elements according to a <4 x i2> constant (encoded as an integer immediate).
2722//
2723// It is therefore possible to lower into SHF when the mask takes the form:
2724// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2725// When undef's appear they are treated as if they were whatever value is
2726// necessary in order to fit the above forms.
2727//
2728// For example:
2729// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2730// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2731// i32 7, i32 6, i32 5, i32 4>
2732// is lowered to:
2733// (SHF_H $w0, $w1, 27)
2734// where the 27 comes from:
2735// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2737 SmallVector<int, 16> Indices,
2738 SelectionDAG &DAG) {
2739 int SHFIndices[4] = { -1, -1, -1, -1 };
2740
2741 if (Indices.size() < 4)
2742 return SDValue();
2743
2744 for (unsigned i = 0; i < 4; ++i) {
2745 for (unsigned j = i; j < Indices.size(); j += 4) {
2746 int Idx = Indices[j];
2747
2748 // Convert from vector index to 4-element subvector index
2749 // If an index refers to an element outside of the subvector then give up
2750 if (Idx != -1) {
2751 Idx -= 4 * (j / 4);
2752 if (Idx < 0 || Idx >= 4)
2753 return SDValue();
2754 }
2755
2756 // If the mask has an undef, replace it with the current index.
2757 // Note that it might still be undef if the current index is also undef
2758 if (SHFIndices[i] == -1)
2759 SHFIndices[i] = Idx;
2760
2761 // Check that non-undef values are the same as in the mask. If they
2762 // aren't then give up
2763 if (!(Idx == -1 || Idx == SHFIndices[i]))
2764 return SDValue();
2765 }
2766 }
2767
2768 // Calculate the immediate. Replace any remaining undefs with zero
2769 APInt Imm(32, 0);
2770 for (int i = 3; i >= 0; --i) {
2771 int Idx = SHFIndices[i];
2772
2773 if (Idx == -1)
2774 Idx = 0;
2775
2776 Imm <<= 2;
2777 Imm |= Idx & 0x3;
2778 }
2779
2780 SDLoc DL(Op);
2781 return DAG.getNode(MipsISD::SHF, DL, ResTy,
2782 DAG.getTargetConstant(Imm, DL, MVT::i32),
2783 Op->getOperand(0));
2784}
2785
2786/// Determine whether a range fits a regular pattern of values.
2787/// This function accounts for the possibility of jumping over the End iterator.
2788template <typename ValType>
2789static bool
2791 unsigned CheckStride,
2793 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2794 auto &I = Begin;
2795
2796 while (I != End) {
2797 if (*I != -1 && *I != ExpectedIndex)
2798 return false;
2799 ExpectedIndex += ExpectedIndexStride;
2800
2801 // Incrementing past End is undefined behaviour so we must increment one
2802 // step at a time and check for End at each step.
2803 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2804 ; // Empty loop body.
2805 }
2806 return true;
2807}
2808
2809// Determine whether VECTOR_SHUFFLE is a SPLATI.
2810//
2811// It is a SPLATI when the mask is:
2812// <x, x, x, ...>
2813// where x is any valid index.
2814//
2815// When undef's appear in the mask they are treated as if they were whatever
2816// value is necessary in order to fit the above form.
2818 SmallVector<int, 16> Indices,
2819 SelectionDAG &DAG) {
2820 assert((Indices.size() % 2) == 0);
2821
2822 int SplatIndex = -1;
2823 for (const auto &V : Indices) {
2824 if (V != -1) {
2825 SplatIndex = V;
2826 break;
2827 }
2828 }
2829
2830 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2831 0);
2832}
2833
2834// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2835//
2836// ILVEV interleaves the even elements from each vector.
2837//
2838// It is possible to lower into ILVEV when the mask consists of two of the
2839// following forms interleaved:
2840// <0, 2, 4, ...>
2841// <n, n+2, n+4, ...>
2842// where n is the number of elements in the vector.
2843// For example:
2844// <0, 0, 2, 2, 4, 4, ...>
2845// <0, n, 2, n+2, 4, n+4, ...>
2846//
2847// When undef's appear in the mask they are treated as if they were whatever
2848// value is necessary in order to fit the above forms.
2850 SmallVector<int, 16> Indices,
2851 SelectionDAG &DAG) {
2852 assert((Indices.size() % 2) == 0);
2853
2854 SDValue Wt;
2855 SDValue Ws;
2856 const auto &Begin = Indices.begin();
2857 const auto &End = Indices.end();
2858
2859 // Check even elements are taken from the even elements of one half or the
2860 // other and pick an operand accordingly.
2861 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2862 Wt = Op->getOperand(0);
2863 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2864 Wt = Op->getOperand(1);
2865 else
2866 return SDValue();
2867
2868 // Check odd elements are taken from the even elements of one half or the
2869 // other and pick an operand accordingly.
2870 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2871 Ws = Op->getOperand(0);
2872 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2873 Ws = Op->getOperand(1);
2874 else
2875 return SDValue();
2876
2877 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2878}
2879
2880// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2881//
2882// ILVOD interleaves the odd elements from each vector.
2883//
2884// It is possible to lower into ILVOD when the mask consists of two of the
2885// following forms interleaved:
2886// <1, 3, 5, ...>
2887// <n+1, n+3, n+5, ...>
2888// where n is the number of elements in the vector.
2889// For example:
2890// <1, 1, 3, 3, 5, 5, ...>
2891// <1, n+1, 3, n+3, 5, n+5, ...>
2892//
2893// When undef's appear in the mask they are treated as if they were whatever
2894// value is necessary in order to fit the above forms.
2896 SmallVector<int, 16> Indices,
2897 SelectionDAG &DAG) {
2898 assert((Indices.size() % 2) == 0);
2899
2900 SDValue Wt;
2901 SDValue Ws;
2902 const auto &Begin = Indices.begin();
2903 const auto &End = Indices.end();
2904
2905 // Check even elements are taken from the odd elements of one half or the
2906 // other and pick an operand accordingly.
2907 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2908 Wt = Op->getOperand(0);
2909 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2910 Wt = Op->getOperand(1);
2911 else
2912 return SDValue();
2913
2914 // Check odd elements are taken from the odd elements of one half or the
2915 // other and pick an operand accordingly.
2916 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2917 Ws = Op->getOperand(0);
2918 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2919 Ws = Op->getOperand(1);
2920 else
2921 return SDValue();
2922
2923 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Ws, Wt);
2924}
2925
2926// Lower VECTOR_SHUFFLE into ILVR (if possible).
2927//
2928// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2929// each vector.
2930//
2931// It is possible to lower into ILVR when the mask consists of two of the
2932// following forms interleaved:
2933// <0, 1, 2, ...>
2934// <n, n+1, n+2, ...>
2935// where n is the number of elements in the vector.
2936// For example:
2937// <0, 0, 1, 1, 2, 2, ...>
2938// <0, n, 1, n+1, 2, n+2, ...>
2939//
2940// When undef's appear in the mask they are treated as if they were whatever
2941// value is necessary in order to fit the above forms.
2943 SmallVector<int, 16> Indices,
2944 SelectionDAG &DAG) {
2945 assert((Indices.size() % 2) == 0);
2946
2947 SDValue Wt;
2948 SDValue Ws;
2949 const auto &Begin = Indices.begin();
2950 const auto &End = Indices.end();
2951
2952 // Check even elements are taken from the right (lowest-indexed) elements of
2953 // one half or the other and pick an operand accordingly.
2954 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2955 Wt = Op->getOperand(0);
2956 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2957 Wt = Op->getOperand(1);
2958 else
2959 return SDValue();
2960
2961 // Check odd elements are taken from the right (lowest-indexed) elements of
2962 // one half or the other and pick an operand accordingly.
2963 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2964 Ws = Op->getOperand(0);
2965 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2966 Ws = Op->getOperand(1);
2967 else
2968 return SDValue();
2969
2970 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2971}
2972
2973// Lower VECTOR_SHUFFLE into ILVL (if possible).
2974//
2975// ILVL interleaves consecutive elements from the left (highest-indexed) half
2976// of each vector.
2977//
2978// It is possible to lower into ILVL when the mask consists of two of the
2979// following forms interleaved:
2980// <x, x+1, x+2, ...>
2981// <n+x, n+x+1, n+x+2, ...>
2982// where n is the number of elements in the vector and x is half n.
2983// For example:
2984// <x, x, x+1, x+1, x+2, x+2, ...>
2985// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2986//
2987// When undef's appear in the mask they are treated as if they were whatever
2988// value is necessary in order to fit the above forms.
//
// Returns the MipsISD::ILVL node on a match, or an empty SDValue() when the
// even or the odd mask positions fit neither of the two forms.
//
// NOTE(review): listing line 2989, which should hold the start of this
// function's signature, is absent from this copy of the file. The function is
// presumably the lowerVECTOR_SHUFFLE_ILVL that lowerVECTOR_SHUFFLE calls.
2990 SmallVector<int, 16> Indices,
2991 SelectionDAG &DAG) {
2992 assert((Indices.size() % 2) == 0);
2993
  // HalfSize is the 'x' of the header comment: the index of the first element
  // in the left (highest-indexed) half of a vector.
2994 unsigned HalfSize = Indices.size() / 2;
2995 SDValue Wt;
2996 SDValue Ws;
2997 const auto &Begin = Indices.begin();
2998 const auto &End = Indices.end();
2999
  // fitsRegularPattern is defined outside this excerpt. From its use here it
  // presumably checks every 2nd mask entry against an arithmetic sequence that
  // starts at the 4th argument and advances by the 5th - confirm against its
  // definition.
3000 // Check even elements are taken from the left (highest-indexed) elements of
3001 // one half or the other and pick an operand accordingly.
3002 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
3003 Wt = Op->getOperand(0);
3004 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
3005 Wt = Op->getOperand(1);
3006 else
3007 return SDValue();
3008
3009 // Check odd elements are taken from the left (highest-indexed) elements of
3010 // one half or the other and pick an operand accordingly.
3011 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
3012 Ws = Op->getOperand(0);
3013 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
3014 1))
3015 Ws = Op->getOperand(1);
3016 else
3017 return SDValue();
3018
  // Ws (chosen from the odd mask positions) is passed before Wt (chosen from
  // the even mask positions).
3019 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
3020}
3021
3022// Lower VECTOR_SHUFFLE into PCKEV (if possible).
3023//
3024// PCKEV copies the even elements of each vector into the result vector.
3025//
3026// It is possible to lower into PCKEV when the mask consists of two of the
3027// following forms concatenated:
3028// <0, 2, 4, ...>
3029// <n, n+2, n+4, ...>
3030// where n is the number of elements in the vector.
3031// For example:
3032// <0, 2, 4, ..., 0, 2, 4, ...>
3033// <0, 2, 4, ..., n, n+2, n+4, ...>
3034//
3035// When undef's appear in the mask they are treated as if they were whatever
3036// value is necessary in order to fit the above forms.
//
// Returns the MipsISD::PCKEV node on a match, or an empty SDValue() when
// either half of the mask fits neither of the two forms.
//
// NOTE(review): listing line 3037, which should hold the start of this
// function's signature, is absent from this copy of the file. The function is
// presumably the lowerVECTOR_SHUFFLE_PCKEV that lowerVECTOR_SHUFFLE calls.
3038 SmallVector<int, 16> Indices,
3039 SelectionDAG &DAG) {
3040 assert((Indices.size() % 2) == 0);
3041
3042 SDValue Wt;
3043 SDValue Ws;
  // [Begin, Mid) is the first half of the mask and [Mid, End) the second half;
  // each half is matched on its own.
3044 const auto &Begin = Indices.begin();
3045 const auto &Mid = Indices.begin() + Indices.size() / 2;
3046 const auto &End = Indices.end();
3047
  // First half of the mask: even elements of operand 0 (starting at 0) or of
  // operand 1 (starting at n) select Wt.
3048 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
3049 Wt = Op->getOperand(0);
3050 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
3051 Wt = Op->getOperand(1);
3052 else
3053 return SDValue();
3054
  // Second half of the mask: the same test selects Ws.
3055 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
3056 Ws = Op->getOperand(0);
3057 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
3058 Ws = Op->getOperand(1);
3059 else
3060 return SDValue();
3061
  // Ws (from the second half of the mask) is passed before Wt (from the first
  // half).
3062 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
3063}
3064
3065// Lower VECTOR_SHUFFLE into PCKOD (if possible).
3066//
3067// PCKOD copies the odd elements of each vector into the result vector.
3068//
3069// It is possible to lower into PCKOD when the mask consists of two of the
3070// following forms concatenated:
3071// <1, 3, 5, ...>
3072// <n+1, n+3, n+5, ...>
3073// where n is the number of elements in the vector.
3074// For example:
3075// <1, 3, 5, ..., 1, 3, 5, ...>
3076// <1, 3, 5, ..., n+1, n+3, n+5, ...>
3077//
3078// When undef's appear in the mask they are treated as if they were whatever
3079// value is necessary in order to fit the above forms.
//
// Returns the MipsISD::PCKOD node on a match, or an empty SDValue() when
// either half of the mask fits neither of the two forms.
//
// NOTE(review): listing line 3080, which should hold the start of this
// function's signature, is absent from this copy of the file. The function is
// presumably the lowerVECTOR_SHUFFLE_PCKOD that lowerVECTOR_SHUFFLE calls.
3081 SmallVector<int, 16> Indices,
3082 SelectionDAG &DAG) {
3083 assert((Indices.size() % 2) == 0);
3084
3085 SDValue Wt;
3086 SDValue Ws;
  // [Begin, Mid) is the first half of the mask and [Mid, End) the second half;
  // each half is matched on its own.
3087 const auto &Begin = Indices.begin();
3088 const auto &Mid = Indices.begin() + Indices.size() / 2;
3089 const auto &End = Indices.end();
3090
  // First half of the mask: odd elements of operand 0 (starting at 1) or of
  // operand 1 (starting at n+1) select Wt.
3091 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
3092 Wt = Op->getOperand(0);
3093 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
3094 Wt = Op->getOperand(1);
3095 else
3096 return SDValue();
3097
  // Second half of the mask: the same test selects Ws.
3098 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
3099 Ws = Op->getOperand(0);
3100 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
3101 Ws = Op->getOperand(1);
3102 else
3103 return SDValue();
3104
  // Ws (from the second half of the mask) is passed before Wt (from the first
  // half).
3105 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
3106}
3107
3108// Lower VECTOR_SHUFFLE into VSHF.
3109//
3110// This mostly consists of converting the shuffle indices in Indices into a
3111// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
3112// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
3113// if the type is v8i16 and all the indices are less than 8 then the second
3114// operand is unused and can be replaced with anything. We choose to replace it
3115// with the used operand since this reduces the number of instructions overall.
3116//
3117// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
3118// UNDEFs the same as the SPLATI index.
3119// For other instances we use the last valid index if UNDEF is
3120// encountered.
//
// NOTE(review): listing line 3121, which should hold the start of this
// function's signature, is absent from this copy of the file. The function is
// presumably the lowerVECTOR_SHUFFLE_VSHF that lowerVECTOR_SHUFFLE calls.
3122 const SmallVector<int, 16> &Indices,
3123 const bool isSPLATI,
3124 SelectionDAG &DAG) {
  // NOTE(review): listing line 3125 is missing here; `Ops`, which collects the
  // mask constants below, is not declared in the visible lines.
3126 SDValue Op0;
3127 SDValue Op1;
  // The mask vector has the same shape as the result but integer elements.
3128 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
3129 EVT MaskEltTy = MaskVecTy.getVectorElementType();
3130 bool Using1stVec = false;
3131 bool Using2ndVec = false;
3132 SDLoc DL(Op);
3133 int ResTyNumElts = ResTy.getVectorNumElements();
3134
3135 assert(Indices[0] >= 0 &&
3136 "shuffle mask starts with an UNDEF, which is not expected");
3137
  // Pass 1: record which of the two shuffle operands the mask references.
  // Undef entries (-1) fall into neither range and set neither flag.
3138 for (int i = 0; i < ResTyNumElts; ++i) {
3139 // Idx == -1 means UNDEF
3140 int Idx = Indices[i];
3141
3142 if (0 <= Idx && Idx < ResTyNumElts)
3143 Using1stVec = true;
3144 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
3145 Using2ndVec = true;
3146 }
  // Pass 2: turn each mask entry into a target constant, substituting a
  // concrete index for every undef entry. The initial value 0 is only used if
  // the first entry is undef, which the assert above rules out.
3147 int LastValidIndex = 0;
3148 for (size_t i = 0; i < Indices.size(); i++) {
3149 int Idx = Indices[i];
3150 if (Idx < 0) {
3151 // Continue using splati index or use the last valid index.
3152 Idx = isSPLATI ? Indices[0] : LastValidIndex;
3153 } else {
3154 LastValidIndex = Idx;
3155 }
3156 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
3157 }
3158
3159 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
3160
  // When only one shuffle operand is referenced, use it for both VSHF inputs.
3161 if (Using1stVec && Using2ndVec) {
3162 Op0 = Op->getOperand(0);
3163 Op1 = Op->getOperand(1);
3164 } else if (Using1stVec)
3165 Op0 = Op1 = Op->getOperand(0);
3166 else if (Using2ndVec)
3167 Op0 = Op1 = Op->getOperand(1);
3168 else
3169 llvm_unreachable("shuffle vector mask references neither vector operand?");
3170
3171 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
3172 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
3173 // VSHF concatenates the vectors in a bitwise fashion:
3174 // <0b00, 0b01> + <0b10, 0b11> ->
3175 // 0b0100 + 0b1110 -> 0b01001110
3176 // <0b10, 0b11, 0b00, 0b01>
3177 // We must therefore swap the operands to get the correct result.
3178 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
3179}
3180
3181// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3182// indices in the shuffle.
//
// Returns an empty SDValue() for result types that are not 128-bit vectors.
// Otherwise the specialised lowerings are tried in the order written below and
// the first one that yields a node wins; the general VSHF lowering is the
// final fallback.
3183SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3184 SelectionDAG &DAG) const {
3185 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
3186 EVT ResTy = Op->getValueType(0);
3187
3188 if (!ResTy.is128BitVector())
3189 return SDValue();
3190
3191 int ResTyNumElts = ResTy.getVectorNumElements();
3192 SmallVector<int, 16> Indices;
3193
  // Copy the shuffle mask into a plain index list for the helpers.
3194 for (int i = 0; i < ResTyNumElts; ++i)
3195 Indices.push_back(Node->getMaskElt(i));
3196
3197 // splati.[bhwd] is preferable to the others but is matched from
3198 // MipsISD::VSHF.
3199 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3200 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
  // NOTE(review): listing line 3201 is missing here; `Result`, used by the
  // chain below, is not declared in the visible lines.
3202 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
3203 return Result;
3204 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3205 return Result;
3206 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3207 return Result;
3208 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3209 return Result;
3210 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3211 return Result;
3212 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3213 return Result;
3214 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3215 return Result;
  // Nothing more specific matched: emit the general VSHF form (not a splat).
3216 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
3217}
3218
// Expand the bposge32 pseudo MI into a branch diamond that materialises 0 or 1
// in the pseudo's result register (operand 0). MI is erased, and the returned
// block (Sink) holds every instruction that followed MI in BB.
//
// NOTE(review): listing line 3219, which should hold this function's return
// type, is absent from this copy of the file.
3220MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3221 MachineBasicBlock *BB) const {
3222 // $bb:
3223 // bposge32_pseudo $vr0
3224 // =>
3225 // $bb:
3226 // bposge32 $tbb
3227 // $fbb:
3228 // li $vr2, 0
3229 // b $sink
3230 // $tbb:
3231 // li $vr1, 1
3232 // $sink:
3233 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3234
3235 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3236 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3237 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3238 DebugLoc DL = MI.getDebugLoc();
3239 const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // NOTE(review): listing line 3240 is missing here; `It`, the insertion point
  // used by the F->insert calls below, is not declared in the visible lines.
3241 MachineFunction *F = BB->getParent();
3242 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3243 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3244 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3245 F->insert(It, FBB);
3246 F->insert(It, TBB);
3247 F->insert(It, Sink);
3248
3249 // Transfer the remainder of BB and its successor edges to Sink.
3250 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3251 BB->end());
3252 Sink->transferSuccessorsAndUpdatePHIs(BB);
3253
3254 // Add successors.
3255 BB->addSuccessor(FBB);
3256 BB->addSuccessor(TBB);
3257 FBB->addSuccessor(Sink);
3258 TBB->addSuccessor(Sink);
3259
  // NOTE(review): both BPOSGE32 and BPOSGE32C_MMR3 are emitted unconditionally
  // with the same target, while the diagram above shows a single branch.
  // Confirm whether exactly one should be selected based on the subtarget.
3260 // Insert the real bposge32 instruction to $BB.
3261 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3262 // Insert the real bposge32c instruction to $BB.
3263 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
3264
  // The "li $vr, imm" of the diagram is emitted as ADDiu $vr, $zero, imm.
3265 // Fill $FBB.
3266 Register VR2 = RegInfo.createVirtualRegister(RC);
3267 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3268 .addReg(Mips::ZERO).addImm(0);
3269 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3270
  // $TBB has no explicit branch; it relies on falling through into $Sink.
3271 // Fill $TBB.
3272 Register VR1 = RegInfo.createVirtualRegister(RC);
3273 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3274 .addReg(Mips::ZERO).addImm(1);
3275
3276 // Insert phi function to $Sink.
3277 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3278 MI.getOperand(0).getReg())
3279 .addReg(VR2)
3280 .addMBB(FBB)
3281 .addReg(VR1)
3282 .addMBB(TBB);
3283
3284 MI.eraseFromParent(); // The pseudo instruction is gone now.
3285 return Sink;
3286}
3287
// Expand an MSA "branch on vector condition" pseudo into a branch diamond that
// materialises 0 or 1 in the pseudo's result register.
//
// MI operand 0 is the result register ($rd) and operand 1 the vector register
// under test ($ws). BranchOp is the real branch opcode to emit. MI is erased,
// and the returned block (Sink) holds every instruction that followed MI.
3288MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3289 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
3290 // $bb:
3291 // vany_nonzero $rd, $ws
3292 // =>
3293 // $bb:
3294 // bnz.b $ws, $tbb
3295 // b $fbb
3296 // $fbb:
3297 // li $rd1, 0
3298 // b $sink
3299 // $tbb:
3300 // li $rd2, 1
3301 // $sink:
3302 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3303
3304 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3305 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3306 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3307 DebugLoc DL = MI.getDebugLoc();
3308 const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // NOTE(review): listing line 3309 is missing here; `It`, the insertion point
  // used by the F->insert calls below, is not declared in the visible lines.
3310 MachineFunction *F = BB->getParent();
3311 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3312 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3313 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3314 F->insert(It, FBB);
3315 F->insert(It, TBB);
3316 F->insert(It, Sink);
3317
3318 // Transfer the remainder of BB and its successor edges to Sink.
3319 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3320 BB->end());
3321 Sink->transferSuccessorsAndUpdatePHIs(BB);
3322
3323 // Add successors.
3324 BB->addSuccessor(FBB);
3325 BB->addSuccessor(TBB);
3326 FBB->addSuccessor(Sink);
3327 TBB->addSuccessor(Sink);
3328
  // Only the conditional branch is emitted into $BB; the "b $fbb" shown in the
  // diagram above is not emitted by this function.
3329 // Insert the real bnz.b instruction to $BB.
3330 BuildMI(BB, DL, TII->get(BranchOp))
3331 .addReg(MI.getOperand(1).getReg())
3332 .addMBB(TBB);
3333
  // The "li $rd, imm" of the diagram is emitted as ADDiu $rd, $zero, imm.
3334 // Fill $FBB.
3335 Register RD1 = RegInfo.createVirtualRegister(RC);
3336 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3337 .addReg(Mips::ZERO).addImm(0);
3338 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3339
3340 // Fill $TBB.
3341 Register RD2 = RegInfo.createVirtualRegister(RC);
3342 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3343 .addReg(Mips::ZERO).addImm(1);
3344
3345 // Insert phi function to $Sink.
3346 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3347 MI.getOperand(0).getReg())
3348 .addReg(RD1)
3349 .addMBB(FBB)
3350 .addReg(RD2)
3351 .addMBB(TBB);
3352
3353 MI.eraseFromParent(); // The pseudo instruction is gone now.
3354 return Sink;
3355}
3356
3357// Emit the COPY_FW pseudo instruction.
3358//
3359// copy_fw_pseudo $fd, $ws, n
3360// =>
3361// splati.w $wt, $ws, $n
3362// copy $fd, $wt:sub_lo
3363//
3364// When n is zero, the equivalent operation can be performed with (potentially)
3365// zero instructions due to register overlaps. This optimization is never valid
3366// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
//
// MI operands: 0 = $fd (result), 1 = $ws (source vector), 2 = lane immediate.
// MI is erased and BB is returned unchanged in structure.
//
// NOTE(review): listing line 3367, which should hold this function's return
// type, is absent from this copy of the file.
3368MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3369 MachineBasicBlock *BB) const {
3370 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3371 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3372 DebugLoc DL = MI.getDebugLoc();
3373 Register Fd = MI.getOperand(0).getReg();
3374 Register Ws = MI.getOperand(1).getReg();
3375 unsigned Lane = MI.getOperand(2).getImm();
3376
3377 if (Lane == 0) {
  // Lane 0 is the sub_lo subregister of the vector, so a subregister copy is
  // all that is needed.
3378 unsigned Wt = Ws;
3379 if (!Subtarget.useOddSPReg()) {
3380 // We must copy to an even-numbered MSA register so that the
3381 // single-precision sub-register is also guaranteed to be even-numbered.
3382 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3383
3384 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3385 }
3386
3387 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_lo);
3388 } else {
  // Any other lane: splat it across a temporary so it lands in element 0, then
  // copy out sub_lo. The temporary is restricted to even registers when odd
  // single-precision registers are not usable.
3389 Register Wt = RegInfo.createVirtualRegister(
3390 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3391 : &Mips::MSA128WEvensRegClass);
3392
3393 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3394 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_lo);
3395 }
3396
3397 MI.eraseFromParent(); // The pseudo instruction is gone now.
3398 return BB;
3399}
3400
3401// Emit the COPY_FD pseudo instruction.
3402//
3403// copy_fd_pseudo $fd, $ws, n
3404// =>
3405// splati.d $wt, $ws, $n
3406// copy $fd, $wt:sub_64
3407//
3408// When n is zero, the equivalent operation can be performed with (potentially)
3409// zero instructions due to register overlaps. This optimization is always
3410// valid because FR=1 mode which is the only supported mode in MSA.
//
// MI operands: 0 = $fd (result), 1 = $ws (source vector), 2 = lane immediate.
// MI is erased and BB is returned.
//
// NOTE(review): listing line 3411, which should hold this function's return
// type, is absent from this copy of the file.
3412MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3413 MachineBasicBlock *BB) const {
3414 assert(Subtarget.isFP64bit());
3415
3416 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3417 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3418 Register Fd = MI.getOperand(0).getReg();
3419 Register Ws = MI.getOperand(1).getReg();
  // Lane is only ever compared against zero below, so the "* 2" does not
  // affect which branch is taken.
3420 unsigned Lane = MI.getOperand(2).getImm() * 2;
3421 DebugLoc DL = MI.getDebugLoc();
3422
3423 if (Lane == 0)
3424 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, {}, Mips::sub_64);
3425 else {
3426 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3427
  // NOTE(review): the splat index is hard-coded to 1 rather than taken from
  // the lane operand; presumably 1 is the only possible non-zero lane of a
  // two-element vector - confirm.
3428 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3429 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_64);
3430 }
3431
3432 MI.eraseFromParent(); // The pseudo instruction is gone now.
3433 return BB;
3434}
3435
3436// Emit the INSERT_FW pseudo instruction.
3437//
3438// insert_fw_pseudo $wd, $wd_in, $n, $fs
3439// =>
3440// subreg_to_reg $wt:sub_lo, $fs
3441// insve_w $wd[$n], $wd_in, $wt[0]
//
// MI operands: 0 = $wd (result), 1 = $wd_in (vector to insert into),
// 2 = lane immediate, 3 = $fs (value to insert). MI is erased and BB is
// returned.
//
// NOTE(review): listing line 3442, which should hold this function's return
// type, is absent from this copy of the file.
3443MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3444 MachineBasicBlock *BB) const {
3445 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3446 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3447 DebugLoc DL = MI.getDebugLoc();
3448 Register Wd = MI.getOperand(0).getReg();
3449 Register Wd_in = MI.getOperand(1).getReg();
3450 unsigned Lane = MI.getOperand(2).getImm();
3451 Register Fs = MI.getOperand(3).getReg();
  // The temporary vector is restricted to even registers when odd
  // single-precision registers are not usable.
3452 Register Wt = RegInfo.createVirtualRegister(
3453 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3454 : &Mips::MSA128WEvensRegClass);
3455
  // View $fs as the sub_lo part of a vector register, then insert element 0 of
  // that vector into the requested lane of $wd_in.
3456 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3457 .addReg(Fs)
3458 .addImm(Mips::sub_lo);
3459 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3460 .addReg(Wd_in)
3461 .addImm(Lane)
3462 .addReg(Wt)
3463 .addImm(0);
3464
3465 MI.eraseFromParent(); // The pseudo instruction is gone now.
3466 return BB;
3467}
3468
3469// Emit the INSERT_FD pseudo instruction.
3470//
3471// insert_fd_pseudo $wd, $wd_in, $n, $fs
3472// =>
3473// subreg_to_reg $wt:sub_64, $fs
3474// insve_d $wd[$n], $wd_in, $wt[0]
//
// MI operands: 0 = $wd (result), 1 = $wd_in (vector to insert into),
// 2 = lane immediate, 3 = $fs (value to insert). MI is erased and BB is
// returned.
//
// NOTE(review): listing line 3475, which should hold this function's return
// type, is absent from this copy of the file.
3476MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3477 MachineBasicBlock *BB) const {
3478 assert(Subtarget.isFP64bit());
3479
3480 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3481 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3482 DebugLoc DL = MI.getDebugLoc();
3483 Register Wd = MI.getOperand(0).getReg();
3484 Register Wd_in = MI.getOperand(1).getReg();
3485 unsigned Lane = MI.getOperand(2).getImm();
3486 Register Fs = MI.getOperand(3).getReg();
3487 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3488
  // View $fs as the sub_64 part of a vector register, then insert element 0 of
  // that vector into the requested lane of $wd_in.
3489 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3490 .addReg(Fs)
3491 .addImm(Mips::sub_64);
3492 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3493 .addReg(Wd_in)
3494 .addImm(Lane)
3495 .addReg(Wt)
3496 .addImm(0);
3497
3498 MI.eraseFromParent(); // The pseudo instruction is gone now.
3499 return BB;
3500}
3501
3502// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3503//
3504// For integer:
3505// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3506// =>
3507// (SLL $lanetmp1, $lane, <log2size>)
3508// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3509// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3510// (NEG $lanetmp2, $lanetmp1)
3511// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3512//
3513// For floating point:
3514// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3515// =>
3516// (SUBREG_TO_REG $wt, $fs, <subreg>)
3517// (SLL $lanetmp1, $lane, <log2size>)
3518// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3519// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3520// (NEG $lanetmp2, $lanetmp1)
3521// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
//
// MI operands: 0 = $wd (result), 1 = source vector, 2 = register holding the
// lane index, 3 = value to insert. EltSizeInBytes must be 1, 2, 4 or 8; IsFP
// selects the floating-point expansion. MI is erased and BB is returned.
3522MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3523 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3524 bool IsFP) const {
3525 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3526 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3527 DebugLoc DL = MI.getDebugLoc();
3528 Register Wd = MI.getOperand(0).getReg();
3529 Register SrcVecReg = MI.getOperand(1).getReg();
3530 Register LaneReg = MI.getOperand(2).getReg();
3531 Register SrcValReg = MI.getOperand(3).getReg();
3532
3533 const TargetRegisterClass *VecRC = nullptr;
  // On N64 the lane arithmetic is done in 64-bit registers (DSLL/DSUB) and the
  // sub_32 subregister of the result is what gets passed to SLD_B.
3534 // FIXME: This should be true for N32 too.
3535 const TargetRegisterClass *GPRRC =
3536 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3537 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3538 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
3539 unsigned EltLog2Size;
3540 unsigned InsertOp = 0;
3541 unsigned InsveOp = 0;
  // Pick the shift amount, opcodes and vector register class for the element
  // size.
3542 switch (EltSizeInBytes) {
3543 default:
3544 llvm_unreachable("Unexpected size");
3545 case 1:
3546 EltLog2Size = 0;
3547 InsertOp = Mips::INSERT_B;
3548 InsveOp = Mips::INSVE_B;
3549 VecRC = &Mips::MSA128BRegClass;
3550 break;
3551 case 2:
3552 EltLog2Size = 1;
3553 InsertOp = Mips::INSERT_H;
3554 InsveOp = Mips::INSVE_H;
3555 VecRC = &Mips::MSA128HRegClass;
3556 break;
3557 case 4:
3558 EltLog2Size = 2;
3559 InsertOp = Mips::INSERT_W;
3560 InsveOp = Mips::INSVE_W;
3561 VecRC = &Mips::MSA128WRegClass;
3562 break;
3563 case 8:
3564 EltLog2Size = 3;
3565 InsertOp = Mips::INSERT_D;
3566 InsveOp = Mips::INSVE_D;
3567 VecRC = &Mips::MSA128DRegClass;
3568 break;
3569 }
3570
  // For floating point, first view the scalar as part of a vector register so
  // that insve.df can read it from element zero.
3571 if (IsFP) {
3572 Register Wt = RegInfo.createVirtualRegister(VecRC);
3573 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3574 .addReg(SrcValReg)
3575 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3576 SrcValReg = Wt;
3577 }
3578
  // For byte elements the lane index already is the byte index, so no shift is
  // emitted.
3579 // Convert the lane index into a byte index
3580 if (EltSizeInBytes != 1) {
3581 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3582 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3583 .addReg(LaneReg)
3584 .addImm(EltLog2Size);
3585 LaneReg = LaneTmp1;
3586 }
3587
3588 // Rotate bytes around so that the desired lane is element zero
3589 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3590 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3591 .addReg(SrcVecReg)
3592 .addReg(SrcVecReg)
3593 .addReg(LaneReg, {}, SubRegIdx);
3594
3595 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3596 if (IsFP) {
3597 // Use insve.df to insert to element zero
3598 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3599 .addReg(WdTmp1)
3600 .addImm(0)
3601 .addReg(SrcValReg)
3602 .addImm(0);
3603 } else {
3604 // Use insert.df to insert to element zero
3605 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3606 .addReg(WdTmp1)
3607 .addReg(SrcValReg)
3608 .addImm(0);
3609 }
3610
3611 // Rotate elements the rest of the way for a full rotation.
3612 // sld.df interprets $rt modulo the number of columns so we only need to negate
3613 // the lane index to do this.
  // The NEG of the header comment is emitted as a subtraction from the zero
  // register.
3614 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3615 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3616 LaneTmp2)
3617 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3618 .addReg(LaneReg);
3619 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3620 .addReg(WdTmp2)
3621 .addReg(WdTmp2)
3622 .addReg(LaneTmp2, {}, SubRegIdx);
3623
3624 MI.eraseFromParent(); // The pseudo instruction is gone now.
3625 return BB;
3626}
3627
3628// Emit the FILL_FW pseudo instruction.
3629//
3630// fill_fw_pseudo $wd, $fs
3631// =>
3632// implicit_def $wt1
3633// insert_subreg $wt2:subreg_lo, $wt1, $fs
3634// splati.w $wd, $wt2[0]
//
// MI operands: 0 = $wd (result vector), 1 = $fs (scalar to replicate). MI is
// erased and BB is returned.
//
// NOTE(review): listing line 3635, which should hold this function's return
// type, is absent from this copy of the file.
3636MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3637 MachineBasicBlock *BB) const {
3638 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3639 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3640 DebugLoc DL = MI.getDebugLoc();
3641 Register Wd = MI.getOperand(0).getReg();
3642 Register Fs = MI.getOperand(1).getReg();
  // Both temporaries are restricted to even registers when odd
  // single-precision registers are not usable.
3643 Register Wt1 = RegInfo.createVirtualRegister(
3644 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3645 : &Mips::MSA128WEvensRegClass);
3646 Register Wt2 = RegInfo.createVirtualRegister(
3647 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3648 : &Mips::MSA128WEvensRegClass);
3649
  // Place $fs in the sub_lo part of an otherwise undefined vector, then splat
  // element 0 across the result.
3650 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3651 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3652 .addReg(Wt1)
3653 .addReg(Fs)
3654 .addImm(Mips::sub_lo);
3655 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3656
3657 MI.eraseFromParent(); // The pseudo instruction is gone now.
3658 return BB;
3659}
3660
3661// Emit the FILL_FD pseudo instruction.
3662//
3663// fill_fd_pseudo $wd, $fs
3664// =>
3665// implicit_def $wt1
3666// insert_subreg $wt2:subreg_64, $wt1, $fs
3667// splati.d $wd, $wt2[0]
//
// MI operands: 0 = $wd (result vector), 1 = $fs (scalar to replicate). MI is
// erased and BB is returned.
//
// NOTE(review): listing line 3668, which should hold this function's return
// type, is absent from this copy of the file.
3669MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3670 MachineBasicBlock *BB) const {
3671 assert(Subtarget.isFP64bit());
3672
3673 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3674 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3675 DebugLoc DL = MI.getDebugLoc();
3676 Register Wd = MI.getOperand(0).getReg();
3677 Register Fs = MI.getOperand(1).getReg();
3678 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3679 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3680
  // Place $fs in the sub_64 part of an otherwise undefined vector, then splat
  // element 0 across the result.
3681 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3682 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3683 .addReg(Wt1)
3684 .addReg(Fs)
3685 .addImm(Mips::sub_64);
3686 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3687
3688 MI.eraseFromParent(); // The pseudo instruction is gone now.
3689 return BB;
3690}
3691
3692// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
3693// register.
3694//
3695// STF16 MSA128F16:$wd, mem_simm10:$addr
3696// =>
3697// copy_u.h $rtemp,$wd[0]
3698// sh $rtemp, $addr
3699//
3700// Safety: We can't use st.h & co as they would overwrite the memory after
3701// the destination. It would require half floats be allocated 16 bytes(!) of
3702// space.
//
// MI operands read here: 0 = MSA register holding the value, 1 = address base
// register. MI is erased and BB is returned.
//
// NOTE(review): listing line 3703, which should hold this function's return
// type, is absent from this copy of the file.
3704MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3705 MachineBasicBlock *BB) const {
3706
3707 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3708 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3709 DebugLoc DL = MI.getDebugLoc();
3710 Register Ws = MI.getOperand(0).getReg();
3711 Register Rt = MI.getOperand(1).getReg();
  // NOTE(review): the store's immediate is taken from the memory operand's
  // offset, not from an operand of MI - confirm the two always agree.
3712 const MachineMemOperand &MMO = **MI.memoperands_begin();
3713 unsigned Imm = MMO.getOffset();
3714
3715 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3716 // spill and reload can expand as a GPR64 operand. Examine the
3717 // operand in detail and default to ABI.
  // NOTE(review): operand 1 has already been read with getReg() above, so the
  // isReg() test below looks like it can only be true when reached - confirm.
3718 const TargetRegisterClass *RC =
3719 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3720 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3721 : &Mips::GPR64RegClass);
3722 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3723 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3724
  // Extract halfword element 0 into a 32-bit GPR; when the address register is
  // 64-bit, widen the value to a GPR64 so that SH64 can be used.
3725 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
3726 if(!UsingMips32) {
3727 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
3728 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3729 .addReg(Rs)
3730 .addImm(Mips::sub_32);
3731 Rs = Tmp;
3732 }
3733 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3734 .addReg(Rs)
3735 .addReg(Rt)
3736 .addImm(Imm)
  // NOTE(review): listing line 3737 is missing here; the call that the
  // arguments on the next line complete is not visible.
3738 &MMO, MMO.getOffset(), MMO.getSize()));
3739
3740 MI.eraseFromParent();
3741 return BB;
3742}
3743
3744// Emit the LD_F16_PSEUDO instruction to load a f16 value into an MSA register.
3745//
3746// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3747// =>
3748// lh $rtemp, $addr
3749// fill.h $wd, $rtemp
3750//
3751// Safety: We can't use ld.h & co as they over-read from the source.
3752// Additionally, if the address is not modulo 16, 2 cases can occur:
3753// a) Segmentation fault as the load instruction reads from a memory page
3754// memory it's not supposed to.
3755// b) The load crosses an implementation specific boundary, requiring OS
3756// intervention.
//
// MI operand 0 is the destination MSA register; the remaining operands are
// forwarded unchanged to the scalar load. MI is erased and BB is returned.
//
// NOTE(review): listing line 3757, which should hold this function's return
// type, is absent from this copy of the file.
3758MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3759 MachineBasicBlock *BB) const {
3760
3761 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3762 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3763 DebugLoc DL = MI.getDebugLoc();
3764 Register Wd = MI.getOperand(0).getReg();
3765
3766 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3767 // spill and reload can expand as a GPR64 operand. Examine the
3768 // operand in detail and default to ABI.
3769 const TargetRegisterClass *RC =
3770 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3771 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3772 : &Mips::GPR64RegClass);
3773
  // The loaded value uses the same register class as the address operand.
3774 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3775 Register Rt = RegInfo.createVirtualRegister(RC);
3776
  // Emit the scalar halfword load, copying every operand of MI after the first
  // onto it.
3777 MachineInstrBuilder MIB =
3778 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
3779 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3780 MIB.add(MO);
3781
  // fill.h is fed from a GPR32 here, so take the sub_32 part of a 64-bit load
  // result first.
3782 if(!UsingMips32) {
3783 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3784 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp)
3785 .addReg(Rt, {}, Mips::sub_32);
3786 Rt = Tmp;
3787 }
3788
3789 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3790
3791 MI.eraseFromParent();
3792 return BB;
3793}
3794
3795// Emit the FPROUND_PSEUDO instruction.
3796//
3797// Round an FGR64Opnd, FGR32Opnd to an f16.
3798//
3799// Safety: Cycle the operand through the GPRs so the result always ends up in
3800// the correct MSA register.
3801//
3802// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3803// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3804// (which they can be, as the MSA registers are defined to alias the
3805// FPU's 64 bit and 32 bit registers) the result can be accessed using
3806// the correct register class. That requires operands be tie-able across
3807// register classes which have a sub/super register class relationship.
3808//
3809// For FGR32Opnd:
3810//
3811// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3812// =>
3813// mfc1 $rtemp, $fs
3814// fill.w $wtemp, $rtemp
3815// fexdo.h $wd, $wtemp, $wtemp
3816//
3817// For FGR64Opnd on mips32r2+:
3818//
3819// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3820// =>
3821// mfc1 $rtemp, $fs
3822// fill.w $wtemp, $rtemp
3823// mfhc1 $rtemp2, $fs
3824// insert.w $wtemp[1], $rtemp2
3825// insert.w $wtemp[3], $rtemp2
3826// fexdo.w $wtemp2, $wtemp, $wtemp
3827// fexdo.h $wd, $wtemp2, $wtemp2
3828//
3829// For FGR64Opnd on mips64r2+:
3830//
3831// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3832// =>
3833// dmfc1 $rtemp, $fs
3834// fill.d $wtemp, $rtemp
3835// fexdo.w $wtemp2, $wtemp, $wtemp
3836// fexdo.h $wd, $wtemp2, $wtemp2
3837//
3838// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3839// undef bits are "just right" and the exception enable bits are
3840// set. By using fill.w to replicate $fs into all elements over
3841// insert.w for one element, we avoid that potential case. If
3842// fexdo.[hw] causes an exception, the exception is valid and it
3843// occurs for all elements.
//
// MI operands: 0 = $wd (result), 1 = $fs (value to round). IsFGR64 selects the
// 64-bit source expansions. MI is erased and BB is returned.
//
// NOTE(review): listing lines 3844 (return type) and 3846 (the parameter
// between MI and IsFGR64, presumably `MachineBasicBlock *BB`) are absent from
// this copy of the file.
3845MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3847 bool IsFGR64) const {
3848
3849 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3850 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3851 // it.
3852 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3853
3854 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3855 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3856
3857 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3858 DebugLoc DL = MI.getDebugLoc();
3859 Register Wd = MI.getOperand(0).getReg();
3860 Register Fs = MI.getOperand(1).getReg();
3861
3862 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3863 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  // Only the 64-bit-source-on-MIPS64 case moves the value through a GPR64 and
  // fills doublewords; every other case works on 32-bit pieces.
3864 const TargetRegisterClass *GPRRC =
3865 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3866 unsigned MFC1Opc = IsFGR64onMips64
3867 ? Mips::DMFC1
3868 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3869 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3870
3871 // Perform the register class copy as mentioned above.
3872 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3873 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3874 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  // WPHI tracks the vector register holding the current intermediate value as
  // the optional steps below are applied.
3875 unsigned WPHI = Wtemp;
3876
  // 64-bit source on a 32-bit GPR target: the fill above replicated the word
  // read by MFC1_D64; now write the word read by MFHC1_D64 into word lanes 1
  // and 3.
3877 if (IsFGR64onMips32) {
3878 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3879 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3880 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3881 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3882 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3883 .addReg(Wtemp)
3884 .addReg(Rtemp2)
3885 .addImm(1);
3886 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3887 .addReg(Wtemp2)
3888 .addReg(Rtemp2)
3889 .addImm(3);
3890 WPHI = Wtemp3;
3891 }
3892
  // A 64-bit source needs an extra fexdo.w step before the final fexdo.h.
3893 if (IsFGR64) {
3894 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3895 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3896 .addReg(WPHI)
3897 .addReg(WPHI);
3898 WPHI = Wtemp2;
3899 }
3900
3901 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3902
3903 MI.eraseFromParent();
3904 return BB;
3905}
3906
3907// Emit the FPEXTEND_PSEUDO instruction.
3908//
3909// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3910//
3911// Safety: Cycle the result through the GPRs so the result always ends up
3912// the correct floating point register.
3913//
3914// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3915// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3916// (which they can be, as the MSA registers are defined to alias the
3917// FPU's 64 bit and 32 bit registers) the result can be accessed using
3918// the correct register class. That requires operands be tie-able across
3919// register classes which have a sub/super register class relationship. I
3920// haven't checked.
3921//
3922// For FGR32Opnd:
3923//
3924// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3925// =>
3926// fexupr.w $wtemp, $ws
3927// copy_s.w $rtemp, $ws[0]
3928// mtc1 $rtemp, $fd
3929//
3930// For FGR64Opnd on Mips64:
3931//
3932// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3933// =>
3934// fexupr.w $wtemp, $ws
3935// fexupr.d $wtemp2, $wtemp
3936// copy_s.d $rtemp, $wtemp2s[0]
3937// dmtc1 $rtemp, $fd
3938//
3939// For FGR64Opnd on Mips32:
3940//
3941// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3942// =>
3943// fexupr.w $wtemp, $ws
3944// fexupr.d $wtemp2, $wtemp
3945// copy_s.w $rtemp, $wtemp2[0]
3946// mtc1 $rtemp, $ftemp
3947// copy_s.w $rtemp2, $wtemp2[1]
3948// $fd = mthc1 $rtemp2, $ftemp
3950MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3952 bool IsFGR64) const {
3953
3954 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3955 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3956 // it.
3957 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3958
3959 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3960 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3961
3962 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3963 DebugLoc DL = MI.getDebugLoc();
3964 Register Fd = MI.getOperand(0).getReg();
3965 Register Ws = MI.getOperand(1).getReg();
3966
3967 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3968 const TargetRegisterClass *GPRRC =
3969 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3970 unsigned MTC1Opc = IsFGR64onMips64
3971 ? Mips::DMTC1
3972 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3973 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3974
3975 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3976 Register WPHI = Wtemp;
3977
3978 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
3979 if (IsFGR64) {
3980 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3981 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3982 }
3983
3984 // Perform the safety regclass copy mentioned above.
3985 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3986 Register FPRPHI = IsFGR64onMips32
3987 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3988 : Fd;
3989 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3990 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3991
3992 if (IsFGR64onMips32) {
3993 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3994 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3995 .addReg(WPHI)
3996 .addImm(1);
3997 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3998 .addReg(FPRPHI)
3999 .addReg(Rtemp2);
4000 }
4001
4002 MI.eraseFromParent();
4003 return BB;
4004}
4005
4006// Emit the FEXP2_W_1 pseudo instructions.
4007//
4008// fexp2_w_1_pseudo $wd, $wt
4009// =>
4010// ldi.w $ws, 1
4011// fexp2.w $wd, $ws, $wt
4013MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
4014 MachineBasicBlock *BB) const {
4015 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4016 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
4017 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
4018 Register Ws1 = RegInfo.createVirtualRegister(RC);
4019 Register Ws2 = RegInfo.createVirtualRegister(RC);
4020 DebugLoc DL = MI.getDebugLoc();
4021
4022 // Splat 1.0 into a vector
4023 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
4024 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
4025
4026 // Emit 1.0 * fexp2(Wt)
4027 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
4028 .addReg(Ws2)
4029 .addReg(MI.getOperand(1).getReg());
4030
4031 MI.eraseFromParent(); // The pseudo instruction is gone now.
4032 return BB;
4033}
4034
4035// Emit the FEXP2_D_1 pseudo instructions.
4036//
4037// fexp2_d_1_pseudo $wd, $wt
4038// =>
4039// ldi.d $ws, 1
4040// fexp2.d $wd, $ws, $wt
4042MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
4043 MachineBasicBlock *BB) const {
4044 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4045 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
4046 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
4047 Register Ws1 = RegInfo.createVirtualRegister(RC);
4048 Register Ws2 = RegInfo.createVirtualRegister(RC);
4049 DebugLoc DL = MI.getDebugLoc();
4050
4051 // Splat 1.0 into a vector
4052 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
4053 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
4054
4055 // Emit 1.0 * fexp2(Wt)
4056 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
4057 .addReg(Ws2)
4058 .addReg(MI.getOperand(1).getReg());
4059
4060 MI.eraseFromParent(); // The pseudo instruction is gone now.
4061 return BB;
4062}
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Promote Memory to Register
Definition Mem2Reg.cpp:110
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc, SDValue Imm, bool BigEndian)
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
static cl::opt< bool > NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), cl::desc("Expand double precision loads and " "stores to their single precision " "counterparts"))
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, bool BigEndian, SelectionDAG &DAG)
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian)
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG)
static bool isBitwiseInverse(SDValue N, SDValue OfNode)
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static bool isVectorAllOnes(SDValue N)
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC)
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG)
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG)
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, const SmallVector< int, 16 > &Indices, const bool isSPLATI, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG)
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
unsigned logBase2() const
Definition APInt.h:1784
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getInRegsParamsCount() const
uint64_t getZExtValue() const
const SDValue & getBasePtr() const
const Triple & getTargetTriple() const
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
Flags getFlags() const
Return the raw flags of the source value,.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MipsFunctionInfo - This class is derived from MachineFunction private Mips target-specific informatio...
unsigned getIncomingArgSize() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given floating-point type and Register class.
void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given integer type and Register class.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
const TargetRegisterClass * getRepRegClassFor(MVT VT) const override
Return the 'representative' register class for the specified value type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
MipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
bool hasMips32r6() const
bool isLittle() const
bool hasDSPR2() const
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const
virtual void getOpndList(SmallVectorImpl< SDValue > &Ops, std::deque< std::pair< unsigned, SDValue > > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const
This function fills Ops, which is the list of operands that will later be used when a function call n...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
const MipsSubtarget & Subtarget
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
LLVM_ABI void printrWithDepth(raw_ostream &O, const SelectionDAG *G=nullptr, unsigned depth=100) const
Print a SelectionDAG node and children up to depth "depth." The given SelectionDAG allows target-spec...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVMContext * getContext() const
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getValue() const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
LLVM_ABI bool isLittleEndian() const
Tests whether the target triple is little endian.
Definition Triple.cpp:2445
LLVM Value Representation.
Definition Value.h:75
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ BRCOND
BRCOND - Conditional branch.
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
const MipsTargetLowering * createMipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:90
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
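This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.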
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.