LLVM 23.0.0git
MipsSEISelLowering.cpp
Go to the documentation of this file.
1//===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Subclass of MipsTargetLowering specialized for mips32/64.
10//
11//===----------------------------------------------------------------------===//
12
13#include "MipsSEISelLowering.h"
14#include "MipsMachineFunction.h"
15#include "MipsRegisterInfo.h"
16#include "MipsSubtarget.h"
17#include "llvm/ADT/APInt.h"
18#include "llvm/ADT/STLExtras.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/Intrinsics.h"
37#include "llvm/IR/IntrinsicsMips.h"
40#include "llvm/Support/Debug.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <cstdint>
48#include <iterator>
49#include <utility>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "mips-isel"
54
55static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
56 cl::desc("Expand double precision loads and "
57 "stores to their single precision "
58 "counterparts"));
59
60// Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16,
61// v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e.
62// INST.h for v16, INST.w for v32, INST.d for v64.
65 if (this->Subtarget.hasMSA()) {
66 switch (VT.SimpleTy) {
67 // Leave v2i1 vectors to be promoted to larger ones.
68 // Other i1 types will be promoted by default.
69 case MVT::v2i1:
70 return TypePromoteInteger;
71 break;
72 // 16-bit vector types (v2 and longer)
73 case MVT::v2i8:
74 // 32-bit vector types (v2 and longer)
75 case MVT::v2i16:
76 case MVT::v4i8:
77 // 64-bit vector types (v2 and longer)
78 case MVT::v2i32:
79 case MVT::v4i16:
80 case MVT::v8i8:
81 return TypeWidenVector;
82 break;
83 // Only word (.w) and doubleword (.d) are available for floating point
84 // vectors. That means floating point vectors should be either v2f64
85 // or v4f32.
86 // Here we only explicitly widen the f32 types - f16 will be promoted
87 // by default.
88 case MVT::v2f32:
89 case MVT::v3f32:
90 return TypeWidenVector;
91 // v2i64 is already 128-bit wide.
92 default:
93 break;
94 }
95 }
97}
98
100 const MipsSubtarget &STI)
101 : MipsTargetLowering(TM, STI) {
102 // Set up the register classes
103 addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
104
105 if (Subtarget.isGP64bit())
106 addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
107
108 if (Subtarget.hasDSP() || Subtarget.hasMSA()) {
109 // Expand all truncating stores and extending loads.
112 setTruncStoreAction(VT0, VT1, Expand);
116 }
117 }
118 }
119
120 if (Subtarget.hasDSP()) {
121 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8};
122
123 for (const auto &VecTy : VecTys) {
124 addRegisterClass(VecTy, &Mips::DSPRRegClass);
125
126 // Expand all builtin opcodes.
127 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
129
135 }
136
139
140 if (Subtarget.hasMips32r2()) {
143 }
144 }
145
146 if (Subtarget.hasDSPR2())
147 setOperationAction(ISD::MUL, MVT::v2i16, Legal);
148
149 if (Subtarget.hasMSA()) {
150 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass);
151 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass);
152 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass);
153 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass);
154 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass);
155 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
156 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
157
158 // f16 is a storage-only type, always promote it to f32.
159 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
195
197 }
198
199 if (!Subtarget.useSoftFloat()) {
200 addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
201
202 // When dealing with single precision only, use libcalls
203 if (!Subtarget.isSingleFloat()) {
204 if (Subtarget.isFP64bit())
205 addRegisterClass(MVT::f64, &Mips::FGR64RegClass);
206 else
207 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass);
208 }
209
212 setOperationAction(Op, MVT::f32, Legal);
213 setOperationAction(Op, MVT::f64, Legal);
214 }
215 }
216
217 // Targets with 64bits integer registers, but no 64bit floating point register
218 // do not support conversion between them
219 if (Subtarget.isGP64bit() && Subtarget.isSingleFloat() &&
220 !Subtarget.useSoftFloat()) {
225 }
226
231
232 if (Subtarget.hasCnMips())
234 else if (Subtarget.isR5900()) {
235 // R5900 doesn't have DMULT/DMULTU/DDIV/DDIVU - expand to 32-bit ops
243 } else if (Subtarget.isGP64bit())
245
246 if (Subtarget.isGP64bit() && !Subtarget.isR5900()) {
253 }
254
257
261 if (Subtarget.hasMips32r6()) {
264 } else {
267 }
268
270
274
275 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() &&
276 !Subtarget.hasMips64()) {
278 }
279
280 if (NoDPLoadStore) {
283 }
284
285 if (Subtarget.hasMips32r6()) {
286 // MIPS32r6 replaces the accumulator-based multiplies with a three register
287 // instruction
293
294 // MIPS32r6 replaces the accumulator-based division/remainder with separate
295 // three register division and remainder instructions.
302
303 // MIPS32r6 replaces conditional moves with an equivalent that removes the
304 // need for three GPR read ports.
308
312
313 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6");
317
319
320 // Floating point > and >= are supported via < and <=
329
338 }
339
340 if (Subtarget.hasMips64r6()) {
341 // MIPS64r6 replaces the accumulator-based multiplies with a three register
342 // instruction
348
349 // MIPS32r6 replaces the accumulator-based division/remainder with separate
350 // three register division and remainder instructions.
357
358 // MIPS64r6 replaces conditional moves with an equivalent that removes the
359 // need for three GPR read ports.
363 }
364
365 if (Subtarget.isR5900()) {
366 // R5900 FPU only supports 4 compare conditions: C.F, C.EQ, C.OLT, C.OLE
367 // (and their inversions via bc1t/bc1f). Expand all conditions that would
368 // require C.UN, C.UEQ, C.ULT, or C.ULE instructions (not available on
369 // R5900). The legalizer resolves these via operand swapping, condition
370 // inversion, and decomposition into supported conditions.
382
383 // R5900 FPU does not support IEEE 754 special values (NaN, infinity). Use
384 // custom lowering to decide per-instruction: hardware when nnan+ninf flags
385 // guarantee no NaN or infinity, software libcall otherwise.
391 }
392
393 computeRegisterProperties(Subtarget.getRegisterInfo());
394}
395
396const MipsTargetLowering *
398 const MipsSubtarget &STI) {
399 return new MipsSETargetLowering(TM, STI);
400}
401
404 if (VT == MVT::Untyped)
405 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
406
408}
409
410// Enable MSA support for the given integer type and Register class.
413 addRegisterClass(Ty, RC);
414
415 // Expand all builtin opcodes.
416 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
418
426
448
449 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) {
454 }
455
462}
463
464// Enable MSA support for the given floating-point type and Register class.
502
// Custom-lower an ISD::SELECT by routing the i1 condition through an FPR
// (MTC1_D64) and selecting with MipsISD::FSELECT (sel.s/sel.d).
503SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
506
507 EVT ResTy = Op->getValueType(0);
508 SDLoc DL(Op);
509
510 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the
511 // floating point register are undefined. Not really an issue as sel.d, which
512 // is produced from an FSELECT node, only looks at bit 0.
513 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0));
// FSELECT picks operand 1 or operand 2 based on bit 0 of the condition
// register built above; the result type matches the original SELECT.
514 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1),
515 Op->getOperand(2));
516}
517
519 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
521
522 if (Subtarget.systemSupportsUnalignedAccess()) {
523 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's
524 // implementation defined whether this is handled by hardware, software, or
525 // a hybrid of the two but it's expected that most implementations will
526 // handle the majority of cases in hardware.
527 if (Fast)
528 *Fast = 1;
529 return true;
530 } else if (Subtarget.hasMips32r6()) {
531 return false;
532 }
533
534 switch (SVT) {
535 case MVT::i64:
536 case MVT::i32:
537 if (Fast)
538 *Fast = 1;
539 return true;
540 default:
541 return false;
542 }
543}
544
546 SelectionDAG &DAG) const {
547 switch(Op.getOpcode()) {
548 case ISD::LOAD: return lowerLOAD(Op, DAG);
549 case ISD::STORE: return lowerSTORE(Op, DAG);
550 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG);
551 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG);
552 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG);
553 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG);
554 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG);
555 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG);
556 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true,
557 DAG);
558 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG);
559 case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG);
560 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG);
561 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG);
562 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG);
563 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG);
564 case ISD::SELECT: return lowerSELECT(Op, DAG);
565 case ISD::BITCAST: return lowerBITCAST(Op, DAG);
566 case ISD::FADD:
567 return lowerR5900FPOp(Op, DAG, RTLIB::ADD_F32);
568 case ISD::FSUB:
569 return lowerR5900FPOp(Op, DAG, RTLIB::SUB_F32);
570 case ISD::FMUL:
571 return lowerR5900FPOp(Op, DAG, RTLIB::MUL_F32);
572 case ISD::FDIV:
573 return lowerR5900FPOp(Op, DAG, RTLIB::DIV_F32);
574 case ISD::FSQRT:
575 return lowerR5900FPOp(Op, DAG, RTLIB::SQRT_F32);
576 }
577
579}
580
// Lower an R5900 scalar FP operation (FADD/FSUB/FMUL/FDIV/FSQRT are routed
// here from LowerOperation). The R5900 FPU is not IEEE 754 compliant, so we
// decide per-node: keep the hardware instruction when fast-math flags prove
// NaN/Inf cannot occur, otherwise emit the given soft-float libcall LC.
581SDValue MipsSETargetLowering::lowerR5900FPOp(SDValue Op, SelectionDAG &DAG,
582 RTLIB::Libcall LC) const {
584 SDNodeFlags Flags = Op->getFlags();
585
586 if (Flags.hasNoNaNs() && Flags.hasNoInfs()) {
587 // Use the hardware FPU instruction if the operation is guaranteed to have
588 // no NaN or infinity inputs/outputs (nnan+ninf flags).
// Returning Op unchanged tells the legalizer the node is already legal.
589 return Op;
590 }
591
592 // Fall back to a software libcall for IEEE correctness.
593 SDLoc DL(Op);
594 MVT VT = Op.getSimpleValueType();
// Forward all of the node's operands (1 for FSQRT, 2 for the binary ops).
595 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
// The call chain is dropped: these libcalls have no side effects we track.
597 auto [Result, Chain] = makeLibCall(DAG, LC, VT, Ops, CallOptions, DL);
598 return Result;
599}
600
601// Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT
602//
603// Performs the following transformations:
604// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its
605// sign/zero-extension is completely overwritten by the new one performed by
606// the ISD::AND.
607// - Removes redundant zero extensions performed by an ISD::AND.
610 const MipsSubtarget &Subtarget) {
611 if (!Subtarget.hasMSA())
612 return SDValue();
613
614 SDValue Op0 = N->getOperand(0);
615 SDValue Op1 = N->getOperand(1);
616 unsigned Op0Opcode = Op0->getOpcode();
617
618 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d)
619 // where $d + 1 == 2^n and n == 32
620 // or $d + 1 == 2^n and n <= 32 and ZExt
621 // -> (MipsVExtractZExt $a, $b, $c)
622 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT ||
623 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) {
625
626 if (!Mask)
627 return SDValue();
628
629 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2();
630
631 if (Log2IfPositive <= 0)
632 return SDValue(); // Mask+1 is not a power of 2
633
634 SDValue Op0Op2 = Op0->getOperand(2);
635 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT();
636 unsigned ExtendTySize = ExtendTy.getSizeInBits();
637 unsigned Log2 = Log2IfPositive;
638
639 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) ||
640 Log2 == ExtendTySize) {
641 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
642 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0),
643 Op0->getVTList(),
644 ArrayRef(Ops, Op0->getNumOperands()));
645 }
646 }
647
648 return SDValue();
649}
650
651// Determine if the specified node is a constant vector splat.
652//
653// Returns true and sets Imm if:
654// * N is a ISD::BUILD_VECTOR representing a constant splat
655//
656// This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The
657// differences are that it assumes the MSA has already been checked and the
658// arbitrary requirement for a maximum of 32-bit integers isn't applied (and
659// must not be in order for binsri.d to be selectable).
660 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
662
// Not a BUILD_VECTOR node at all -> cannot be a splat.
663 if (!Node)
664 return false;
665
666 APInt SplatValue, SplatUndef;
667 unsigned SplatBitSize;
668 bool HasAnyUndefs;
669
// MinSplatBits=8 (smallest MSA element); the final argument requests the
// big-endian interpretation when the target is not little-endian.
670 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
671 8, !IsLittleEndian))
672 return false;
673
// Report the splatted constant back to the caller.
674 Imm = SplatValue;
675
676 return true;
677}
678
679// Test whether the given node is an all-ones build_vector.
681 // Look through bitcasts. Endianness doesn't matter because we are looking
682 // for an all-ones value.
683 if (N->getOpcode() == ISD::BITCAST)
684 N = N->getOperand(0);
685
687
688 if (!BVN)
689 return false;
690
691 APInt SplatValue, SplatUndef;
692 unsigned SplatBitSize;
693 bool HasAnyUndefs;
694
695 // Endianness doesn't matter in this context because we are looking for
696 // an all-ones value.
697 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
698 return SplatValue.isAllOnes();
699
700 return false;
701}
702
703// Test whether N is the bitwise inverse of OfNode.
704static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
705 if (N->getOpcode() != ISD::XOR)
706 return false;
707
708 if (isVectorAllOnes(N->getOperand(0)))
709 return N->getOperand(1) == OfNode;
710
711 if (isVectorAllOnes(N->getOperand(1)))
712 return N->getOperand(0) == OfNode;
713
714 return false;
715}
716
717// Perform combines where ISD::OR is the root node.
718//
719// Performs the following transformations:
720// - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b)
721// where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit
722// vector type.
725 const MipsSubtarget &Subtarget) {
726 if (!Subtarget.hasMSA())
727 return SDValue();
728
729 EVT Ty = N->getValueType(0);
730
731 if (!Ty.is128BitVector())
732 return SDValue();
733
734 SDValue Op0 = N->getOperand(0);
735 SDValue Op1 = N->getOperand(1);
736
737 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) {
738 SDValue Op0Op0 = Op0->getOperand(0);
739 SDValue Op0Op1 = Op0->getOperand(1);
740 SDValue Op1Op0 = Op1->getOperand(0);
741 SDValue Op1Op1 = Op1->getOperand(1);
742 bool IsLittleEndian = !Subtarget.isLittle();
743
744 SDValue IfSet, IfClr, Cond;
745 bool IsConstantMask = false;
746 APInt Mask, InvMask;
747
748 // If Op0Op0 is an appropriate mask, try to find it's inverse in either
749 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while
750 // looking.
751 // IfClr will be set if we find a valid match.
752 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) {
753 Cond = Op0Op0;
754 IfSet = Op0Op1;
755
756 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
757 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
758 IfClr = Op1Op1;
759 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
760 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
761 IfClr = Op1Op0;
762
763 IsConstantMask = true;
764 }
765
766 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
767 // thing again using this mask.
768 // IfClr will be set if we find a valid match.
769 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) {
770 Cond = Op0Op1;
771 IfSet = Op0Op0;
772
773 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) &&
774 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
775 IfClr = Op1Op1;
776 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) &&
777 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask)
778 IfClr = Op1Op0;
779
780 IsConstantMask = true;
781 }
782
783 // If IfClr is not yet set, try looking for a non-constant match.
784 // IfClr will be set if we find a valid match amongst the eight
785 // possibilities.
786 if (!IfClr.getNode()) {
787 if (isBitwiseInverse(Op0Op0, Op1Op0)) {
788 Cond = Op1Op0;
789 IfSet = Op1Op1;
790 IfClr = Op0Op1;
791 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
792 Cond = Op1Op0;
793 IfSet = Op1Op1;
794 IfClr = Op0Op0;
795 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
796 Cond = Op1Op1;
797 IfSet = Op1Op0;
798 IfClr = Op0Op1;
799 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
800 Cond = Op1Op1;
801 IfSet = Op1Op0;
802 IfClr = Op0Op0;
803 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
804 Cond = Op0Op0;
805 IfSet = Op0Op1;
806 IfClr = Op1Op1;
807 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
808 Cond = Op0Op0;
809 IfSet = Op0Op1;
810 IfClr = Op1Op0;
811 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
812 Cond = Op0Op1;
813 IfSet = Op0Op0;
814 IfClr = Op1Op1;
815 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
816 Cond = Op0Op1;
817 IfSet = Op0Op0;
818 IfClr = Op1Op0;
819 }
820 }
821
822 // At this point, IfClr will be set if we have a valid match.
823 if (!IfClr.getNode())
824 return SDValue();
825
826 assert(Cond.getNode() && IfSet.getNode());
827
828 // Fold degenerate cases.
829 if (IsConstantMask) {
830 if (Mask.isAllOnes())
831 return IfSet;
832 else if (Mask == 0)
833 return IfClr;
834 }
835
836 // Transform the DAG into an equivalent VSELECT.
837 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr);
838 }
839
840 return SDValue();
841}
842
844 SelectionDAG &DAG,
845 const MipsSubtarget &Subtarget) {
846 // Estimate the number of operations the below transform will turn a
847 // constant multiply into. The number is approximately equal to the minimal
848 // number of powers of two that constant can be broken down to by adding
849 // or subtracting them.
850 //
851 // If we have taken more than 12[1] / 8[2] steps to attempt the
852 // optimization for a native sized value, it is more than likely that this
853 // optimization will make things worse.
854 //
855 // [1] MIPS64 requires 6 instructions at most to materialize any constant,
856 // multiplication requires at least 4 cycles, but another cycle (or two)
857 // to retrieve the result from the HI/LO registers.
858 //
859 // [2] For MIPS32, more than 8 steps is expensive as the constant could be
860 // materialized in 2 instructions, multiplication requires at least 4
861 // cycles, but another cycle (or two) to retrieve the result from the
862 // HI/LO registers.
863 //
864 // TODO:
865 // - MaxSteps needs to consider the `VT` of the constant for the current
866 // target.
867 // - Consider to perform this optimization after type legalization.
868 // That allows to remove a workaround for types not supported natively.
869 // - Take in account `-Os, -Oz` flags because this optimization
870 // increases code size.
871 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12;
872
873 SmallVector<APInt, 16> WorkStack(1, C);
874 unsigned Steps = 0;
875 unsigned BitWidth = C.getBitWidth();
876
877 while (!WorkStack.empty()) {
878 APInt Val = WorkStack.pop_back_val();
879
880 if (Val == 0 || Val == 1)
881 continue;
882
883 if (Steps >= MaxSteps)
884 return false;
885
886 if (Val.isPowerOf2()) {
887 ++Steps;
888 continue;
889 }
890
891 APInt Floor = APInt(BitWidth, 1) << Val.logBase2();
892 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0)
893 : APInt(BitWidth, 1) << C.ceilLogBase2();
894 if ((Val - Floor).ule(Ceil - Val)) {
895 WorkStack.push_back(Floor);
896 WorkStack.push_back(Val - Floor);
897 } else {
898 WorkStack.push_back(Ceil);
899 WorkStack.push_back(Ceil - Val);
900 }
901
902 ++Steps;
903 }
904
905 // If the value being multiplied is not supported natively, we have to pay
906 // an additional legalization cost, conservatively assume an increase in the
907 // cost of 3 instructions per step. This values for this heuristic were
908 // determined experimentally.
909 unsigned RegisterSize = DAG.getTargetLoweringInfo()
910 .getRegisterType(*DAG.getContext(), VT)
911 .getSizeInBits();
912 Steps *= (VT.getSizeInBits() != RegisterSize) * 3;
913 if (Steps > 27)
914 return false;
915
916 return true;
917}
918
920 EVT ShiftTy, SelectionDAG &DAG) {
921 // Return 0.
922 if (C == 0)
923 return DAG.getConstant(0, DL, VT);
924
925 // Return x.
926 if (C == 1)
927 return X;
928
929 // If c is power of 2, return (shl x, log2(c)).
930 if (C.isPowerOf2())
931 return DAG.getNode(ISD::SHL, DL, VT, X,
932 DAG.getConstant(C.logBase2(), DL, ShiftTy));
933
934 unsigned BitWidth = C.getBitWidth();
935 APInt Floor = APInt(BitWidth, 1) << C.logBase2();
936 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) :
937 APInt(BitWidth, 1) << C.ceilLogBase2();
938
939 // If |c - floor_c| <= |c - ceil_c|,
940 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))),
941 // return (add constMult(x, floor_c), constMult(x, c - floor_c)).
942 if ((C - Floor).ule(Ceil - C)) {
943 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG);
944 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG);
945 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
946 }
947
948 // If |c - floor_c| > |c - ceil_c|,
949 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)).
950 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG);
951 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG);
952 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
953}
954
957 const MipsSETargetLowering *TL,
958 const MipsSubtarget &Subtarget) {
959 EVT VT = N->getValueType(0);
960
961 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
963 C->getAPIntValue(), VT, DAG, Subtarget))
964 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT,
966 DAG);
967
968 return SDValue(N, 0);
969}
970
972 SelectionDAG &DAG,
973 const MipsSubtarget &Subtarget) {
974 // See if this is a vector splat immediate node.
975 APInt SplatValue, SplatUndef;
976 unsigned SplatBitSize;
977 bool HasAnyUndefs;
978 unsigned EltSize = Ty.getScalarSizeInBits();
979 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
980
981 if (!Subtarget.hasDSP())
982 return SDValue();
983
984 if (!BV ||
985 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
986 EltSize, !Subtarget.isLittle()) ||
987 (SplatBitSize != EltSize) ||
988 (SplatValue.getZExtValue() >= EltSize))
989 return SDValue();
990
991 SDLoc DL(N);
992 return DAG.getNode(Opc, DL, Ty, N->getOperand(0),
993 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32));
994}
995
998 const MipsSubtarget &Subtarget) {
999 EVT Ty = N->getValueType(0);
1000
1001 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1002 return SDValue();
1003
1004 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget);
1005}
1006
1007// Fold sign-extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold
1008// constant splats into MipsISD::SHRA_DSP for DSPr2.
1009//
1010// Performs the following transformations:
1011// - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its
1012// sign/zero-extension is completely overwritten by the new one performed by
1013// the ISD::SRA and ISD::SHL nodes.
1014// - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL
1015// sequence.
1016//
1017// See performDSPShiftCombine for more information about the transformation
1018// used for DSPr2.
1021 const MipsSubtarget &Subtarget) {
1022 EVT Ty = N->getValueType(0);
1023
1024 if (Subtarget.hasMSA()) {
1025 SDValue Op0 = N->getOperand(0);
1026 SDValue Op1 = N->getOperand(1);
1027
1028 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d)
1029 // where $d + sizeof($c) == 32
1030 // or $d + sizeof($c) <= 32 and SExt
1031 // -> (MipsVExtractSExt $a, $b, $c)
1032 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) {
1033 SDValue Op0Op0 = Op0->getOperand(0);
1035
1036 if (!ShAmount)
1037 return SDValue();
1038
1039 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT &&
1040 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT)
1041 return SDValue();
1042
1043 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT();
1044 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits();
1045
1046 if (TotalBits == 32 ||
1047 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT &&
1048 TotalBits <= 32)) {
1049 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
1050 Op0Op0->getOperand(2) };
1051 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0),
1052 Op0Op0->getVTList(),
1053 ArrayRef(Ops, Op0Op0->getNumOperands()));
1054 }
1055 }
1056 }
1057
1058 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2()))
1059 return SDValue();
1060
1061 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget);
1062}
1063
1064
1067 const MipsSubtarget &Subtarget) {
1068 EVT Ty = N->getValueType(0);
1069
1070 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8))
1071 return SDValue();
1072
1073 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget);
1074}
1075
1077 bool IsV216 = (Ty == MVT::v2i16);
1078
1079 switch (CC) {
1080 case ISD::SETEQ:
1081 case ISD::SETNE: return true;
1082 case ISD::SETLT:
1083 case ISD::SETLE:
1084 case ISD::SETGT:
1085 case ISD::SETGE: return IsV216;
1086 case ISD::SETULT:
1087 case ISD::SETULE:
1088 case ISD::SETUGT:
1089 case ISD::SETUGE: return !IsV216;
1090 default: return false;
1091 }
1092}
1093
1095 EVT Ty = N->getValueType(0);
1096
1097 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8))
1098 return SDValue();
1099
1100 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get()))
1101 return SDValue();
1102
1103 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0),
1104 N->getOperand(1), N->getOperand(2));
1105}
1106
1108 EVT Ty = N->getValueType(0);
1109
1110 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) {
1111 SDValue SetCC = N->getOperand(0);
1112
1113 if (SetCC.getOpcode() != MipsISD::SETCC_DSP)
1114 return SDValue();
1115
1116 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty,
1117 SetCC.getOperand(0), SetCC.getOperand(1),
1118 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2));
1119 }
1120
1121 return SDValue();
1122}
1123
1125 const MipsSubtarget &Subtarget) {
1126 EVT Ty = N->getValueType(0);
1127
1128 if (Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) {
1129 // Try the following combines:
1130 // (xor (or $a, $b), (build_vector allones))
1131 // (xor (or $a, $b), (bitcast (build_vector allones)))
1132 SDValue Op0 = N->getOperand(0);
1133 SDValue Op1 = N->getOperand(1);
1134 SDValue NotOp;
1135
1137 NotOp = Op1;
1138 else if (ISD::isBuildVectorAllOnes(Op1.getNode()))
1139 NotOp = Op0;
1140 else
1141 return SDValue();
1142
1143 if (NotOp->getOpcode() == ISD::OR)
1144 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0),
1145 NotOp->getOperand(1));
1146 }
1147
1148 return SDValue();
1149}
1150
1151SDValue
1153 SelectionDAG &DAG = DCI.DAG;
1154 SDValue Val;
1155
1156 switch (N->getOpcode()) {
1157 case ISD::AND:
1158 Val = performANDCombine(N, DAG, DCI, Subtarget);
1159 break;
1160 case ISD::OR:
1161 Val = performORCombine(N, DAG, DCI, Subtarget);
1162 break;
1163 case ISD::MUL:
1164 return performMULCombine(N, DAG, DCI, this, Subtarget);
1165 case ISD::SHL:
1166 Val = performSHLCombine(N, DAG, DCI, Subtarget);
1167 break;
1168 case ISD::SRA:
1169 return performSRACombine(N, DAG, DCI, Subtarget);
1170 case ISD::SRL:
1171 return performSRLCombine(N, DAG, DCI, Subtarget);
1172 case ISD::VSELECT:
1173 return performVSELECTCombine(N, DAG);
1174 case ISD::XOR:
1175 Val = performXORCombine(N, DAG, Subtarget);
1176 break;
1177 case ISD::SETCC:
1178 Val = performSETCCCombine(N, DAG);
1179 break;
1180 }
1181
1182 if (Val.getNode()) {
1183 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n";
1184 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n";
1185 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n");
1186 return Val;
1187 }
1188
1190}
1191
1194 MachineBasicBlock *BB) const {
1195 switch (MI.getOpcode()) {
1196 default:
1198 case Mips::BPOSGE32_PSEUDO:
1199 return emitBPOSGE32(MI, BB);
1200 case Mips::SNZ_B_PSEUDO:
1201 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
1202 case Mips::SNZ_H_PSEUDO:
1203 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
1204 case Mips::SNZ_W_PSEUDO:
1205 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
1206 case Mips::SNZ_D_PSEUDO:
1207 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
1208 case Mips::SNZ_V_PSEUDO:
1209 return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
1210 case Mips::SZ_B_PSEUDO:
1211 return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
1212 case Mips::SZ_H_PSEUDO:
1213 return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
1214 case Mips::SZ_W_PSEUDO:
1215 return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
1216 case Mips::SZ_D_PSEUDO:
1217 return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
1218 case Mips::SZ_V_PSEUDO:
1219 return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
1220 case Mips::COPY_FW_PSEUDO:
1221 return emitCOPY_FW(MI, BB);
1222 case Mips::COPY_FD_PSEUDO:
1223 return emitCOPY_FD(MI, BB);
1224 case Mips::INSERT_FW_PSEUDO:
1225 return emitINSERT_FW(MI, BB);
1226 case Mips::INSERT_FD_PSEUDO:
1227 return emitINSERT_FD(MI, BB);
1228 case Mips::INSERT_B_VIDX_PSEUDO:
1229 case Mips::INSERT_B_VIDX64_PSEUDO:
1230 return emitINSERT_DF_VIDX(MI, BB, 1, false);
1231 case Mips::INSERT_H_VIDX_PSEUDO:
1232 case Mips::INSERT_H_VIDX64_PSEUDO:
1233 return emitINSERT_DF_VIDX(MI, BB, 2, false);
1234 case Mips::INSERT_W_VIDX_PSEUDO:
1235 case Mips::INSERT_W_VIDX64_PSEUDO:
1236 return emitINSERT_DF_VIDX(MI, BB, 4, false);
1237 case Mips::INSERT_D_VIDX_PSEUDO:
1238 case Mips::INSERT_D_VIDX64_PSEUDO:
1239 return emitINSERT_DF_VIDX(MI, BB, 8, false);
1240 case Mips::INSERT_FW_VIDX_PSEUDO:
1241 case Mips::INSERT_FW_VIDX64_PSEUDO:
1242 return emitINSERT_DF_VIDX(MI, BB, 4, true);
1243 case Mips::INSERT_FD_VIDX_PSEUDO:
1244 case Mips::INSERT_FD_VIDX64_PSEUDO:
1245 return emitINSERT_DF_VIDX(MI, BB, 8, true);
1246 case Mips::FILL_FW_PSEUDO:
1247 return emitFILL_FW(MI, BB);
1248 case Mips::FILL_FD_PSEUDO:
1249 return emitFILL_FD(MI, BB);
1250 case Mips::FEXP2_W_1_PSEUDO:
1251 return emitFEXP2_W_1(MI, BB);
1252 case Mips::FEXP2_D_1_PSEUDO:
1253 return emitFEXP2_D_1(MI, BB);
1254 case Mips::ST_F16:
1255 return emitST_F16_PSEUDO(MI, BB);
1256 case Mips::LD_F16:
1257 return emitLD_F16_PSEUDO(MI, BB);
1258 case Mips::MSA_FP_EXTEND_W_PSEUDO:
1259 return emitFPEXTEND_PSEUDO(MI, BB, false);
1260 case Mips::MSA_FP_ROUND_W_PSEUDO:
1261 return emitFPROUND_PSEUDO(MI, BB, false);
1262 case Mips::MSA_FP_EXTEND_D_PSEUDO:
1263 return emitFPEXTEND_PSEUDO(MI, BB, true);
1264 case Mips::MSA_FP_ROUND_D_PSEUDO:
1265 return emitFPROUND_PSEUDO(MI, BB, true);
1266 }
1267}
1268
1269bool MipsSETargetLowering::isEligibleForTailCallOptimization(
1270 const CCState &CCInfo, unsigned NextStackOffset,
1271 const MipsFunctionInfo &FI) const {
1272 // Exception has to be cleared with eret.
1273 if (FI.isISR())
1274 return false;
1275
1276 // Return false if either the callee or caller has a byval argument.
1277 if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
1278 return false;
1279
1280 // Return true if the callee's argument area is no larger than the caller's.
1281 return NextStackOffset <= FI.getIncomingArgSize();
1282}
1283
// Build the operand list for a call node. The MipsSE variant only pushes the
// callee first and then delegates to the base-class implementation, which
// appends the remaining call operands (argument registers, chain, etc.).
void MipsSETargetLowering::
getOpndList(SmallVectorImpl<SDValue> &Ops,
            std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
            bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
            bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee,
            SDValue Chain) const {
  Ops.push_back(Callee);
  MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal,
                                  InternalLinkage, IsCallReloc, CLI, Callee,
                                  Chain);
}
1295
1296SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1297 LoadSDNode &Nd = *cast<LoadSDNode>(Op);
1298
1299 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1300 return MipsTargetLowering::lowerLOAD(Op, DAG);
1301
1302 // Replace a double precision load with two i32 loads and a buildpair64.
1303 SDLoc DL(Op);
1304 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1305 EVT PtrVT = Ptr.getValueType();
1306
1307 // i32 load from lower address.
1308 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(),
1309 Nd.getAlign(), Nd.getMemOperand()->getFlags());
1310
1311 // i32 load from higher address.
1312 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1313 SDValue Hi = DAG.getLoad(
1314 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(),
1316
1317 if (!Subtarget.isLittle())
1318 std::swap(Lo, Hi);
1319
1320 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1321 SDValue Ops[2] = {BP, Hi.getValue(1)};
1322 return DAG.getMergeValues(Ops, DL);
1323}
1324
1325SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1326 StoreSDNode &Nd = *cast<StoreSDNode>(Op);
1327
1328 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore)
1330
1331 // Replace a double precision store with two extractelement64s and i32 stores.
1332 SDLoc DL(Op);
1333 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain();
1334 EVT PtrVT = Ptr.getValueType();
1335 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1336 Val, DAG.getConstant(0, DL, MVT::i32));
1337 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
1338 Val, DAG.getConstant(1, DL, MVT::i32));
1339
1340 if (!Subtarget.isLittle())
1341 std::swap(Lo, Hi);
1342
1343 // i32 store to lower address.
1344 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(),
1345 Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1346
1347 // i32 store to higher address.
1348 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT));
1349 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(),
1350 commonAlignment(Nd.getAlign(), 4),
1351 Nd.getMemOperand()->getFlags(), Nd.getAAInfo());
1352}
1353
1354SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
1355 SelectionDAG &DAG) const {
1356 SDLoc DL(Op);
1357 MVT Src = Op.getOperand(0).getValueType().getSimpleVT();
1358 MVT Dest = Op.getValueType().getSimpleVT();
1359
1360 // Bitcast i64 to double.
1361 if (Src == MVT::i64 && Dest == MVT::f64) {
1362 SDValue Lo, Hi;
1363 std::tie(Lo, Hi) =
1364 DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
1365 return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
1366 }
1367
1368 // Bitcast double to i64.
1369 if (Src == MVT::f64 && Dest == MVT::i64) {
1370 // Skip lower bitcast when operand0 has converted float results to integer
1371 // which was done by function SoftenFloatResult.
1372 if (getTypeAction(*DAG.getContext(), Op.getOperand(0).getValueType()) ==
1374 return SDValue();
1375 SDValue Lo =
1376 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
1377 DAG.getConstant(0, DL, MVT::i32));
1378 SDValue Hi =
1379 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
1380 DAG.getConstant(1, DL, MVT::i32));
1381 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1382 }
1383
1384 // Skip other cases of bitcast and use default lowering.
1385 return SDValue();
1386}
1387
1388SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
1389 bool HasLo, bool HasHi,
1390 SelectionDAG &DAG) const {
1391 // MIPS32r6/MIPS64r6 removed accumulator based multiplies.
1392 assert(!Subtarget.hasMips32r6());
1393
1394 EVT Ty = Op.getOperand(0).getValueType();
1395 SDLoc DL(Op);
1396 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
1397 Op.getOperand(0), Op.getOperand(1));
1398 SDValue Lo, Hi;
1399
1400 if (HasLo)
1401 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult);
1402 if (HasHi)
1403 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult);
1404
1405 if (!HasLo || !HasHi)
1406 return HasLo ? Lo : Hi;
1407
1408 SDValue Vals[] = { Lo, Hi };
1409 return DAG.getMergeValues(Vals, DL);
1410}
1411
1413 SDValue InLo, InHi;
1414 std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
1415 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
1416}
1417
1419 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op);
1420 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op);
1421 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
1422}
1423
1424// This function expands mips intrinsic nodes which have 64-bit input operands
1425// or output values.
1426//
1427// out64 = intrinsic-node in64
1428// =>
1429// lo = copy (extract-element (in64, 0))
1430// hi = copy (extract-element (in64, 1))
1431// mips-specific-node
1432// v0 = copy lo
1433// v1 = copy hi
1434// out64 = merge-values (v0, v1)
1435//
1437 SDLoc DL(Op);
1438 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other;
1440 unsigned OpNo = 0;
1441
1442 // See if Op has a chain input.
1443 if (HasChainIn)
1444 Ops.push_back(Op->getOperand(OpNo++));
1445
1446 // The next operand is the intrinsic opcode.
1447 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant);
1448
1449 // See if the next operand has type i64.
1450 SDValue Opnd = Op->getOperand(++OpNo), In64;
1451
1452 if (Opnd.getValueType() == MVT::i64)
1453 In64 = initAccumulator(Opnd, DL, DAG);
1454 else
1455 Ops.push_back(Opnd);
1456
1457 // Push the remaining operands.
1458 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo)
1459 Ops.push_back(Op->getOperand(OpNo));
1460
1461 // Add In64 to the end of the list.
1462 if (In64.getNode())
1463 Ops.push_back(In64);
1464
1465 // Scan output.
1466 SmallVector<EVT, 2> ResTys;
1467
1468 for (EVT Ty : Op->values())
1469 ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty);
1470
1471 // Create node.
1472 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
1473 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
1474
1475 if (!HasChainIn)
1476 return Out;
1477
1478 assert(Val->getValueType(1) == MVT::Other);
1479 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
1480 return DAG.getMergeValues(Vals, DL);
1481}
1482
1483// Lower an MSA copy intrinsic into the specified SelectionDAG node
1485 SDLoc DL(Op);
1486 SDValue Vec = Op->getOperand(1);
1487 SDValue Idx = Op->getOperand(2);
1488 EVT ResTy = Op->getValueType(0);
1489 EVT EltTy = Vec->getValueType(0).getVectorElementType();
1490
1491 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx,
1492 DAG.getValueType(EltTy));
1493
1494 return Result;
1495}
1496
// Splat operand OpNr of the intrinsic across every lane of the result vector.
// For v2i64 the vector is built through v4i32 (two i32 lanes per element) and
// each 32-bit lane is masked with 1 before being bitcast back, so only the
// low bit of the splatted value survives (zero-extension of an i1-style
// value — hence "ZExt" in the name).
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
  EVT ResVecTy = Op->getValueType(0);
  EVT ViaVecTy = ResVecTy;
  bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
  SDLoc DL(Op);

  // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and
  // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating
  // lanes.
  SDValue LaneA = Op->getOperand(OpNr);
  SDValue LaneB;

  if (ResVecTy == MVT::v2i64) {
    // In case of the index being passed as an immediate value, set the upper
    // lane to 0 so that the splati.d instruction can be matched.
    if (isa<ConstantSDNode>(LaneA))
      LaneB = DAG.getConstant(0, DL, MVT::i32);
    // Having the index passed in a register, set the upper lane to the same
    // value as the lower - this results in the BUILD_VECTOR node not being
    // expanded through stack. This way we are able to pattern match the set of
    // nodes created here to splat.d.
    else
      LaneB = LaneA;
    ViaVecTy = MVT::v4i32;
    // Big-endian lane order puts the high half first; swap accordingly.
    if(BigEndian)
      std::swap(LaneA, LaneB);
  } else
    LaneB = LaneA;

  // Enough operands for the widest case (v16i8); only the first
  // getVectorNumElements() entries are actually used below.
  SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                      LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  // v2i64 was built via v4i32: mask each 32-bit lane to its low bit, then
  // bitcast to the requested result type.
  if (ViaVecTy != ResVecTy) {
    SDValue One = DAG.getConstant(1, DL, ViaVecTy);
    Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy,
                         DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One));
  }

  return Result;
}
1540
1541static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG,
1542 bool IsSigned = false) {
1543 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1544 return DAG.getConstant(
1545 APInt(Op->getValueType(0).getScalarType().getSizeInBits(),
1546 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
1547 SDLoc(Op), Op->getValueType(0));
1548}
1549
// Build a BUILD_VECTOR that repeats SplatValue in every lane of VecTy.
// v2i64 is built through v4i32 (see inline comment) and bitcast back to the
// requested type; BigEndian controls the ordering of the two i32 halves.
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                                   bool BigEndian, SelectionDAG &DAG) {
  EVT ViaVecTy = VecTy;
  SDValue SplatValueA = SplatValue;
  SDValue SplatValueB = SplatValue;
  SDLoc DL(SplatValue);

  if (VecTy == MVT::v2i64) {
    // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's.
    ViaVecTy = MVT::v4i32;

    // A = low 32 bits, B = high 32 bits of the i64 splat value.
    SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue);
    SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue,
                              DAG.getConstant(32, DL, MVT::i32));
    SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB);
  }

  // We currently hold the parts in little endian order. Swap them if
  // necessary.
  if (BigEndian)
    std::swap(SplatValueA, SplatValueB);

  // Enough operands for the widest case (v16i8); only the first
  // getVectorNumElements() entries are actually used below.
  SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                      SplatValueA, SplatValueB, SplatValueA, SplatValueB };

  SDValue Result = DAG.getBuildVector(
      ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements()));

  // If we went through v4i32, bitcast back to the requested vector type.
  if (VecTy != ViaVecTy)
    Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);

  return Result;
}
1585
1587 unsigned Opc, SDValue Imm,
1588 bool BigEndian) {
1589 EVT VecTy = Op->getValueType(0);
1590 SDValue Exp2Imm;
1591 SDLoc DL(Op);
1592
1593 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it
1594 // here for now.
1595 if (VecTy == MVT::v2i64) {
1596 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) {
1597 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue();
1598
1599 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL,
1600 MVT::i32);
1601 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32);
1602
1603 if (BigEndian)
1604 std::swap(BitImmLoOp, BitImmHiOp);
1605
1606 Exp2Imm = DAG.getNode(
1607 ISD::BITCAST, DL, MVT::v2i64,
1608 DAG.getBuildVector(MVT::v4i32, DL,
1609 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp}));
1610 }
1611 }
1612
1613 if (!Exp2Imm.getNode()) {
1614 // We couldnt constant fold, do a vector shift instead
1615
1616 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
1617 // only values 0-63 are valid.
1618 if (VecTy == MVT::v2i64)
1619 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm);
1620
1621 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG);
1622
1623 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy),
1624 Exp2Imm);
1625 }
1626
1627 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm);
1628}
1629
1631 SDLoc DL(Op);
1632 EVT ResTy = Op->getValueType(0);
1633 SDValue Vec = Op->getOperand(2);
1634 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian();
1635 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32;
1636 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1,
1637 DL, ResEltTy);
1638 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG);
1639
1640 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec);
1641}
1642
1644 EVT ResTy = Op->getValueType(0);
1645 SDLoc DL(Op);
1646 SDValue One = DAG.getConstant(1, DL, ResTy);
1647 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG));
1648
1649 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1),
1650 DAG.getNOT(DL, Bit, ResTy));
1651}
1652
1654 SDLoc DL(Op);
1655 EVT ResTy = Op->getValueType(0);
1656 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
1657 << Op->getConstantOperandAPInt(2);
1658 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
1659
1660 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask);
1661}
1662
1663SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1664 SelectionDAG &DAG) const {
1665 SDLoc DL(Op);
1666 unsigned Intrinsic = Op->getConstantOperandVal(0);
1667 switch (Intrinsic) {
1668 default:
1669 return SDValue();
1670 case Intrinsic::mips_shilo:
1671 return lowerDSPIntr(Op, DAG, MipsISD::SHILO);
1672 case Intrinsic::mips_dpau_h_qbl:
1673 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL);
1674 case Intrinsic::mips_dpau_h_qbr:
1675 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR);
1676 case Intrinsic::mips_dpsu_h_qbl:
1677 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL);
1678 case Intrinsic::mips_dpsu_h_qbr:
1679 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR);
1680 case Intrinsic::mips_dpa_w_ph:
1681 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH);
1682 case Intrinsic::mips_dps_w_ph:
1683 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH);
1684 case Intrinsic::mips_dpax_w_ph:
1685 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH);
1686 case Intrinsic::mips_dpsx_w_ph:
1687 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH);
1688 case Intrinsic::mips_mulsa_w_ph:
1689 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH);
1690 case Intrinsic::mips_mult:
1691 return lowerDSPIntr(Op, DAG, MipsISD::Mult);
1692 case Intrinsic::mips_multu:
1693 return lowerDSPIntr(Op, DAG, MipsISD::Multu);
1694 case Intrinsic::mips_madd:
1695 return lowerDSPIntr(Op, DAG, MipsISD::MAdd);
1696 case Intrinsic::mips_maddu:
1697 return lowerDSPIntr(Op, DAG, MipsISD::MAddu);
1698 case Intrinsic::mips_msub:
1699 return lowerDSPIntr(Op, DAG, MipsISD::MSub);
1700 case Intrinsic::mips_msubu:
1701 return lowerDSPIntr(Op, DAG, MipsISD::MSubu);
1702 case Intrinsic::mips_addv_b:
1703 case Intrinsic::mips_addv_h:
1704 case Intrinsic::mips_addv_w:
1705 case Intrinsic::mips_addv_d:
1706 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1707 Op->getOperand(2));
1708 case Intrinsic::mips_addvi_b:
1709 case Intrinsic::mips_addvi_h:
1710 case Intrinsic::mips_addvi_w:
1711 case Intrinsic::mips_addvi_d:
1712 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1),
1713 lowerMSASplatImm(Op, 2, DAG));
1714 case Intrinsic::mips_and_v:
1715 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1716 Op->getOperand(2));
1717 case Intrinsic::mips_andi_b:
1718 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1),
1719 lowerMSASplatImm(Op, 2, DAG));
1720 case Intrinsic::mips_bclr_b:
1721 case Intrinsic::mips_bclr_h:
1722 case Intrinsic::mips_bclr_w:
1723 case Intrinsic::mips_bclr_d:
1724 return lowerMSABitClear(Op, DAG);
1725 case Intrinsic::mips_bclri_b:
1726 case Intrinsic::mips_bclri_h:
1727 case Intrinsic::mips_bclri_w:
1728 case Intrinsic::mips_bclri_d:
1729 return lowerMSABitClearImm(Op, DAG);
1730 case Intrinsic::mips_binsli_b:
1731 case Intrinsic::mips_binsli_h:
1732 case Intrinsic::mips_binsli_w:
1733 case Intrinsic::mips_binsli_d: {
1734 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear)
1735 EVT VecTy = Op->getValueType(0);
1736 EVT EltTy = VecTy.getVectorElementType();
1737 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1738 report_fatal_error("Immediate out of range");
1740 Op->getConstantOperandVal(3) + 1);
1741 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1742 DAG.getConstant(Mask, DL, VecTy, true),
1743 Op->getOperand(2), Op->getOperand(1));
1744 }
1745 case Intrinsic::mips_binsri_b:
1746 case Intrinsic::mips_binsri_h:
1747 case Intrinsic::mips_binsri_w:
1748 case Intrinsic::mips_binsri_d: {
1749 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear)
1750 EVT VecTy = Op->getValueType(0);
1751 EVT EltTy = VecTy.getVectorElementType();
1752 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits())
1753 report_fatal_error("Immediate out of range");
1754 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(),
1755 Op->getConstantOperandVal(3) + 1);
1756 return DAG.getNode(ISD::VSELECT, DL, VecTy,
1757 DAG.getConstant(Mask, DL, VecTy, true),
1758 Op->getOperand(2), Op->getOperand(1));
1759 }
1760 case Intrinsic::mips_bmnz_v:
1761 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1762 Op->getOperand(2), Op->getOperand(1));
1763 case Intrinsic::mips_bmnzi_b:
1764 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1765 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
1766 Op->getOperand(1));
1767 case Intrinsic::mips_bmz_v:
1768 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
1769 Op->getOperand(1), Op->getOperand(2));
1770 case Intrinsic::mips_bmzi_b:
1771 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1772 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
1773 Op->getOperand(2));
1774 case Intrinsic::mips_bneg_b:
1775 case Intrinsic::mips_bneg_h:
1776 case Intrinsic::mips_bneg_w:
1777 case Intrinsic::mips_bneg_d: {
1778 EVT VecTy = Op->getValueType(0);
1779 SDValue One = DAG.getConstant(1, DL, VecTy);
1780
1781 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1),
1782 DAG.getNode(ISD::SHL, DL, VecTy, One,
1783 truncateVecElts(Op, DAG)));
1784 }
1785 case Intrinsic::mips_bnegi_b:
1786 case Intrinsic::mips_bnegi_h:
1787 case Intrinsic::mips_bnegi_w:
1788 case Intrinsic::mips_bnegi_d:
1789 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2),
1790 !Subtarget.isLittle());
1791 case Intrinsic::mips_bnz_b:
1792 case Intrinsic::mips_bnz_h:
1793 case Intrinsic::mips_bnz_w:
1794 case Intrinsic::mips_bnz_d:
1795 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0),
1796 Op->getOperand(1));
1797 case Intrinsic::mips_bnz_v:
1798 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0),
1799 Op->getOperand(1));
1800 case Intrinsic::mips_bsel_v:
1801 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1802 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1803 Op->getOperand(1), Op->getOperand(3),
1804 Op->getOperand(2));
1805 case Intrinsic::mips_bseli_b:
1806 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear)
1807 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
1808 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG),
1809 Op->getOperand(2));
1810 case Intrinsic::mips_bset_b:
1811 case Intrinsic::mips_bset_h:
1812 case Intrinsic::mips_bset_w:
1813 case Intrinsic::mips_bset_d: {
1814 EVT VecTy = Op->getValueType(0);
1815 SDValue One = DAG.getConstant(1, DL, VecTy);
1816
1817 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1),
1818 DAG.getNode(ISD::SHL, DL, VecTy, One,
1819 truncateVecElts(Op, DAG)));
1820 }
1821 case Intrinsic::mips_bseti_b:
1822 case Intrinsic::mips_bseti_h:
1823 case Intrinsic::mips_bseti_w:
1824 case Intrinsic::mips_bseti_d:
1825 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2),
1826 !Subtarget.isLittle());
1827 case Intrinsic::mips_bz_b:
1828 case Intrinsic::mips_bz_h:
1829 case Intrinsic::mips_bz_w:
1830 case Intrinsic::mips_bz_d:
1831 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0),
1832 Op->getOperand(1));
1833 case Intrinsic::mips_bz_v:
1834 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0),
1835 Op->getOperand(1));
1836 case Intrinsic::mips_ceq_b:
1837 case Intrinsic::mips_ceq_h:
1838 case Intrinsic::mips_ceq_w:
1839 case Intrinsic::mips_ceq_d:
1840 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1841 Op->getOperand(2), ISD::SETEQ);
1842 case Intrinsic::mips_ceqi_b:
1843 case Intrinsic::mips_ceqi_h:
1844 case Intrinsic::mips_ceqi_w:
1845 case Intrinsic::mips_ceqi_d:
1846 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1847 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ);
1848 case Intrinsic::mips_cle_s_b:
1849 case Intrinsic::mips_cle_s_h:
1850 case Intrinsic::mips_cle_s_w:
1851 case Intrinsic::mips_cle_s_d:
1852 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1853 Op->getOperand(2), ISD::SETLE);
1854 case Intrinsic::mips_clei_s_b:
1855 case Intrinsic::mips_clei_s_h:
1856 case Intrinsic::mips_clei_s_w:
1857 case Intrinsic::mips_clei_s_d:
1858 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1859 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE);
1860 case Intrinsic::mips_cle_u_b:
1861 case Intrinsic::mips_cle_u_h:
1862 case Intrinsic::mips_cle_u_w:
1863 case Intrinsic::mips_cle_u_d:
1864 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1865 Op->getOperand(2), ISD::SETULE);
1866 case Intrinsic::mips_clei_u_b:
1867 case Intrinsic::mips_clei_u_h:
1868 case Intrinsic::mips_clei_u_w:
1869 case Intrinsic::mips_clei_u_d:
1870 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1871 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE);
1872 case Intrinsic::mips_clt_s_b:
1873 case Intrinsic::mips_clt_s_h:
1874 case Intrinsic::mips_clt_s_w:
1875 case Intrinsic::mips_clt_s_d:
1876 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1877 Op->getOperand(2), ISD::SETLT);
1878 case Intrinsic::mips_clti_s_b:
1879 case Intrinsic::mips_clti_s_h:
1880 case Intrinsic::mips_clti_s_w:
1881 case Intrinsic::mips_clti_s_d:
1882 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1883 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT);
1884 case Intrinsic::mips_clt_u_b:
1885 case Intrinsic::mips_clt_u_h:
1886 case Intrinsic::mips_clt_u_w:
1887 case Intrinsic::mips_clt_u_d:
1888 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1889 Op->getOperand(2), ISD::SETULT);
1890 case Intrinsic::mips_clti_u_b:
1891 case Intrinsic::mips_clti_u_h:
1892 case Intrinsic::mips_clti_u_w:
1893 case Intrinsic::mips_clti_u_d:
1894 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1895 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT);
1896 case Intrinsic::mips_copy_s_b:
1897 case Intrinsic::mips_copy_s_h:
1898 case Intrinsic::mips_copy_s_w:
1899 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1900 case Intrinsic::mips_copy_s_d:
1901 if (Subtarget.hasMips64())
1902 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
1903 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
1904 else {
1905 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1906 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1907 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1908 Op->getValueType(0), Op->getOperand(1),
1909 Op->getOperand(2));
1910 }
1911 case Intrinsic::mips_copy_u_b:
1912 case Intrinsic::mips_copy_u_h:
1913 case Intrinsic::mips_copy_u_w:
1914 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1915 case Intrinsic::mips_copy_u_d:
1916 if (Subtarget.hasMips64())
1917 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
1918 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
1919 else {
1920 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type
1921 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out.
1922 // Note: When i64 is illegal, this results in copy_s.w instructions
1923 // instead of copy_u.w instructions. This makes no difference to the
1924 // behaviour since i64 is only illegal when the register file is 32-bit.
1925 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
1926 Op->getValueType(0), Op->getOperand(1),
1927 Op->getOperand(2));
1928 }
1929 case Intrinsic::mips_div_s_b:
1930 case Intrinsic::mips_div_s_h:
1931 case Intrinsic::mips_div_s_w:
1932 case Intrinsic::mips_div_s_d:
1933 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1),
1934 Op->getOperand(2));
1935 case Intrinsic::mips_div_u_b:
1936 case Intrinsic::mips_div_u_h:
1937 case Intrinsic::mips_div_u_w:
1938 case Intrinsic::mips_div_u_d:
1939 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1),
1940 Op->getOperand(2));
1941 case Intrinsic::mips_fadd_w:
1942 case Intrinsic::mips_fadd_d:
1943 // TODO: If intrinsics have fast-math-flags, propagate them.
1944 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1),
1945 Op->getOperand(2));
1946 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away
1947 case Intrinsic::mips_fceq_w:
1948 case Intrinsic::mips_fceq_d:
1949 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1950 Op->getOperand(2), ISD::SETOEQ);
1951 case Intrinsic::mips_fcle_w:
1952 case Intrinsic::mips_fcle_d:
1953 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1954 Op->getOperand(2), ISD::SETOLE);
1955 case Intrinsic::mips_fclt_w:
1956 case Intrinsic::mips_fclt_d:
1957 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1958 Op->getOperand(2), ISD::SETOLT);
1959 case Intrinsic::mips_fcne_w:
1960 case Intrinsic::mips_fcne_d:
1961 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1962 Op->getOperand(2), ISD::SETONE);
1963 case Intrinsic::mips_fcor_w:
1964 case Intrinsic::mips_fcor_d:
1965 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1966 Op->getOperand(2), ISD::SETO);
1967 case Intrinsic::mips_fcueq_w:
1968 case Intrinsic::mips_fcueq_d:
1969 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1970 Op->getOperand(2), ISD::SETUEQ);
1971 case Intrinsic::mips_fcule_w:
1972 case Intrinsic::mips_fcule_d:
1973 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1974 Op->getOperand(2), ISD::SETULE);
1975 case Intrinsic::mips_fcult_w:
1976 case Intrinsic::mips_fcult_d:
1977 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1978 Op->getOperand(2), ISD::SETULT);
1979 case Intrinsic::mips_fcun_w:
1980 case Intrinsic::mips_fcun_d:
1981 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1982 Op->getOperand(2), ISD::SETUO);
1983 case Intrinsic::mips_fcune_w:
1984 case Intrinsic::mips_fcune_d:
1985 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1),
1986 Op->getOperand(2), ISD::SETUNE);
1987 case Intrinsic::mips_fdiv_w:
1988 case Intrinsic::mips_fdiv_d:
1989 // TODO: If intrinsics have fast-math-flags, propagate them.
1990 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1),
1991 Op->getOperand(2));
1992 case Intrinsic::mips_ffint_u_w:
1993 case Intrinsic::mips_ffint_u_d:
1994 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0),
1995 Op->getOperand(1));
1996 case Intrinsic::mips_ffint_s_w:
1997 case Intrinsic::mips_ffint_s_d:
1998 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0),
1999 Op->getOperand(1));
2000 case Intrinsic::mips_fill_b:
2001 case Intrinsic::mips_fill_h:
2002 case Intrinsic::mips_fill_w:
2003 case Intrinsic::mips_fill_d: {
2004 EVT ResTy = Op->getValueType(0);
2006 Op->getOperand(1));
2007
2008 // If ResTy is v2i64 then the type legalizer will break this node down into
2009 // an equivalent v4i32.
2010 return DAG.getBuildVector(ResTy, DL, Ops);
2011 }
2012 case Intrinsic::mips_fexp2_w:
2013 case Intrinsic::mips_fexp2_d: {
2014 // TODO: If intrinsics have fast-math-flags, propagate them.
2015 EVT ResTy = Op->getValueType(0);
2016 return DAG.getNode(
2017 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1),
2018 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2)));
2019 }
2020 case Intrinsic::mips_flog2_w:
2021 case Intrinsic::mips_flog2_d:
2022 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1));
2023 case Intrinsic::mips_fmadd_w:
2024 case Intrinsic::mips_fmadd_d:
2025 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0),
2026 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2027 case Intrinsic::mips_fmul_w:
2028 case Intrinsic::mips_fmul_d:
2029 // TODO: If intrinsics have fast-math-flags, propagate them.
2030 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1),
2031 Op->getOperand(2));
2032 case Intrinsic::mips_fmsub_w:
2033 case Intrinsic::mips_fmsub_d: {
2034 // TODO: If intrinsics have fast-math-flags, propagate them.
2035 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0),
2036 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2037 }
2038 case Intrinsic::mips_frint_w:
2039 case Intrinsic::mips_frint_d:
2040 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1));
2041 case Intrinsic::mips_fsqrt_w:
2042 case Intrinsic::mips_fsqrt_d:
2043 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1));
2044 case Intrinsic::mips_fsub_w:
2045 case Intrinsic::mips_fsub_d:
2046 // TODO: If intrinsics have fast-math-flags, propagate them.
2047 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1),
2048 Op->getOperand(2));
2049 case Intrinsic::mips_ftrunc_u_w:
2050 case Intrinsic::mips_ftrunc_u_d:
2051 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0),
2052 Op->getOperand(1));
2053 case Intrinsic::mips_ftrunc_s_w:
2054 case Intrinsic::mips_ftrunc_s_d:
2055 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0),
2056 Op->getOperand(1));
2057 case Intrinsic::mips_ilvev_b:
2058 case Intrinsic::mips_ilvev_h:
2059 case Intrinsic::mips_ilvev_w:
2060 case Intrinsic::mips_ilvev_d:
2061 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0),
2062 Op->getOperand(1), Op->getOperand(2));
2063 case Intrinsic::mips_ilvl_b:
2064 case Intrinsic::mips_ilvl_h:
2065 case Intrinsic::mips_ilvl_w:
2066 case Intrinsic::mips_ilvl_d:
2067 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0),
2068 Op->getOperand(1), Op->getOperand(2));
2069 case Intrinsic::mips_ilvod_b:
2070 case Intrinsic::mips_ilvod_h:
2071 case Intrinsic::mips_ilvod_w:
2072 case Intrinsic::mips_ilvod_d:
2073 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0),
2074 Op->getOperand(1), Op->getOperand(2));
2075 case Intrinsic::mips_ilvr_b:
2076 case Intrinsic::mips_ilvr_h:
2077 case Intrinsic::mips_ilvr_w:
2078 case Intrinsic::mips_ilvr_d:
2079 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0),
2080 Op->getOperand(1), Op->getOperand(2));
2081 case Intrinsic::mips_insert_b:
2082 case Intrinsic::mips_insert_h:
2083 case Intrinsic::mips_insert_w:
2084 case Intrinsic::mips_insert_d:
2085 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0),
2086 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2));
2087 case Intrinsic::mips_insve_b:
2088 case Intrinsic::mips_insve_h:
2089 case Intrinsic::mips_insve_w:
2090 case Intrinsic::mips_insve_d: {
2091 // Report an error for out of range values.
2092 int64_t Max;
2093 switch (Intrinsic) {
2094 case Intrinsic::mips_insve_b: Max = 15; break;
2095 case Intrinsic::mips_insve_h: Max = 7; break;
2096 case Intrinsic::mips_insve_w: Max = 3; break;
2097 case Intrinsic::mips_insve_d: Max = 1; break;
2098 default: llvm_unreachable("Unmatched intrinsic");
2099 }
2100 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2101 if (Value < 0 || Value > Max)
2102 report_fatal_error("Immediate out of range");
2103 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0),
2104 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3),
2105 DAG.getConstant(0, DL, MVT::i32));
2106 }
2107 case Intrinsic::mips_ldi_b:
2108 case Intrinsic::mips_ldi_h:
2109 case Intrinsic::mips_ldi_w:
2110 case Intrinsic::mips_ldi_d:
2111 return lowerMSASplatImm(Op, 1, DAG, true);
2112 case Intrinsic::mips_lsa:
2113 case Intrinsic::mips_dlsa: {
2114 EVT ResTy = Op->getValueType(0);
2115 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2116 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy,
2117 Op->getOperand(2), Op->getOperand(3)));
2118 }
2119 case Intrinsic::mips_maddv_b:
2120 case Intrinsic::mips_maddv_h:
2121 case Intrinsic::mips_maddv_w:
2122 case Intrinsic::mips_maddv_d: {
2123 EVT ResTy = Op->getValueType(0);
2124 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1),
2125 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2126 Op->getOperand(2), Op->getOperand(3)));
2127 }
2128 case Intrinsic::mips_max_s_b:
2129 case Intrinsic::mips_max_s_h:
2130 case Intrinsic::mips_max_s_w:
2131 case Intrinsic::mips_max_s_d:
2132 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2133 Op->getOperand(1), Op->getOperand(2));
2134 case Intrinsic::mips_max_u_b:
2135 case Intrinsic::mips_max_u_h:
2136 case Intrinsic::mips_max_u_w:
2137 case Intrinsic::mips_max_u_d:
2138 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2139 Op->getOperand(1), Op->getOperand(2));
2140 case Intrinsic::mips_maxi_s_b:
2141 case Intrinsic::mips_maxi_s_h:
2142 case Intrinsic::mips_maxi_s_w:
2143 case Intrinsic::mips_maxi_s_d:
2144 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0),
2145 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2146 case Intrinsic::mips_maxi_u_b:
2147 case Intrinsic::mips_maxi_u_h:
2148 case Intrinsic::mips_maxi_u_w:
2149 case Intrinsic::mips_maxi_u_d:
2150 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0),
2151 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2152 case Intrinsic::mips_min_s_b:
2153 case Intrinsic::mips_min_s_h:
2154 case Intrinsic::mips_min_s_w:
2155 case Intrinsic::mips_min_s_d:
2156 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2157 Op->getOperand(1), Op->getOperand(2));
2158 case Intrinsic::mips_min_u_b:
2159 case Intrinsic::mips_min_u_h:
2160 case Intrinsic::mips_min_u_w:
2161 case Intrinsic::mips_min_u_d:
2162 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2163 Op->getOperand(1), Op->getOperand(2));
2164 case Intrinsic::mips_mini_s_b:
2165 case Intrinsic::mips_mini_s_h:
2166 case Intrinsic::mips_mini_s_w:
2167 case Intrinsic::mips_mini_s_d:
2168 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0),
2169 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true));
2170 case Intrinsic::mips_mini_u_b:
2171 case Intrinsic::mips_mini_u_h:
2172 case Intrinsic::mips_mini_u_w:
2173 case Intrinsic::mips_mini_u_d:
2174 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0),
2175 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2176 case Intrinsic::mips_mod_s_b:
2177 case Intrinsic::mips_mod_s_h:
2178 case Intrinsic::mips_mod_s_w:
2179 case Intrinsic::mips_mod_s_d:
2180 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1),
2181 Op->getOperand(2));
2182 case Intrinsic::mips_mod_u_b:
2183 case Intrinsic::mips_mod_u_h:
2184 case Intrinsic::mips_mod_u_w:
2185 case Intrinsic::mips_mod_u_d:
2186 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1),
2187 Op->getOperand(2));
2188 case Intrinsic::mips_mulv_b:
2189 case Intrinsic::mips_mulv_h:
2190 case Intrinsic::mips_mulv_w:
2191 case Intrinsic::mips_mulv_d:
2192 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1),
2193 Op->getOperand(2));
2194 case Intrinsic::mips_msubv_b:
2195 case Intrinsic::mips_msubv_h:
2196 case Intrinsic::mips_msubv_w:
2197 case Intrinsic::mips_msubv_d: {
2198 EVT ResTy = Op->getValueType(0);
2199 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1),
2200 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy,
2201 Op->getOperand(2), Op->getOperand(3)));
2202 }
2203 case Intrinsic::mips_nlzc_b:
2204 case Intrinsic::mips_nlzc_h:
2205 case Intrinsic::mips_nlzc_w:
2206 case Intrinsic::mips_nlzc_d:
2207 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1));
2208 case Intrinsic::mips_nor_v: {
2209 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2210 Op->getOperand(1), Op->getOperand(2));
2211 return DAG.getNOT(DL, Res, Res->getValueType(0));
2212 }
2213 case Intrinsic::mips_nori_b: {
2214 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2215 Op->getOperand(1),
2216 lowerMSASplatImm(Op, 2, DAG));
2217 return DAG.getNOT(DL, Res, Res->getValueType(0));
2218 }
2219 case Intrinsic::mips_or_v:
2220 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), Op->getOperand(1),
2221 Op->getOperand(2));
2222 case Intrinsic::mips_ori_b:
2223 return DAG.getNode(ISD::OR, DL, Op->getValueType(0),
2224 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2225 case Intrinsic::mips_pckev_b:
2226 case Intrinsic::mips_pckev_h:
2227 case Intrinsic::mips_pckev_w:
2228 case Intrinsic::mips_pckev_d:
2229 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0),
2230 Op->getOperand(1), Op->getOperand(2));
2231 case Intrinsic::mips_pckod_b:
2232 case Intrinsic::mips_pckod_h:
2233 case Intrinsic::mips_pckod_w:
2234 case Intrinsic::mips_pckod_d:
2235 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0),
2236 Op->getOperand(1), Op->getOperand(2));
2237 case Intrinsic::mips_pcnt_b:
2238 case Intrinsic::mips_pcnt_h:
2239 case Intrinsic::mips_pcnt_w:
2240 case Intrinsic::mips_pcnt_d:
2241 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1));
2242 case Intrinsic::mips_sat_s_b:
2243 case Intrinsic::mips_sat_s_h:
2244 case Intrinsic::mips_sat_s_w:
2245 case Intrinsic::mips_sat_s_d:
2246 case Intrinsic::mips_sat_u_b:
2247 case Intrinsic::mips_sat_u_h:
2248 case Intrinsic::mips_sat_u_w:
2249 case Intrinsic::mips_sat_u_d: {
2250 // Report an error for out of range values.
2251 int64_t Max;
2252 switch (Intrinsic) {
2253 case Intrinsic::mips_sat_s_b:
2254 case Intrinsic::mips_sat_u_b: Max = 7; break;
2255 case Intrinsic::mips_sat_s_h:
2256 case Intrinsic::mips_sat_u_h: Max = 15; break;
2257 case Intrinsic::mips_sat_s_w:
2258 case Intrinsic::mips_sat_u_w: Max = 31; break;
2259 case Intrinsic::mips_sat_s_d:
2260 case Intrinsic::mips_sat_u_d: Max = 63; break;
2261 default: llvm_unreachable("Unmatched intrinsic");
2262 }
2263 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2264 if (Value < 0 || Value > Max)
2265 report_fatal_error("Immediate out of range");
2266 return SDValue();
2267 }
2268 case Intrinsic::mips_shf_b:
2269 case Intrinsic::mips_shf_h:
2270 case Intrinsic::mips_shf_w: {
2271 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2272 if (Value < 0 || Value > 255)
2273 report_fatal_error("Immediate out of range");
2274 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0),
2275 Op->getOperand(2), Op->getOperand(1));
2276 }
2277 case Intrinsic::mips_sldi_b:
2278 case Intrinsic::mips_sldi_h:
2279 case Intrinsic::mips_sldi_w:
2280 case Intrinsic::mips_sldi_d: {
2281 // Report an error for out of range values.
2282 int64_t Max;
2283 switch (Intrinsic) {
2284 case Intrinsic::mips_sldi_b: Max = 15; break;
2285 case Intrinsic::mips_sldi_h: Max = 7; break;
2286 case Intrinsic::mips_sldi_w: Max = 3; break;
2287 case Intrinsic::mips_sldi_d: Max = 1; break;
2288 default: llvm_unreachable("Unmatched intrinsic");
2289 }
2290 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue();
2291 if (Value < 0 || Value > Max)
2292 report_fatal_error("Immediate out of range");
2293 return SDValue();
2294 }
2295 case Intrinsic::mips_sll_b:
2296 case Intrinsic::mips_sll_h:
2297 case Intrinsic::mips_sll_w:
2298 case Intrinsic::mips_sll_d:
2299 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1),
2300 truncateVecElts(Op, DAG));
2301 case Intrinsic::mips_slli_b:
2302 case Intrinsic::mips_slli_h:
2303 case Intrinsic::mips_slli_w:
2304 case Intrinsic::mips_slli_d:
2305 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0),
2306 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2307 case Intrinsic::mips_splat_b:
2308 case Intrinsic::mips_splat_h:
2309 case Intrinsic::mips_splat_w:
2310 case Intrinsic::mips_splat_d:
2311 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle
2312 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because
2313 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32.
2314 // Instead we lower to MipsISD::VSHF and match from there.
2315 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2316 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1),
2317 Op->getOperand(1));
2318 case Intrinsic::mips_splati_b:
2319 case Intrinsic::mips_splati_h:
2320 case Intrinsic::mips_splati_w:
2321 case Intrinsic::mips_splati_d:
2322 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2323 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1),
2324 Op->getOperand(1));
2325 case Intrinsic::mips_sra_b:
2326 case Intrinsic::mips_sra_h:
2327 case Intrinsic::mips_sra_w:
2328 case Intrinsic::mips_sra_d:
2329 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1),
2330 truncateVecElts(Op, DAG));
2331 case Intrinsic::mips_srai_b:
2332 case Intrinsic::mips_srai_h:
2333 case Intrinsic::mips_srai_w:
2334 case Intrinsic::mips_srai_d:
2335 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0),
2336 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2337 case Intrinsic::mips_srari_b:
2338 case Intrinsic::mips_srari_h:
2339 case Intrinsic::mips_srari_w:
2340 case Intrinsic::mips_srari_d: {
2341 // Report an error for out of range values.
2342 int64_t Max;
2343 switch (Intrinsic) {
2344 case Intrinsic::mips_srari_b: Max = 7; break;
2345 case Intrinsic::mips_srari_h: Max = 15; break;
2346 case Intrinsic::mips_srari_w: Max = 31; break;
2347 case Intrinsic::mips_srari_d: Max = 63; break;
2348 default: llvm_unreachable("Unmatched intrinsic");
2349 }
2350 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2351 if (Value < 0 || Value > Max)
2352 report_fatal_error("Immediate out of range");
2353 return SDValue();
2354 }
2355 case Intrinsic::mips_srl_b:
2356 case Intrinsic::mips_srl_h:
2357 case Intrinsic::mips_srl_w:
2358 case Intrinsic::mips_srl_d:
2359 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1),
2360 truncateVecElts(Op, DAG));
2361 case Intrinsic::mips_srli_b:
2362 case Intrinsic::mips_srli_h:
2363 case Intrinsic::mips_srli_w:
2364 case Intrinsic::mips_srli_d:
2365 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0),
2366 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2367 case Intrinsic::mips_srlri_b:
2368 case Intrinsic::mips_srlri_h:
2369 case Intrinsic::mips_srlri_w:
2370 case Intrinsic::mips_srlri_d: {
2371 // Report an error for out of range values.
2372 int64_t Max;
2373 switch (Intrinsic) {
2374 case Intrinsic::mips_srlri_b: Max = 7; break;
2375 case Intrinsic::mips_srlri_h: Max = 15; break;
2376 case Intrinsic::mips_srlri_w: Max = 31; break;
2377 case Intrinsic::mips_srlri_d: Max = 63; break;
2378 default: llvm_unreachable("Unmatched intrinsic");
2379 }
2380 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue();
2381 if (Value < 0 || Value > Max)
2382 report_fatal_error("Immediate out of range");
2383 return SDValue();
2384 }
2385 case Intrinsic::mips_subv_b:
2386 case Intrinsic::mips_subv_h:
2387 case Intrinsic::mips_subv_w:
2388 case Intrinsic::mips_subv_d:
2389 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1),
2390 Op->getOperand(2));
2391 case Intrinsic::mips_subvi_b:
2392 case Intrinsic::mips_subvi_h:
2393 case Intrinsic::mips_subvi_w:
2394 case Intrinsic::mips_subvi_d:
2395 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0),
2396 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2397 case Intrinsic::mips_vshf_b:
2398 case Intrinsic::mips_vshf_h:
2399 case Intrinsic::mips_vshf_w:
2400 case Intrinsic::mips_vshf_d:
2401 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0),
2402 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3));
2403 case Intrinsic::mips_xor_v:
2404 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1),
2405 Op->getOperand(2));
2406 case Intrinsic::mips_xori_b:
2407 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0),
2408 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG));
2409 case Intrinsic::thread_pointer: {
2410 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2411 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT);
2412 }
2413 }
2414}
2415
2416static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr,
2417 const MipsSubtarget &Subtarget) {
2418 SDLoc DL(Op);
2419 SDValue ChainIn = Op->getOperand(0);
2420 SDValue Address = Op->getOperand(2);
2421 SDValue Offset = Op->getOperand(3);
2422 EVT ResTy = Op->getValueType(0);
2423 EVT PtrTy = Address->getValueType(0);
2424
2425 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2426 // however takes an i32 signed constant offset. The actual type of the
2427 // intrinsic is a scaled signed i10.
2428 if (Subtarget.isABI_N64())
2429 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2430
2431 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2432 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(),
2433 Align(16));
2434}
2435
2436SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2437 SelectionDAG &DAG) const {
2438 unsigned Intr = Op->getConstantOperandVal(1);
2439 switch (Intr) {
2440 default:
2441 return SDValue();
2442 case Intrinsic::mips_extp:
2443 return lowerDSPIntr(Op, DAG, MipsISD::EXTP);
2444 case Intrinsic::mips_extpdp:
2445 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP);
2446 case Intrinsic::mips_extr_w:
2447 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W);
2448 case Intrinsic::mips_extr_r_w:
2449 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W);
2450 case Intrinsic::mips_extr_rs_w:
2451 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W);
2452 case Intrinsic::mips_extr_s_h:
2453 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H);
2454 case Intrinsic::mips_mthlip:
2455 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP);
2456 case Intrinsic::mips_mulsaq_s_w_ph:
2457 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH);
2458 case Intrinsic::mips_maq_s_w_phl:
2459 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL);
2460 case Intrinsic::mips_maq_s_w_phr:
2461 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR);
2462 case Intrinsic::mips_maq_sa_w_phl:
2463 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL);
2464 case Intrinsic::mips_maq_sa_w_phr:
2465 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR);
2466 case Intrinsic::mips_dpaq_s_w_ph:
2467 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH);
2468 case Intrinsic::mips_dpsq_s_w_ph:
2469 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH);
2470 case Intrinsic::mips_dpaq_sa_l_w:
2471 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W);
2472 case Intrinsic::mips_dpsq_sa_l_w:
2473 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W);
2474 case Intrinsic::mips_dpaqx_s_w_ph:
2475 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH);
2476 case Intrinsic::mips_dpaqx_sa_w_ph:
2477 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH);
2478 case Intrinsic::mips_dpsqx_s_w_ph:
2479 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH);
2480 case Intrinsic::mips_dpsqx_sa_w_ph:
2481 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH);
2482 case Intrinsic::mips_ld_b:
2483 case Intrinsic::mips_ld_h:
2484 case Intrinsic::mips_ld_w:
2485 case Intrinsic::mips_ld_d:
2486 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget);
2487 }
2488}
2489
2491 const MipsSubtarget &Subtarget) {
2492 SDLoc DL(Op);
2493 SDValue ChainIn = Op->getOperand(0);
2494 SDValue Value = Op->getOperand(2);
2495 SDValue Address = Op->getOperand(3);
2496 SDValue Offset = Op->getOperand(4);
2497 EVT PtrTy = Address->getValueType(0);
2498
2499 // For N64 addresses have the underlying type MVT::i64. This intrinsic
2500 // however takes an i32 signed constant offset. The actual type of the
2501 // intrinsic is a scaled signed i10.
2502 if (Subtarget.isABI_N64())
2503 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset);
2504
2505 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset);
2506
2507 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(),
2508 Align(16));
2509}
2510
2511SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2512 SelectionDAG &DAG) const {
2513 unsigned Intr = Op->getConstantOperandVal(1);
2514 switch (Intr) {
2515 default:
2516 return SDValue();
2517 case Intrinsic::mips_st_b:
2518 case Intrinsic::mips_st_h:
2519 case Intrinsic::mips_st_w:
2520 case Intrinsic::mips_st_d:
2521 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget);
2522 }
2523}
2524
2525// Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT.
2526//
2527// The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We
2528// choose to sign-extend but we could have equally chosen zero-extend. The
2529// DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT
2530// result into this node later (possibly changing it to a zero-extend in the
2531// process).
2532SDValue MipsSETargetLowering::
2533lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
2534 SDLoc DL(Op);
2535 EVT ResTy = Op->getValueType(0);
2536 SDValue Op0 = Op->getOperand(0);
2537 EVT VecTy = Op0->getValueType(0);
2538
2539 if (!VecTy.is128BitVector())
2540 return SDValue();
2541
2542 if (ResTy.isInteger()) {
2543 SDValue Op1 = Op->getOperand(1);
2544 EVT EltTy = VecTy.getVectorElementType();
2545 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1,
2546 DAG.getValueType(EltTy));
2547 }
2548
2549 return Op;
2550}
2551
2552static bool isConstantOrUndef(const SDValue Op) {
2553 if (Op->isUndef())
2554 return true;
2556 return true;
2558 return true;
2559 return false;
2560}
2561
2563 for (unsigned i = 0; i < Op->getNumOperands(); ++i)
2564 if (isConstantOrUndef(Op->getOperand(i)))
2565 return true;
2566 return false;
2567}
2568
2569// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the
2570// backend.
2571//
2572// Lowers according to the following rules:
2573// - Constant splats are legal as-is as long as the SplatBitSize is a power of
2574// 2 less than or equal to 64 and the value fits into a signed 10-bit
2575// immediate
2576// - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize
2577// is a power of 2 less than or equal to 64 and the value does not fit into a
2578// signed 10-bit immediate
2579// - Non-constant splats are legal as-is.
2580// - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT.
2581// - All others are illegal and must be expanded.
2582SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op,
2583 SelectionDAG &DAG) const {
2584 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
2585 EVT ResTy = Op->getValueType(0);
2586 SDLoc DL(Op);
2587 APInt SplatValue, SplatUndef;
2588 unsigned SplatBitSize;
2589 bool HasAnyUndefs;
2590
2591 if (!Subtarget.hasMSA() || !ResTy.is128BitVector())
2592 return SDValue();
2593
2594 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2595 HasAnyUndefs, 8,
2596 !Subtarget.isLittle()) && SplatBitSize <= 64) {
2597 // We can only cope with 8, 16, 32, or 64-bit elements
2598 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
2599 SplatBitSize != 64)
2600 return SDValue();
2601
2602 // If the value isn't an integer type we will have to bitcast
2603 // from an integer type first. Also, if there are any undefs, we must
2604 // lower them to defined values first.
2605 if (ResTy.isInteger() && !HasAnyUndefs)
2606 return Op;
2607
2608 EVT ViaVecTy;
2609
2610 switch (SplatBitSize) {
2611 default:
2612 return SDValue();
2613 case 8:
2614 ViaVecTy = MVT::v16i8;
2615 break;
2616 case 16:
2617 ViaVecTy = MVT::v8i16;
2618 break;
2619 case 32:
2620 ViaVecTy = MVT::v4i32;
2621 break;
2622 case 64:
2623 // There's no fill.d to fall back on for 64-bit values
2624 return SDValue();
2625 }
2626
2627 // SelectionDAG::getConstant will promote SplatValue appropriately.
2628 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
2629
2630 // Bitcast to the type we originally wanted
2631 if (ViaVecTy != ResTy)
2632 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
2633
2634 return Result;
2635 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false))
2636 return Op;
2637 else if (!isConstantOrUndefBUILD_VECTOR(Node)) {
2638 // Use INSERT_VECTOR_ELT operations rather than expand to stores.
2639 // The resulting code is the same length as the expansion, but it doesn't
2640 // use memory operations
2641 EVT ResTy = Node->getValueType(0);
2642
2643 assert(ResTy.isVector());
2644
2645 unsigned NumElts = ResTy.getVectorNumElements();
2646 SDValue Vector = DAG.getUNDEF(ResTy);
2647 for (unsigned i = 0; i < NumElts; ++i) {
2649 Node->getOperand(i),
2650 DAG.getConstant(i, DL, MVT::i32));
2651 }
2652 return Vector;
2653 }
2654
2655 return SDValue();
2656}
2657
2658// Lower VECTOR_SHUFFLE into SHF (if possible).
2659//
2660// SHF splits the vector into blocks of four elements, then shuffles these
2661// elements according to a <4 x i2> constant (encoded as an integer immediate).
2662//
2663// It is therefore possible to lower into SHF when the mask takes the form:
2664// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
2665// When undef's appear they are treated as if they were whatever value is
2666// necessary in order to fit the above forms.
2667//
2668// For example:
2669// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
2670// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
2671// i32 7, i32 6, i32 5, i32 4>
2672// is lowered to:
2673// (SHF_H $w0, $w1, 27)
2674// where the 27 comes from:
2675// 3 + (2 << 2) + (1 << 4) + (0 << 6)
2677 SmallVector<int, 16> Indices,
2678 SelectionDAG &DAG) {
2679 int SHFIndices[4] = { -1, -1, -1, -1 };
2680
2681 if (Indices.size() < 4)
2682 return SDValue();
2683
2684 for (unsigned i = 0; i < 4; ++i) {
2685 for (unsigned j = i; j < Indices.size(); j += 4) {
2686 int Idx = Indices[j];
2687
2688 // Convert from vector index to 4-element subvector index
2689 // If an index refers to an element outside of the subvector then give up
2690 if (Idx != -1) {
2691 Idx -= 4 * (j / 4);
2692 if (Idx < 0 || Idx >= 4)
2693 return SDValue();
2694 }
2695
2696 // If the mask has an undef, replace it with the current index.
2697 // Note that it might still be undef if the current index is also undef
2698 if (SHFIndices[i] == -1)
2699 SHFIndices[i] = Idx;
2700
2701 // Check that non-undef values are the same as in the mask. If they
2702 // aren't then give up
2703 if (!(Idx == -1 || Idx == SHFIndices[i]))
2704 return SDValue();
2705 }
2706 }
2707
2708 // Calculate the immediate. Replace any remaining undefs with zero
2709 APInt Imm(32, 0);
2710 for (int i = 3; i >= 0; --i) {
2711 int Idx = SHFIndices[i];
2712
2713 if (Idx == -1)
2714 Idx = 0;
2715
2716 Imm <<= 2;
2717 Imm |= Idx & 0x3;
2718 }
2719
2720 SDLoc DL(Op);
2721 return DAG.getNode(MipsISD::SHF, DL, ResTy,
2722 DAG.getTargetConstant(Imm, DL, MVT::i32),
2723 Op->getOperand(0));
2724}
2725
2726/// Determine whether a range fits a regular pattern of values.
2727/// This function accounts for the possibility of jumping over the End iterator.
2728template <typename ValType>
2729static bool
2731 unsigned CheckStride,
2733 ValType ExpectedIndex, unsigned ExpectedIndexStride) {
2734 auto &I = Begin;
2735
2736 while (I != End) {
2737 if (*I != -1 && *I != ExpectedIndex)
2738 return false;
2739 ExpectedIndex += ExpectedIndexStride;
2740
2741 // Incrementing past End is undefined behaviour so we must increment one
2742 // step at a time and check for End at each step.
2743 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
2744 ; // Empty loop body.
2745 }
2746 return true;
2747}
2748
2749// Determine whether VECTOR_SHUFFLE is a SPLATI.
2750//
2751// It is a SPLATI when the mask is:
2752// <x, x, x, ...>
2753// where x is any valid index.
2754//
2755// When undef's appear in the mask they are treated as if they were whatever
2756// value is necessary in order to fit the above form.
2758 SmallVector<int, 16> Indices,
2759 SelectionDAG &DAG) {
2760 assert((Indices.size() % 2) == 0);
2761
2762 int SplatIndex = -1;
2763 for (const auto &V : Indices) {
2764 if (V != -1) {
2765 SplatIndex = V;
2766 break;
2767 }
2768 }
2769
2770 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex,
2771 0);
2772}
2773
2774// Lower VECTOR_SHUFFLE into ILVEV (if possible).
2775//
2776// ILVEV interleaves the even elements from each vector.
2777//
2778// It is possible to lower into ILVEV when the mask consists of two of the
2779// following forms interleaved:
2780// <0, 2, 4, ...>
2781// <n, n+2, n+4, ...>
2782// where n is the number of elements in the vector.
2783// For example:
2784// <0, 0, 2, 2, 4, 4, ...>
2785// <0, n, 2, n+2, 4, n+4, ...>
2786//
2787// When undef's appear in the mask they are treated as if they were whatever
2788// value is necessary in order to fit the above forms.
2790 SmallVector<int, 16> Indices,
2791 SelectionDAG &DAG) {
2792 assert((Indices.size() % 2) == 0);
2793
2794 SDValue Wt;
2795 SDValue Ws;
2796 const auto &Begin = Indices.begin();
2797 const auto &End = Indices.end();
2798
2799 // Check even elements are taken from the even elements of one half or the
2800 // other and pick an operand accordingly.
2801 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
2802 Wt = Op->getOperand(0);
2803 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2))
2804 Wt = Op->getOperand(1);
2805 else
2806 return SDValue();
2807
2808 // Check odd elements are taken from the even elements of one half or the
2809 // other and pick an operand accordingly.
2810 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
2811 Ws = Op->getOperand(0);
2812 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2))
2813 Ws = Op->getOperand(1);
2814 else
2815 return SDValue();
2816
2817 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt);
2818}
2819
2820// Lower VECTOR_SHUFFLE into ILVOD (if possible).
2821//
2822// ILVOD interleaves the odd elements from each vector.
2823//
2824// It is possible to lower into ILVOD when the mask consists of two of the
2825// following forms interleaved:
2826// <1, 3, 5, ...>
2827// <n+1, n+3, n+5, ...>
2828// where n is the number of elements in the vector.
2829// For example:
2830// <1, 1, 3, 3, 5, 5, ...>
2831// <1, n+1, 3, n+3, 5, n+5, ...>
2832//
2833// When undef's appear in the mask they are treated as if they were whatever
2834// value is necessary in order to fit the above forms.
2836 SmallVector<int, 16> Indices,
2837 SelectionDAG &DAG) {
2838 assert((Indices.size() % 2) == 0);
2839
2840 SDValue Wt;
2841 SDValue Ws;
2842 const auto &Begin = Indices.begin();
2843 const auto &End = Indices.end();
2844
2845 // Check even elements are taken from the odd elements of one half or the
2846 // other and pick an operand accordingly.
2847 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
2848 Wt = Op->getOperand(0);
2849 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2))
2850 Wt = Op->getOperand(1);
2851 else
2852 return SDValue();
2853
2854 // Check odd elements are taken from the odd elements of one half or the
2855 // other and pick an operand accordingly.
2856 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
2857 Ws = Op->getOperand(0);
2858 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2))
2859 Ws = Op->getOperand(1);
2860 else
2861 return SDValue();
2862
2863 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Ws, Wt);
2864}
2865
2866// Lower VECTOR_SHUFFLE into ILVR (if possible).
2867//
2868// ILVR interleaves consecutive elements from the right (lowest-indexed) half of
2869// each vector.
2870//
2871// It is possible to lower into ILVR when the mask consists of two of the
2872// following forms interleaved:
2873// <0, 1, 2, ...>
2874// <n, n+1, n+2, ...>
2875// where n is the number of elements in the vector.
2876// For example:
2877// <0, 0, 1, 1, 2, 2, ...>
2878// <0, n, 1, n+1, 2, n+2, ...>
2879//
2880// When undef's appear in the mask they are treated as if they were whatever
2881// value is necessary in order to fit the above forms.
2883 SmallVector<int, 16> Indices,
2884 SelectionDAG &DAG) {
2885 assert((Indices.size() % 2) == 0);
2886
2887 SDValue Wt;
2888 SDValue Ws;
2889 const auto &Begin = Indices.begin();
2890 const auto &End = Indices.end();
2891
2892 // Check even elements are taken from the right (lowest-indexed) elements of
2893 // one half or the other and pick an operand accordingly.
2894 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
2895 Wt = Op->getOperand(0);
2896 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1))
2897 Wt = Op->getOperand(1);
2898 else
2899 return SDValue();
2900
2901 // Check odd elements are taken from the right (lowest-indexed) elements of
2902 // one half or the other and pick an operand accordingly.
2903 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
2904 Ws = Op->getOperand(0);
2905 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1))
2906 Ws = Op->getOperand(1);
2907 else
2908 return SDValue();
2909
2910 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt);
2911}
2912
2913// Lower VECTOR_SHUFFLE into ILVL (if possible).
2914//
2915// ILVL interleaves consecutive elements from the left (highest-indexed) half
2916// of each vector.
2917//
2918// It is possible to lower into ILVL when the mask consists of two of the
2919// following forms interleaved:
2920// <x, x+1, x+2, ...>
2921// <n+x, n+x+1, n+x+2, ...>
2922// where n is the number of elements in the vector and x is half n.
2923// For example:
2924// <x, x, x+1, x+1, x+2, x+2, ...>
2925// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
2926//
2927// When undef's appear in the mask they are treated as if they were whatever
2928// value is necessary in order to fit the above forms.
2930 SmallVector<int, 16> Indices,
2931 SelectionDAG &DAG) {
2932 assert((Indices.size() % 2) == 0);
2933
2934 unsigned HalfSize = Indices.size() / 2;
2935 SDValue Wt;
2936 SDValue Ws;
2937 const auto &Begin = Indices.begin();
2938 const auto &End = Indices.end();
2939
2940 // Check even elements are taken from the left (highest-indexed) elements of
2941 // one half or the other and pick an operand accordingly.
2942 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
2943 Wt = Op->getOperand(0);
2944 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1))
2945 Wt = Op->getOperand(1);
2946 else
2947 return SDValue();
2948
2949 // Check odd elements are taken from the left (highest-indexed) elements of
2950 // one half or the other and pick an operand accordingly.
2951 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
2952 Ws = Op->getOperand(0);
2953 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize,
2954 1))
2955 Ws = Op->getOperand(1);
2956 else
2957 return SDValue();
2958
2959 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt);
2960}
2961
2962// Lower VECTOR_SHUFFLE into PCKEV (if possible).
2963//
2964// PCKEV copies the even elements of each vector into the result vector.
2965//
2966// It is possible to lower into PCKEV when the mask consists of two of the
2967// following forms concatenated:
2968// <0, 2, 4, ...>
2969// <n, n+2, n+4, ...>
2970// where n is the number of elements in the vector.
2971// For example:
2972// <0, 2, 4, ..., 0, 2, 4, ...>
2973// <0, 2, 4, ..., n, n+2, n+4, ...>
2974//
2975// When undef's appear in the mask they are treated as if they were whatever
2976// value is necessary in order to fit the above forms.
2978 SmallVector<int, 16> Indices,
2979 SelectionDAG &DAG) {
2980 assert((Indices.size() % 2) == 0);
2981
2982 SDValue Wt;
2983 SDValue Ws;
  // Split the mask into its two concatenated halves; each half must
  // independently match one of the two admissible even-element forms.
2984 const auto &Begin = Indices.begin();
2985 const auto &Mid = Indices.begin() + Indices.size() / 2;
2986 const auto &End = Indices.end();
2987
  // First half: <0, 2, 4, ...> selects operand 0; <n, n+2, n+4, ...>
  // (n == Indices.size()) selects operand 1. UNDEFs fit either form.
2988 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
2989 Wt = Op->getOperand(0);
2990 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2))
2991 Wt = Op->getOperand(1);
2992 else
2993 return SDValue();
2994
  // Second half: same two forms, chosen independently of the first half.
2995 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
2996 Ws = Op->getOperand(0);
2997 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2))
2998 Ws = Op->getOperand(1);
2999 else
3000 return SDValue();
3001
  // Both halves matched: emit pckev with the operands chosen above.
3002 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt);
3003}
3004
3005// Lower VECTOR_SHUFFLE into PCKOD (if possible).
3006//
3007// PCKOD copies the odd elements of each vector into the result vector.
3008//
3009// It is possible to lower into PCKOD when the mask consists of two of the
3010// following forms concatenated:
3011// <1, 3, 5, ...>
3012// <n+1, n+3, n+5, ...>
3013// where n is the number of elements in the vector.
3014// For example:
3015// <1, 3, 5, ..., 1, 3, 5, ...>
3016// <1, 3, 5, ..., n+1, n+3, n+5, ...>
3017//
3018// When undef's appear in the mask they are treated as if they were whatever
3019// value is necessary in order to fit the above forms.
3021 SmallVector<int, 16> Indices,
3022 SelectionDAG &DAG) {
3023 assert((Indices.size() % 2) == 0);
3024
3025 SDValue Wt;
3026 SDValue Ws;
  // Split the mask into its two concatenated halves; each half must
  // independently match one of the two admissible odd-element forms.
3027 const auto &Begin = Indices.begin();
3028 const auto &Mid = Indices.begin() + Indices.size() / 2;
3029 const auto &End = Indices.end();
3030
  // First half: <1, 3, 5, ...> selects operand 0; <n+1, n+3, n+5, ...>
  // (n == Indices.size()) selects operand 1. UNDEFs fit either form.
3031 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
3032 Wt = Op->getOperand(0);
3033 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2))
3034 Wt = Op->getOperand(1);
3035 else
3036 return SDValue();
3037
  // Second half: same two forms, chosen independently of the first half.
3038 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
3039 Ws = Op->getOperand(0);
3040 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2))
3041 Ws = Op->getOperand(1);
3042 else
3043 return SDValue();
3044
  // Both halves matched: emit pckod with the operands chosen above.
3045 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt);
3046}
3047
3048// Lower VECTOR_SHUFFLE into VSHF.
3049//
3050// This mostly consists of converting the shuffle indices in Indices into a
3051// BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is
3052// also code to eliminate unused operands of the VECTOR_SHUFFLE. For example,
3053// if the type is v8i16 and all the indices are less than 8 then the second
3054// operand is unused and can be replaced with anything. We choose to replace it
3055// with the used operand since this reduces the number of instructions overall.
3056//
3057// NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats
3058// UNDEFs as same as SPLATI index.
3059// For other instances we use the last valid index if UNDEF is
3060// encountered.
3062 const SmallVector<int, 16> &Indices,
3063 const bool isSPLATI,
3064 SelectionDAG &DAG) {
3066 SDValue Op0;
3067 SDValue Op1;
  // The VSHF mask is an integer vector of the same shape as the result.
3068 EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
3069 EVT MaskEltTy = MaskVecTy.getVectorElementType();
3070 bool Using1stVec = false;
3071 bool Using2ndVec = false;
3072 SDLoc DL(Op);
3073 int ResTyNumElts = ResTy.getVectorNumElements();
3074
3075 assert(Indices[0] >= 0 &&
3076 "shuffle mask starts with an UNDEF, which is not expected");
3077
  // Record which of the two source vectors the mask actually references,
  // so an unused operand can be replaced with the used one below.
3078 for (int i = 0; i < ResTyNumElts; ++i) {
3079 // Idx == -1 means UNDEF
3080 int Idx = Indices[i];
3081
3082 if (0 <= Idx && Idx < ResTyNumElts)
3083 Using1stVec = true;
3084 if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
3085 Using2ndVec = true;
3086 }
  // Materialize the mask constants, rewriting UNDEF entries: a SPLATI mask
  // reuses its splat index, otherwise the last valid index seen is reused.
3087 int LastValidIndex = 0;
3088 for (size_t i = 0; i < Indices.size(); i++) {
3089 int Idx = Indices[i];
3090 if (Idx < 0) {
3091 // Continue using splati index or use the last valid index.
3092 Idx = isSPLATI ? Indices[0] : LastValidIndex;
3093 } else {
3094 LastValidIndex = Idx;
3095 }
3096 Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
3097 }
3098
3099 SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
3100
  // Replace an unused operand with the used one to reduce register pressure;
  // a mask touching neither operand should have been folded earlier.
3101 if (Using1stVec && Using2ndVec) {
3102 Op0 = Op->getOperand(0);
3103 Op1 = Op->getOperand(1);
3104 } else if (Using1stVec)
3105 Op0 = Op1 = Op->getOperand(0);
3106 else if (Using2ndVec)
3107 Op0 = Op1 = Op->getOperand(1);
3108 else
3109 llvm_unreachable("shuffle vector mask references neither vector operand?");
3110
3111 // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
3112 // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
3113 // VSHF concatenates the vectors in a bitwise fashion:
3114 // <0b00, 0b01> + <0b10, 0b11> ->
3115 // 0b0100 + 0b1110 -> 0b01001110
3116 // <0b10, 0b11, 0b00, 0b01>
3117 // We must therefore swap the operands to get the correct result.
3118 return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
3119}
3120
3121// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
3122// indices in the shuffle.
3123 SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
3124 SelectionDAG &DAG) const {
3125 ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
3126 EVT ResTy = Op->getValueType(0);
3127
  // Only 128-bit (MSA-sized) vectors are handled here.
3128 if (!ResTy.is128BitVector())
3129 return SDValue();
3130
3131 int ResTyNumElts = ResTy.getVectorNumElements();
3132 SmallVector<int, 16> Indices;
3133
  // Collect the shuffle mask (UNDEF lanes come back as -1).
3134 for (int i = 0; i < ResTyNumElts; ++i)
3135 Indices.push_back(Node->getMaskElt(i));
3136
3137 // splati.[bhwd] is preferable to the others but is matched from
3138 // MipsISD::VSHF.
3139 if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
3140 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
  // Try each dedicated single-instruction shuffle in turn; the first pattern
  // that matches wins. The general-purpose vshf is the final fallback.
3142 if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
3143 return Result;
3144 if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
3145 return Result;
3146 if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
3147 return Result;
3148 if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
3149 return Result;
3150 if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
3151 return Result;
3152 if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
3153 return Result;
3154 if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
3155 return Result;
3156 return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
3157}
3158
3160 MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
3161 MachineBasicBlock *BB) const {
3162 // $bb:
3163 // bposge32_pseudo $vr0
3164 // =>
3165 // $bb:
3166 // bposge32 $tbb
3167 // $fbb:
3168 // li $vr2, 0
3169 // b $sink
3170 // $tbb:
3171 // li $vr1, 1
3172 // $sink:
3173 // $vr0 = phi($vr2, $fbb, $vr1, $tbb)
3174
3175 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3176 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3177 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3178 DebugLoc DL = MI.getDebugLoc();
3179 const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Create the false/true/sink blocks of the diamond above.
3181 MachineFunction *F = BB->getParent();
3182 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3183 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3184 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3185 F->insert(It, FBB);
3186 F->insert(It, TBB);
3187 F->insert(It, Sink);
3188
3189 // Transfer the remainder of BB and its successor edges to Sink.
3190 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3191 BB->end());
3192 Sink->transferSuccessorsAndUpdatePHIs(BB);
3193
3194 // Add successors.
3195 BB->addSuccessor(FBB);
3196 BB->addSuccessor(TBB);
3197 FBB->addSuccessor(Sink);
3198 TBB->addSuccessor(Sink);
3199
3200 // Insert the real bposge32 instruction to $BB.
3201 BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
3202 // Insert the real bposge32c instruction to $BB.
3203 BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);
  // NOTE(review): both branch forms above are emitted back-to-back with no
  // visible predication on the microMIPS subtarget; confirm the second
  // BuildMI is intended (as written, it appears unconditional/redundant).
3204
3205 // Fill $FBB.
3206 Register VR2 = RegInfo.createVirtualRegister(RC);
3207 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
3208 .addReg(Mips::ZERO).addImm(0);
3209 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3210
3211 // Fill $TBB.
3212 Register VR1 = RegInfo.createVirtualRegister(RC);
3213 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
3214 .addReg(Mips::ZERO).addImm(1);
3215
3216 // Insert phi function to $Sink.
3217 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3218 MI.getOperand(0).getReg())
3219 .addReg(VR2)
3220 .addMBB(FBB)
3221 .addReg(VR1)
3222 .addMBB(TBB);
3223
3224 MI.eraseFromParent(); // The pseudo instruction is gone now.
3225 return Sink;
3226}
3227
3228 MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
3229 MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // BranchOp is the concrete MSA compare-and-branch opcode supplied by the
  // caller; the pseudo is expanded into the branch diamond sketched below.
3230 // $bb:
3231 // vany_nonzero $rd, $ws
3232 // =>
3233 // $bb:
3234 // bnz.b $ws, $tbb
3235 // b $fbb
3236 // $fbb:
3237 // li $rd1, 0
3238 // b $sink
3239 // $tbb:
3240 // li $rd2, 1
3241 // $sink:
3242 // $rd = phi($rd1, $fbb, $rd2, $tbb)
3243
3244 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3245 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3246 const TargetRegisterClass *RC = &Mips::GPR32RegClass;
3247 DebugLoc DL = MI.getDebugLoc();
3248 const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // Create the false/true/sink blocks of the diamond above.
3250 MachineFunction *F = BB->getParent();
3251 MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
3252 MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
3253 MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
3254 F->insert(It, FBB);
3255 F->insert(It, TBB);
3256 F->insert(It, Sink);
3257
3258 // Transfer the remainder of BB and its successor edges to Sink.
3259 Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
3260 BB->end());
3261 Sink->transferSuccessorsAndUpdatePHIs(BB);
3262
3263 // Add successors.
3264 BB->addSuccessor(FBB);
3265 BB->addSuccessor(TBB);
3266 FBB->addSuccessor(Sink);
3267 TBB->addSuccessor(Sink);
3268
3269 // Insert the real bnz.b instruction to $BB.
3270 BuildMI(BB, DL, TII->get(BranchOp))
3271 .addReg(MI.getOperand(1).getReg())
3272 .addMBB(TBB);
3273
3274 // Fill $FBB.
3275 Register RD1 = RegInfo.createVirtualRegister(RC);
3276 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
3277 .addReg(Mips::ZERO).addImm(0);
3278 BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);
3279
3280 // Fill $TBB.
3281 Register RD2 = RegInfo.createVirtualRegister(RC);
3282 BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
3283 .addReg(Mips::ZERO).addImm(1);
3284
3285 // Insert phi function to $Sink.
3286 BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
3287 MI.getOperand(0).getReg())
3288 .addReg(RD1)
3289 .addMBB(FBB)
3290 .addReg(RD2)
3291 .addMBB(TBB);
3292
3293 MI.eraseFromParent(); // The pseudo instruction is gone now.
3294 return Sink;
3295}
3296
3297// Emit the COPY_FW pseudo instruction.
3298//
3299// copy_fw_pseudo $fd, $ws, n
3300// =>
3301// copy_u_w $rt, $ws, $n
3302// mtc1 $rt, $fd
3303//
3304// When n is zero, the equivalent operation can be performed with (potentially)
3305// zero instructions due to register overlaps. This optimization is never valid
3306// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
3309 MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
3310 MachineBasicBlock *BB) const {
3311 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3312 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3313 DebugLoc DL = MI.getDebugLoc();
3314 Register Fd = MI.getOperand(0).getReg();
3315 Register Ws = MI.getOperand(1).getReg();
3316 unsigned Lane = MI.getOperand(2).getImm();
3317
  // Lane 0 aliases the single-precision subregister, so a plain subreg copy
  // suffices (possibly via an even-numbered MSA register, see below).
3318 if (Lane == 0) {
3319 unsigned Wt = Ws;
3320 if (!Subtarget.useOddSPReg()) {
3321 // We must copy to an even-numbered MSA register so that the
3322 // single-precision sub-register is also guaranteed to be even-numbered.
3323 Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);
3324
3325 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
3326 }
3327
3328 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_lo);
3329 } else {
  // Non-zero lane: splat the requested lane into lane 0 of a temporary,
  // then copy that temporary's low subregister into $fd.
3330 Register Wt = RegInfo.createVirtualRegister(
3331 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3332 : &Mips::MSA128WEvensRegClass);
3333
3334 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
3335 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_lo);
3336 }
3337
3338 MI.eraseFromParent(); // The pseudo instruction is gone now.
3339 return BB;
3340}
3340
3341// Emit the COPY_FD pseudo instruction.
3342//
3343// copy_fd_pseudo $fd, $ws, n
3344// =>
3345// splati.d $wt, $ws, $n
3346// copy $fd, $wt:sub_64
3347//
3348// When n is zero, the equivalent operation can be performed with (potentially)
3349// zero instructions due to register overlaps. This optimization is always
3350// valid because FR=1 mode which is the only supported mode in MSA.
3353 MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
3354 MachineBasicBlock *BB) const {
3355 assert(Subtarget.isFP64bit());
3356
3357 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3358 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3359 Register Fd = MI.getOperand(0).getReg();
3360 Register Ws = MI.getOperand(1).getReg();
  // The lane immediate is scaled by 2; only the zero test below uses the
  // scaled value, since the splat index in the non-zero path is hardcoded
  // to 1 (a v2f64 vector only has lanes 0 and 1).
3361 unsigned Lane = MI.getOperand(2).getImm() * 2;
3362 DebugLoc DL = MI.getDebugLoc();
3363
  // Lane 0 aliases the 64-bit FPU subregister, so a subreg copy suffices.
3364 if (Lane == 0)
3365 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, {}, Mips::sub_64);
3366 else {
3367 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3368
  // Splat lane 1 into lane 0 of a temporary, then copy its sub_64 out.
3369 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
3370 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, {}, Mips::sub_64);
3371 }
3372
3373 MI.eraseFromParent(); // The pseudo instruction is gone now.
3374 return BB;
3375}
3375
3376// Emit the INSERT_FW pseudo instruction.
3377//
3378// insert_fw_pseudo $wd, $wd_in, $n, $fs
3379// =>
3380// subreg_to_reg $wt:sub_lo, $fs
3381// insve_w $wd[$n], $wd_in, $wt[0]
3384 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI,
3385 MachineBasicBlock *BB) const {
3386 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3387 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3388 DebugLoc DL = MI.getDebugLoc();
3389 Register Wd = MI.getOperand(0).getReg();
3390 Register Wd_in = MI.getOperand(1).getReg();
3391 unsigned Lane = MI.getOperand(2).getImm();
3392 Register Fs = MI.getOperand(3).getReg();
  // Without odd single-precision registers the temporary must be
  // even-numbered so sub_lo maps onto a valid FPR.
3393 Register Wt = RegInfo.createVirtualRegister(
3394 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3395 : &Mips::MSA128WEvensRegClass);
3396
  // Wrap $fs into an MSA register through its sub_lo subregister, then
  // insve.w element 0 of it into lane $n of $wd_in.
3397 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3398 .addReg(Fs)
3399 .addImm(Mips::sub_lo);
3400 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd)
3401 .addReg(Wd_in)
3402 .addImm(Lane)
3403 .addReg(Wt)
3404 .addImm(0);
3405
3406 MI.eraseFromParent(); // The pseudo instruction is gone now.
3407 return BB;
3408}
3408
3409// Emit the INSERT_FD pseudo instruction.
3410//
3411// insert_fd_pseudo $wd, $fs, n
3412// =>
3413// subreg_to_reg $wt:sub_64, $fs
3414// insve_d $wd[$n], $wd_in, $wt[0]
3417 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI,
3418 MachineBasicBlock *BB) const {
3419 assert(Subtarget.isFP64bit());
3420
3421 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3422 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3423 DebugLoc DL = MI.getDebugLoc();
3424 Register Wd = MI.getOperand(0).getReg();
3425 Register Wd_in = MI.getOperand(1).getReg();
3426 unsigned Lane = MI.getOperand(2).getImm();
3427 Register Fs = MI.getOperand(3).getReg();
3428 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3429
  // Wrap $fs into an MSA register through its sub_64 subregister, then
  // insve.d element 0 of it into lane $n of $wd_in.
3430 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3431 .addReg(Fs)
3432 .addImm(Mips::sub_64);
3433 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd)
3434 .addReg(Wd_in)
3435 .addImm(Lane)
3436 .addReg(Wt)
3437 .addImm(0);
3438
3439 MI.eraseFromParent(); // The pseudo instruction is gone now.
3440 return BB;
3441}
3441
3442// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
3443//
3444// For integer:
3445// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
3446// =>
3447// (SLL $lanetmp1, $lane, <log2size)
3448// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3449// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
3450// (NEG $lanetmp2, $lanetmp1)
3451// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3452//
3453// For floating point:
3454// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
3455// =>
3456// (SUBREG_TO_REG $wt, $fs, <subreg>)
3457// (SLL $lanetmp1, $lane, <log2size)
3458// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
3459// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
3460// (NEG $lanetmp2, $lanetmp1)
3461// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
3462 MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
3463 MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
3464 bool IsFP) const {
  // Variable-index insert: rotate the desired lane down to element zero,
  // insert there with a constant-index instruction, then rotate back.
3465 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3466 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3467 DebugLoc DL = MI.getDebugLoc();
3468 Register Wd = MI.getOperand(0).getReg();
3469 Register SrcVecReg = MI.getOperand(1).getReg();
3470 Register LaneReg = MI.getOperand(2).getReg();
3471 Register SrcValReg = MI.getOperand(3).getReg();
3472
3473 const TargetRegisterClass *VecRC = nullptr;
3474 // FIXME: This should be true for N32 too.
3475 const TargetRegisterClass *GPRRC =
3476 Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3477 unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
3478 unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  // Select the per-element-size opcodes and vector register class.
3479 unsigned EltLog2Size;
3480 unsigned InsertOp = 0;
3481 unsigned InsveOp = 0;
3482 switch (EltSizeInBytes) {
3483 default:
3484 llvm_unreachable("Unexpected size");
3485 case 1:
3486 EltLog2Size = 0;
3487 InsertOp = Mips::INSERT_B;
3488 InsveOp = Mips::INSVE_B;
3489 VecRC = &Mips::MSA128BRegClass;
3490 break;
3491 case 2:
3492 EltLog2Size = 1;
3493 InsertOp = Mips::INSERT_H;
3494 InsveOp = Mips::INSVE_H;
3495 VecRC = &Mips::MSA128HRegClass;
3496 break;
3497 case 4:
3498 EltLog2Size = 2;
3499 InsertOp = Mips::INSERT_W;
3500 InsveOp = Mips::INSVE_W;
3501 VecRC = &Mips::MSA128WRegClass;
3502 break;
3503 case 8:
3504 EltLog2Size = 3;
3505 InsertOp = Mips::INSERT_D;
3506 InsveOp = Mips::INSVE_D;
3507 VecRC = &Mips::MSA128DRegClass;
3508 break;
3509 }
3510
  // FP values first get wrapped into a vector register via SUBREG_TO_REG so
  // insve.df can read them.
3511 if (IsFP) {
3512 Register Wt = RegInfo.createVirtualRegister(VecRC);
3513 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
3514 .addReg(SrcValReg)
3515 .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
3516 SrcValReg = Wt;
3517 }
3518
3519 // Convert the lane index into a byte index
3520 if (EltSizeInBytes != 1) {
3521 Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
3522 BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
3523 .addReg(LaneReg)
3524 .addImm(EltLog2Size);
3525 LaneReg = LaneTmp1;
3526 }
3527
3528 // Rotate bytes around so that the desired lane is element zero
3529 Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
3530 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
3531 .addReg(SrcVecReg)
3532 .addReg(SrcVecReg)
3533 .addReg(LaneReg, {}, SubRegIdx);
3534
3535 Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
3536 if (IsFP) {
3537 // Use insve.df to insert to element zero
3538 BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
3539 .addReg(WdTmp1)
3540 .addImm(0)
3541 .addReg(SrcValReg)
3542 .addImm(0);
3543 } else {
3544 // Use insert.df to insert to element zero
3545 BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
3546 .addReg(WdTmp1)
3547 .addReg(SrcValReg)
3548 .addImm(0);
3549 }
3550
3551 // Rotate elements the rest of the way for a full rotation.
3552 // sld.df interprets $rt modulo the number of columns so we only need to
3553 // negate the lane index to do this.
3554 Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
3555 BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
3556 LaneTmp2)
3557 .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
3558 .addReg(LaneReg);
3559 BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
3560 .addReg(WdTmp2)
3561 .addReg(WdTmp2)
3562 .addReg(LaneTmp2, {}, SubRegIdx);
3563
3564 MI.eraseFromParent(); // The pseudo instruction is gone now.
3565 return BB;
3566}
3567
3568// Emit the FILL_FW pseudo instruction.
3569//
3570// fill_fw_pseudo $wd, $fs
3571// =>
3572// implicit_def $wt1
3573// insert_subreg $wt2:subreg_lo, $wt1, $fs
3574// splati.w $wd, $wt2[0]
3576MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
3577 MachineBasicBlock *BB) const {
3578 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3579 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3580 DebugLoc DL = MI.getDebugLoc();
3581 Register Wd = MI.getOperand(0).getReg();
3582 Register Fs = MI.getOperand(1).getReg();
3583 Register Wt1 = RegInfo.createVirtualRegister(
3584 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3585 : &Mips::MSA128WEvensRegClass);
3586 Register Wt2 = RegInfo.createVirtualRegister(
3587 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
3588 : &Mips::MSA128WEvensRegClass);
3589
3590 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3591 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3592 .addReg(Wt1)
3593 .addReg(Fs)
3594 .addImm(Mips::sub_lo);
3595 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);
3596
3597 MI.eraseFromParent(); // The pseudo instruction is gone now.
3598 return BB;
3599}
3600
3601// Emit the FILL_FD pseudo instruction.
3602//
3603// fill_fd_pseudo $wd, $fs
3604// =>
3605// implicit_def $wt1
3606// insert_subreg $wt2:subreg_64, $wt1, $fs
3607// splati.d $wd, $wt2[0]
3609MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
3610 MachineBasicBlock *BB) const {
3611 assert(Subtarget.isFP64bit());
3612
3613 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3614 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3615 DebugLoc DL = MI.getDebugLoc();
3616 Register Wd = MI.getOperand(0).getReg();
3617 Register Fs = MI.getOperand(1).getReg();
3618 Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3619 Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3620
3621 BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
3622 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
3623 .addReg(Wt1)
3624 .addReg(Fs)
3625 .addImm(Mips::sub_64);
3626 BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);
3627
3628 MI.eraseFromParent(); // The pseudo instruction is gone now.
3629 return BB;
3630}
3631
3632// Emit the ST_F16_PSEDUO instruction to store a f16 value from an MSA
3633// register.
3634//
3635// STF16 MSA128F16:$wd, mem_simm10:$addr
3636// =>
3637// copy_u.h $rtemp,$wd[0]
3638// sh $rtemp, $addr
3639//
3640// Safety: We can't use st.h & co as they would over write the memory after
3641// the destination. It would require half floats be allocated 16 bytes(!) of
3642// space.
3644 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
3645 MachineBasicBlock *BB) const {
3646
3647 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3648 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3649 DebugLoc DL = MI.getDebugLoc();
3650 Register Ws = MI.getOperand(0).getReg();
3651 Register Rt = MI.getOperand(1).getReg();
3652 const MachineMemOperand &MMO = **MI.memoperands_begin();
3653 unsigned Imm = MMO.getOffset();
3654
3655 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3656 // spill and reload can expand as a GPR64 operand. Examine the
3657 // operand in detail and default to ABI.
3658 const TargetRegisterClass *RC =
3659 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3660 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3661 : &Mips::GPR64RegClass);
3662 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3663 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3664
  // Extract the f16 bit pattern from lane 0 of $ws into a GPR32.
3665 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
3666 if(!UsingMips32) {
3667 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass);
  // Widen the 32-bit value so it can feed the 64-bit store variant below.
3668 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp)
3669 .addReg(Rs)
3670 .addImm(Mips::sub_32);
3671 Rs = Tmp;
3672 }
  // Store the halfword, preserving the original memory operand info.
3673 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
3674 .addReg(Rs)
3675 .addReg(Rt)
3676 .addImm(Imm)
3678 &MMO, MMO.getOffset(), MMO.getSize()));
3679
3680 MI.eraseFromParent();
3681 return BB;
3682}
3683
3684// Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register.
3685//
3686// LD_F16 MSA128F16:$wd, mem_simm10:$addr
3687// =>
3688// lh $rtemp, $addr
3689// fill.h $wd, $rtemp
3690//
3691// Safety: We can't use ld.h & co as they over-read from the source.
3692// Additionally, if the address is not modulo 16, 2 cases can occur:
3693// a) Segmentation fault as the load instruction reads from a memory page
3694// memory it's not supposed to.
3695// b) The load crosses an implementation specific boundary, requiring OS
3696// intervention.
3699 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
3700 MachineBasicBlock *BB) const {
3701
3702 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3703 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3704 DebugLoc DL = MI.getDebugLoc();
3705 Register Wd = MI.getOperand(0).getReg();
3706
3707 // Caution: A load via the GOT can expand to a GPR32 operand, a load via
3708 // spill and reload can expand as a GPR64 operand. Examine the
3709 // operand in detail and default to ABI.
3710 const TargetRegisterClass *RC =
3711 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
3712 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
3713 : &Mips::GPR64RegClass);
3714
3715 const bool UsingMips32 = RC == &Mips::GPR32RegClass;
3716 Register Rt = RegInfo.createVirtualRegister(RC);
3717
  // Emit the halfword load, forwarding all address operands of the pseudo.
3718 MachineInstrBuilder MIB =
3719 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
3720 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
3721 MIB.add(MO);
3722
  // fill.h consumes a GPR32, so truncate a 64-bit result to its sub_32.
3723 if(!UsingMips32) {
3724 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
3725 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp)
3726 .addReg(Rt, {}, Mips::sub_32);
3727 Rt = Tmp;
3728 }
3729
  // Broadcast the loaded f16 bit pattern into every lane of $wd.
3730 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
3731
3732 MI.eraseFromParent();
3733 return BB;
3734}
3734
3735// Emit the FPROUND_PSEUDO instruction.
3736//
3737// Round an FGR64Opnd, FGR32Opnd to an f16.
3738//
3739// Safety: Cycle the operand through the GPRs so the result always ends up
3740// the correct MSA register.
3741//
3742// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
3743// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
3744// (which they can be, as the MSA registers are defined to alias the
3745// FPU's 64 bit and 32 bit registers) the result can be accessed using
3746// the correct register class. That requires operands be tie-able across
3747// register classes which have a sub/super register class relationship.
3748//
3749// For FPG32Opnd:
3750//
3751// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
3752// =>
3753// mfc1 $rtemp, $fs
3754// fill.w $rtemp, $wtemp
3755// fexdo.w $wd, $wtemp, $wtemp
3756//
3757// For FPG64Opnd on mips32r2+:
3758//
3759// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3760// =>
3761// mfc1 $rtemp, $fs
3762// fill.w $rtemp, $wtemp
3763// mfhc1 $rtemp2, $fs
3764// insert.w $wtemp[1], $rtemp2
3765// insert.w $wtemp[3], $rtemp2
3766// fexdo.w $wtemp2, $wtemp, $wtemp
3767// fexdo.h $wd, $temp2, $temp2
3768//
3769// For FGR64Opnd on mips64r2+:
3770//
3771// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
3772// =>
3773// dmfc1 $rtemp, $fs
3774// fill.d $rtemp, $wtemp
3775// fexdo.w $wtemp2, $wtemp, $wtemp
3776// fexdo.h $wd, $wtemp2, $wtemp2
3777//
3778// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
3779// undef bits are "just right" and the exception enable bits are
3780// set. By using fill.w to replicate $fs into all elements over
3781// insert.w for one element, we avoid that potiential case. If
3782// fexdo.[hw] causes an exception in, the exception is valid and it
3783// occurs for all elements.
3785 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
3787 bool IsFGR64) const {
3788
3789 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3790 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3791 // it.
3792 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3793
  // Three expansion shapes exist (see the function header comment): f32,
  // f64 on a 64-bit core, and f64 on a 32-bit core (split 32-bit moves).
3794 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3795 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3796
3797 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3798 DebugLoc DL = MI.getDebugLoc();
3799 Register Wd = MI.getOperand(0).getReg();
3800 Register Fs = MI.getOperand(1).getReg();
3801
3802 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3803 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3804 const TargetRegisterClass *GPRRC =
3805 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3806 unsigned MFC1Opc = IsFGR64onMips64
3807 ? Mips::DMFC1
3808 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1);
3809 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
3810
3811 // Perform the register class copy as mentioned above.
3812 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
3813 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
3814 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
  // WPHI tracks the most recent intermediate vector fed to the next step.
3815 unsigned WPHI = Wtemp;
3816
  // f64 on a 32-bit core: the high 32 bits of $fs come via mfhc1 and are
  // inserted into lanes 1 and 3 (the upper halves of both f64 lanes).
3817 if (IsFGR64onMips32) {
3818 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3819 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
3820 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3821 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3822 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
3823 .addReg(Wtemp)
3824 .addReg(Rtemp2)
3825 .addImm(1);
3826 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
3827 .addReg(Wtemp2)
3828 .addReg(Rtemp2)
3829 .addImm(3);
3830 WPHI = Wtemp3;
3831 }
3832
  // f64 sources need an extra f64->f32 narrowing step before the final
  // f32->f16 narrowing below.
3833 if (IsFGR64) {
3834 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
3835 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
3836 .addReg(WPHI)
3837 .addReg(WPHI);
3838 WPHI = Wtemp2;
3839 }
3840
3841 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
3842
3843 MI.eraseFromParent();
3844 return BB;
3845}
3846
3847// Emit the FPEXTEND_PSEUDO instruction.
3848//
3849// Expand an f16 to either a FGR32Opnd or FGR64Opnd.
3850//
3851// Safety: Cycle the result through the GPRs so the result always ends up
3852// the correct floating point register.
3853//
3854// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
3855// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
3856// (which they can be, as the MSA registers are defined to alias the
3857// FPU's 64 bit and 32 bit registers) the result can be accessed using
3858// the correct register class. That requires operands be tie-able across
3859// register classes which have a sub/super register class relationship. I
3860// haven't checked.
3861//
3862// For FGR32Opnd:
3863//
3864// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
3865// =>
3866// fexupr.w $wtemp, $ws
3867// copy_s.w $rtemp, $ws[0]
3868// mtc1 $rtemp, $fd
3869//
3870// For FGR64Opnd on Mips64:
3871//
3872// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3873// =>
3874// fexupr.w $wtemp, $ws
3875// fexupr.d $wtemp2, $wtemp
3876// copy_s.d $rtemp, $wtemp2s[0]
3877// dmtc1 $rtemp, $fd
3878//
3879// For FGR64Opnd on Mips32:
3880//
3881// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
3882// =>
3883// fexupr.w $wtemp, $ws
3884// fexupr.d $wtemp2, $wtemp
3885// copy_s.w $rtemp, $wtemp2[0]
3886// mtc1 $rtemp, $ftemp
3887// copy_s.w $rtemp2, $wtemp2[1]
3888// $fd = mthc1 $rtemp2, $ftemp
3890 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
3892 bool IsFGR64) const {
3893
3894 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
3895 // here. It's technically doable to support MIPS32 here, but the ISA forbids
3896 // it.
3897 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
3898
  // Three expansion shapes exist (see the function header comment): f32,
  // f64 on a 64-bit core, and f64 on a 32-bit core (split 32-bit moves).
3899 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
3900 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
3901
3902 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3903 DebugLoc DL = MI.getDebugLoc();
3904 Register Fd = MI.getOperand(0).getReg();
3905 Register Ws = MI.getOperand(1).getReg();
3906
3907 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3908 const TargetRegisterClass *GPRRC =
3909 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
3910 unsigned MTC1Opc = IsFGR64onMips64
3911 ? Mips::DMTC1
3912 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1);
3913 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
3914
3915 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
  // WPHI tracks the widened vector whose element 0 is copied out below.
3916 Register WPHI = Wtemp;
3917
  // f16->f32 widening; for f64 destinations an extra f32->f64 step follows.
3918 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
3919 if (IsFGR64) {
3920 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
3921 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
3922 }
3923
3924 // Perform the safety regclass copy mentioned above.
3925 Register Rtemp = RegInfo.createVirtualRegister(GPRRC);
  // On a 32-bit core an f64 result is assembled in a temporary FGR64 via
  // mtc1 (low half) + mthc1 (high half) before landing in $fd.
3926 Register FPRPHI = IsFGR64onMips32
3927 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
3928 : Fd;
3929 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
3930 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
3931
3932 if (IsFGR64onMips32) {
3933 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
3934 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
3935 .addReg(WPHI)
3936 .addImm(1);
3937 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
3938 .addReg(FPRPHI)
3939 .addReg(Rtemp2);
3940 }
3941
3942 MI.eraseFromParent();
3943 return BB;
3944}
3945
3946// Emit the FEXP2_W_1 pseudo instructions.
3947//
3948// fexp2_w_1_pseudo $wd, $wt
3949// =>
3950// ldi.w $ws, 1
3951// fexp2.w $wd, $ws, $wt
3953MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI,
3954 MachineBasicBlock *BB) const {
3955 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3956 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3957 const TargetRegisterClass *RC = &Mips::MSA128WRegClass;
3958 Register Ws1 = RegInfo.createVirtualRegister(RC);
3959 Register Ws2 = RegInfo.createVirtualRegister(RC);
3960 DebugLoc DL = MI.getDebugLoc();
3961
3962 // Splat 1.0 into a vector
3963 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1);
3964 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1);
3965
3966 // Emit 1.0 * fexp2(Wt)
3967 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg())
3968 .addReg(Ws2)
3969 .addReg(MI.getOperand(1).getReg());
3970
3971 MI.eraseFromParent(); // The pseudo instruction is gone now.
3972 return BB;
3973}
3974
3975// Emit the FEXP2_D_1 pseudo instructions.
3976//
3977// fexp2_d_1_pseudo $wd, $wt
3978// =>
3979// ldi.d $ws, 1
3980// fexp2.d $wd, $ws, $wt
3982MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI,
3983 MachineBasicBlock *BB) const {
3984 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3985 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
3986 const TargetRegisterClass *RC = &Mips::MSA128DRegClass;
3987 Register Ws1 = RegInfo.createVirtualRegister(RC);
3988 Register Ws2 = RegInfo.createVirtualRegister(RC);
3989 DebugLoc DL = MI.getDebugLoc();
3990
3991 // Splat 1.0 into a vector
3992 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1);
3993 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1);
3994
3995 // Emit 1.0 * fexp2(Wt)
3996 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg())
3997 .addReg(Ws2)
3998 .addReg(MI.getOperand(1).getReg());
3999
4000 MI.eraseFromParent(); // The pseudo instruction is gone now.
4001 return BB;
4002}
static SDValue performSHLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
If the operand is a bitwise AND with a constant RHS, and the shift has a constant RHS and is the only...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool fitsRegularPattern(typename SmallVectorImpl< ValType >::const_iterator Begin, unsigned CheckStride, typename SmallVectorImpl< ValType >::const_iterator End, ValType ExpectedIndex, unsigned ExpectedIndexStride)
Determine whether a range fits a regular pattern of values.
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const LoongArchSubtarget &Subtarget)
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Promote Memory to Register
Definition Mem2Reg.cpp:110
static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc, SDValue Imm, bool BigEndian)
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG)
static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering::DAGCombinerInfo &DCI, const MipsSETargetLowering *TL, const MipsSubtarget &Subtarget)
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc)
static cl::opt< bool > NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), cl::desc("Expand double precision loads and " "stores to their single precision " "counterparts"))
static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, bool BigEndian, SelectionDAG &DAG)
static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG)
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian)
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG)
static bool isBitwiseInverse(SDValue N, SDValue OfNode)
static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget)
static bool isVectorAllOnes(SDValue N)
static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC)
static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG)
static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, EVT ShiftTy, SelectionDAG &DAG)
static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, bool IsSigned=false)
static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isConstantOrUndef(const SDValue Op)
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy, const SmallVector< int, 16 > &Indices, const bool isSPLATI, SelectionDAG &DAG)
static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG)
static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, SelectionDAG &DAG, const MipsSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op)
static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, const MipsSubtarget &Subtarget)
static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, SmallVector< int, 16 > Indices, SelectionDAG &DAG)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static cl::opt< unsigned > MaxSteps("has-predecessor-max-steps", cl::Hidden, cl::init(8192), cl::desc("DAG combiner limit number of steps when searching DAG " "for predecessor nodes"))
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
unsigned logBase2() const
Definition APInt.h:1776
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getInRegsParamsCount() const
uint64_t getZExtValue() const
const SDValue & getBasePtr() const
const Triple & getTargetTriple() const
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
BasicBlockListType::iterator iterator
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
Flags getFlags() const
Return the raw flags of the source value,.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
MipsFunctionInfo - This class is derived from MachineFunction private Mips target-specific informatio...
unsigned getIncomingArgSize() const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given floating-point type and Register class.
void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC)
Enable MSA support for the given integer type and Register class.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
const TargetRegisterClass * getRepRegClassFor(MVT VT) const override
Return the 'representative' register class for the specified value type.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Determine if the target supports unaligned memory accesses.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
MipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
bool hasMips32r6() const
bool isLittle() const
bool hasDSPR2() const
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override
Return the type to use for a scalar shift opcode, given the shifted amount type.
MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const
virtual void getOpndList(SmallVectorImpl< SDValue > &Ops, std::deque< std::pair< unsigned, SDValue > > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const
This function fills Ops, which is the list of operands that will later be used when a function call n...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
const MipsSubtarget & Subtarget
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getNumOperands() const
Return the number of values used by this operation.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
LLVM_ABI void printrWithDepth(raw_ostream &O, const SelectionDAG *G=nullptr, unsigned depth=100) const
Print a SelectionDAG node and children up to depth "depth." The given SelectionDAG allows target-spec...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVMContext * getContext() const
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::const_iterator const_iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getValue() const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
LLVM_ABI bool isLittleEndian() const
Tests whether the target triple is little endian.
Definition Triple.cpp:2118
LLVM Value Representation.
Definition Value.h:75
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ BRCOND
BRCOND - Conditional branch.
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LLVM_ABI bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
const MipsTargetLowering * createMipsSETargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI)
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This class contains a discriminated union of information about pointers in memory operands,...
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.