1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
31 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/KnownBits.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49  const RISCVSubtarget &STI)
50  : TargetLowering(TM), Subtarget(STI) {
51 
52  if (Subtarget.isRV32E())
53  report_fatal_error("Codegen not yet implemented for RV32E");
54 
55  RISCVABI::ABI ABI = Subtarget.getTargetABI();
56  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59  !Subtarget.hasStdExtF()) {
60  errs() << "Hard-float 'f' ABI can't be used for a target that "
61  "doesn't support the F instruction set extension (ignoring "
62  "target-abi)\n";
63  ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
64  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65  !Subtarget.hasStdExtD()) {
66  errs() << "Hard-float 'd' ABI can't be used for a target that "
67  "doesn't support the D instruction set extension (ignoring "
68  "target-abi)\n";
69  ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
70  }
71 
72  switch (ABI) {
73  default:
74  report_fatal_error("Don't know how to lower this ABI");
75  case RISCVABI::ABI_ILP32:
76  case RISCVABI::ABI_ILP32F:
77  case RISCVABI::ABI_ILP32D:
78  case RISCVABI::ABI_LP64:
79  case RISCVABI::ABI_LP64F:
80  case RISCVABI::ABI_LP64D:
81  break;
82  }
83 
84  MVT XLenVT = Subtarget.getXLenVT();
85 
86  // Set up the register classes.
87  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89  if (Subtarget.hasStdExtZfh())
90  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91  if (Subtarget.hasStdExtF())
92  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93  if (Subtarget.hasStdExtD())
94  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96  static const MVT::SimpleValueType BoolVecVTs[] = {
97  MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
98  MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
99  static const MVT::SimpleValueType IntVecVTs[] = {
100  MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
101  MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
102  MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
103  MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
104  MVT::nxv4i64, MVT::nxv8i64};
105  static const MVT::SimpleValueType F16VecVTs[] = {
106  MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
107  MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
108  static const MVT::SimpleValueType F32VecVTs[] = {
109  MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
110  static const MVT::SimpleValueType F64VecVTs[] = {
111  MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
112 
113  if (Subtarget.hasVInstructions()) {
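 // An LMUL=1 scalable type has a known-minimum size of RISCV::RVVBitsPerBlock
 // (64 bits), so the size checks below pick VR for LMUL <= 1, VRM2 for LMUL=2,
 // VRM4 for LMUL=4 and VRM8 for LMUL=8 register groups.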
114  auto addRegClassForRVV = [this](MVT VT) {
115  unsigned Size = VT.getSizeInBits().getKnownMinValue();
116  assert(Size <= 512 && isPowerOf2_32(Size));
117  const TargetRegisterClass *RC;
118  if (Size <= 64)
119  RC = &RISCV::VRRegClass;
120  else if (Size == 128)
121  RC = &RISCV::VRM2RegClass;
122  else if (Size == 256)
123  RC = &RISCV::VRM4RegClass;
124  else
125  RC = &RISCV::VRM8RegClass;
126 
127  addRegisterClass(VT, RC);
128  };
129 
130  for (MVT VT : BoolVecVTs)
131  addRegClassForRVV(VT);
132  for (MVT VT : IntVecVTs) {
133  if (VT.getVectorElementType() == MVT::i64 &&
134  !Subtarget.hasVInstructionsI64())
135  continue;
136  addRegClassForRVV(VT);
137  }
138 
139  if (Subtarget.hasVInstructionsF16())
140  for (MVT VT : F16VecVTs)
141  addRegClassForRVV(VT);
142 
143  if (Subtarget.hasVInstructionsF32())
144  for (MVT VT : F32VecVTs)
145  addRegClassForRVV(VT);
146 
147  if (Subtarget.hasVInstructionsF64())
148  for (MVT VT : F64VecVTs)
149  addRegClassForRVV(VT);
150 
151  if (Subtarget.useRVVForFixedLengthVectors()) {
152  auto addRegClassForFixedVectors = [this](MVT VT) {
153  MVT ContainerVT = getContainerForFixedLengthVector(VT);
154  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
155  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
156  addRegisterClass(VT, TRI.getRegClass(RCID));
157  };
158  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
159  if (useRVVForFixedLengthVectorVT(VT))
160  addRegClassForFixedVectors(VT);
161 
162  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
163  if (useRVVForFixedLengthVectorVT(VT))
164  addRegClassForFixedVectors(VT);
165  }
166  }
167 
168  // Compute derived properties from the register classes.
169  computeRegisterProperties(STI.getRegisterInfo());
170 
171  setStackPointerRegisterToSaveRestore(RISCV::X2);
172 
173  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
174  setLoadExtAction(N, XLenVT, MVT::i1, Promote);
175 
176  // TODO: add all necessary setOperationAction calls.
178 
183 
186 
191 
193  if (!Subtarget.hasStdExtZbb()) {
196  }
197 
198  if (Subtarget.is64Bit()) {
204 
209  } else {
210  setLibcallName(RTLIB::SHL_I128, nullptr);
211  setLibcallName(RTLIB::SRL_I128, nullptr);
212  setLibcallName(RTLIB::SRA_I128, nullptr);
213  setLibcallName(RTLIB::MUL_I128, nullptr);
214  setLibcallName(RTLIB::MULO_I64, nullptr);
215  }
216 
217  if (!Subtarget.hasStdExtM()) {
225  } else {
226  if (Subtarget.is64Bit()) {
229 
239  } else {
241  }
242  }
243 
248 
252 
253  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
254  Subtarget.hasStdExtZbkb()) {
255  if (Subtarget.is64Bit()) {
258  }
259  } else {
262  }
263 
264  if (Subtarget.hasStdExtZbp()) {
265  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
266  // more combining.
270  // BSWAP i8 doesn't exist.
273 
274  if (Subtarget.is64Bit()) {
277  }
278  } else {
279  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
280  // pattern match it directly in isel.
282  (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
283  ? Legal
284  : Expand);
285  }
286 
287  if (Subtarget.hasStdExtZbb()) {
292 
293  if (Subtarget.is64Bit()) {
298  }
299  } else {
303  }
304 
305  if (Subtarget.hasStdExtZbt()) {
309 
310  if (Subtarget.is64Bit()) {
313  }
314  } else {
316  }
317 
318  static const ISD::CondCode FPCCToExpand[] = {
322 
323  static const ISD::NodeType FPOpToExpand[] = {
326 
327  if (Subtarget.hasStdExtZfh())
329 
330  if (Subtarget.hasStdExtZfh()) {
351  for (auto CC : FPCCToExpand)
356 
375 
376  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
377  // complete support for all operations in LegalizeDAG.
378 
379  // We need to custom promote this.
380  if (Subtarget.is64Bit())
382  }
383 
384  if (Subtarget.hasStdExtF()) {
403  for (auto CC : FPCCToExpand)
408  for (auto Op : FPOpToExpand)
412  }
413 
414  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
416 
417  if (Subtarget.hasStdExtD()) {
438  for (auto CC : FPCCToExpand)
445  for (auto Op : FPOpToExpand)
449  }
450 
451  if (Subtarget.is64Bit()) {
456  }
457 
458  if (Subtarget.hasStdExtF()) {
461 
466 
469  }
470 
475 
477 
478  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
479  // Unfortunately this can't be determined just from the ISA naming string.
481  Subtarget.is64Bit() ? Legal : Custom);
482 
486  if (Subtarget.is64Bit())
488 
489  if (Subtarget.hasStdExtA()) {
492  } else {
494  }
495 
497 
498  if (Subtarget.hasVInstructions()) {
500 
502 
503  // RVV intrinsics may have illegal operands.
504  // We also need to custom legalize vmv.x.s.
509  if (Subtarget.is64Bit()) {
511  } else {
514  }
515 
518 
519  static const unsigned IntegerVPOps[] = {
520  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
521  ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
522  ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
523  ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
524  ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
525  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
526  ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
527  ISD::VP_MERGE, ISD::VP_SELECT};
528 
529  static const unsigned FloatingPointVPOps[] = {
530  ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
531  ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
532  ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
533  ISD::VP_SELECT};
534 
535  if (!Subtarget.is64Bit()) {
536  // We must custom-lower certain vXi64 operations on RV32 due to the vector
537  // element type being illegal.
540 
549 
550  setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
551  setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
552  setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
553  setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
554  setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
555  setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
556  setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
557  setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
558  }
559 
560  for (MVT VT : BoolVecVTs) {
562 
563  // Mask VTs are custom-expanded into a series of standard nodes
568 
571 
575  setOperationAction(ISD::VP_MERGE, VT, Expand);
576  setOperationAction(ISD::VP_SELECT, VT, Expand);
577 
578  setOperationAction(ISD::VP_AND, VT, Custom);
579  setOperationAction(ISD::VP_OR, VT, Custom);
580  setOperationAction(ISD::VP_XOR, VT, Custom);
581 
585 
586  setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
587  setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
588  setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
589 
590  // RVV has native int->float & float->int conversions where the
591  // element type sizes are within one power-of-two of each other. Any
592  // wider distances between type sizes have to be lowered as sequences
593  // which progressively narrow the gap in stages.
598 
599  // Expand all extending loads to types larger than this, and truncating
600  // stores from types larger than this.
601  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
602  setTruncStoreAction(OtherVT, VT, Expand);
603  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
604  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
605  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
606  }
607  }
608 
609  for (MVT VT : IntVecVTs) {
610  if (VT.getVectorElementType() == MVT::i64 &&
611  !Subtarget.hasVInstructionsI64())
612  continue;
613 
616 
617  // Vectors implement MULHS/MULHU.
620 
621  // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
622  if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
625  }
626 
631 
634 
638 
640 
641  // Custom-lower extensions and truncations from/to mask types.
645 
646  // RVV has native int->float & float->int conversions where the
647  // element type sizes are within one power-of-two of each other. Any
648  // wider distances between type sizes have to be lowered as sequences
649  // which progressively narrow the gap in stages.
654 
659 
660  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
661  // nodes which truncate by one power of two at a time.
663 
664  // Custom-lower insert/extract operations to simplify patterns.
667 
668  // Custom-lower reduction operations to set up the corresponding custom
669  // nodes' operands.
678 
679  for (unsigned VPOpc : IntegerVPOps)
680  setOperationAction(VPOpc, VT, Custom);
681 
684 
689 
690  setOperationAction(ISD::VP_LOAD, VT, Custom);
691  setOperationAction(ISD::VP_STORE, VT, Custom);
692  setOperationAction(ISD::VP_GATHER, VT, Custom);
693  setOperationAction(ISD::VP_SCATTER, VT, Custom);
694 
698 
701 
704 
705  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
706  setTruncStoreAction(VT, OtherVT, Expand);
707  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
708  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
709  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
710  }
711 
712  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
713  // type that can represent the value exactly.
714  if (VT.getVectorElementType() != MVT::i64) {
715  MVT FloatEltVT =
717  EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
718  if (isTypeLegal(FloatVT)) {
721  }
722  }
723  }
724 
725  // Expand various CCs to best match the RVV ISA, which natively supports UNE
726  // but no other unordered comparisons, and supports all ordered comparisons
727  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
728  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
729  // and we pattern-match those back to the "original", swapping operands once
730  // more. This way we catch both operations and both "vf" and "fv" forms with
731  // fewer patterns.
732  static const ISD::CondCode VFPCCToExpand[] = {
736  };
737 
738  // Sets common operation actions on RVV floating-point vector types.
739  const auto SetCommonVFPActions = [&](MVT VT) {
741  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
742  // sizes are within one power-of-two of each other. Therefore conversions
743  // between vXf16 and vXf64 must be lowered as sequences which convert via
744  // vXf32.
747  // Custom-lower insert/extract operations to simplify patterns.
750  // Expand various condition codes (explained above).
751  for (auto CC : VFPCCToExpand)
752  setCondCodeAction(CC, VT, Expand);
753 
756 
760 
765 
767 
770 
775 
776  setOperationAction(ISD::VP_LOAD, VT, Custom);
777  setOperationAction(ISD::VP_STORE, VT, Custom);
778  setOperationAction(ISD::VP_GATHER, VT, Custom);
779  setOperationAction(ISD::VP_SCATTER, VT, Custom);
780 
783 
787 
789 
790  for (unsigned VPOpc : FloatingPointVPOps)
791  setOperationAction(VPOpc, VT, Custom);
792  };
793 
794  // Sets common extload/truncstore actions on RVV floating-point vector
795  // types.
796  const auto SetCommonVFPExtLoadTruncStoreActions =
797  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
798  for (auto SmallVT : SmallerVTs) {
799  setTruncStoreAction(VT, SmallVT, Expand);
800  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
801  }
802  };
803 
804  if (Subtarget.hasVInstructionsF16())
805  for (MVT VT : F16VecVTs)
806  SetCommonVFPActions(VT);
807 
808  for (MVT VT : F32VecVTs) {
809  if (Subtarget.hasVInstructionsF32())
810  SetCommonVFPActions(VT);
811  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
812  }
813 
814  for (MVT VT : F64VecVTs) {
815  if (Subtarget.hasVInstructionsF64())
816  SetCommonVFPActions(VT);
817  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
818  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
819  }
820 
821  if (Subtarget.useRVVForFixedLengthVectors()) {
822  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
823  if (!useRVVForFixedLengthVectorVT(VT))
824  continue;
825 
826  // By default everything must be expanded.
827  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
829  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
830  setTruncStoreAction(VT, OtherVT, Expand);
831  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
832  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
833  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
834  }
835 
836  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
839 
842 
845 
848 
850 
852 
854 
856 
860 
861  setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
862  setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
863  setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
864 
869 
870  // Operations below are different between masks and other vectors.
871  if (VT.getVectorElementType() == MVT::i1) {
872  setOperationAction(ISD::VP_AND, VT, Custom);
873  setOperationAction(ISD::VP_OR, VT, Custom);
874  setOperationAction(ISD::VP_XOR, VT, Custom);
878  continue;
879  }
880 
881  // Use SPLAT_VECTOR to prevent type legalization from destroying the
882  // splats when type legalizing i64 scalar on RV32.
883  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
884  // improvements first.
885  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
888  }
889 
892 
897 
898  setOperationAction(ISD::VP_LOAD, VT, Custom);
899  setOperationAction(ISD::VP_STORE, VT, Custom);
900  setOperationAction(ISD::VP_GATHER, VT, Custom);
901  setOperationAction(ISD::VP_SCATTER, VT, Custom);
902 
916 
922 
923  // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
924  if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
927  }
928 
933 
936 
940 
941  // Custom-lower reduction operations to set up the corresponding custom
942  // nodes' operands.
948 
949  for (unsigned VPOpc : IntegerVPOps)
950  setOperationAction(VPOpc, VT, Custom);
951 
952  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
953  // type that can represent the value exactly.
954  if (VT.getVectorElementType() != MVT::i64) {
955  MVT FloatEltVT =
957  EVT FloatVT =
958  MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
959  if (isTypeLegal(FloatVT)) {
962  }
963  }
964  }
965 
966  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
967  if (!useRVVForFixedLengthVectorVT(VT))
968  continue;
969 
970  // By default everything must be expanded.
971  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
973  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
974  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
975  setTruncStoreAction(VT, OtherVT, Expand);
976  }
977 
978  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
981 
987 
994 
995  setOperationAction(ISD::VP_LOAD, VT, Custom);
996  setOperationAction(ISD::VP_STORE, VT, Custom);
997  setOperationAction(ISD::VP_GATHER, VT, Custom);
998  setOperationAction(ISD::VP_SCATTER, VT, Custom);
999 
1011 
1014 
1018 
1019  for (auto CC : VFPCCToExpand)
1020  setCondCodeAction(CC, VT, Expand);
1021 
1025 
1027 
1032 
1033  for (unsigned VPOpc : FloatingPointVPOps)
1034  setOperationAction(VPOpc, VT, Custom);
1035  }
1036 
1037  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1042  if (Subtarget.hasStdExtZfh())
1044  if (Subtarget.hasStdExtF())
1046  if (Subtarget.hasStdExtD())
1048  }
1049  }
1050 
1051  // Function alignments.
1052  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
1053  setMinFunctionAlignment(FunctionAlignment);
1054  setPrefFunctionAlignment(FunctionAlignment);
1055 
1057 
1058  // Jumps are expensive, compared to logic
1060 
1067  if (Subtarget.hasStdExtF()) {
1073  }
1074  if (Subtarget.hasVInstructions()) {
1078  setTargetDAGCombine(ISD::VP_GATHER);
1079  setTargetDAGCombine(ISD::VP_SCATTER);
1084  }
1085 }
1086 
1089  EVT VT) const {
1090  if (!VT.isVector())
1091  return getPointerTy(DL);
1092  if (Subtarget.hasVInstructions() &&
1093  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1096 }
1097 
1098 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1099  return Subtarget.getXLenVT();
1100 }
1101 
1103  const CallInst &I,
1104  MachineFunction &MF,
1105  unsigned Intrinsic) const {
1106  auto &DL = I.getModule()->getDataLayout();
1107  switch (Intrinsic) {
1108  default:
1109  return false;
1110  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1111  case Intrinsic::riscv_masked_atomicrmw_add_i32:
1112  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1113  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1114  case Intrinsic::riscv_masked_atomicrmw_max_i32:
1115  case Intrinsic::riscv_masked_atomicrmw_min_i32:
1116  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1117  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1118  case Intrinsic::riscv_masked_cmpxchg_i32: {
1119  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
1121  Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
1122  Info.ptrVal = I.getArgOperand(0);
1123  Info.offset = 0;
1124  Info.align = Align(4);
1127  return true;
1128  }
1129  case Intrinsic::riscv_masked_strided_load:
1131  Info.ptrVal = I.getArgOperand(1);
1132  Info.memVT = getValueType(DL, I.getType()->getScalarType());
1133  Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1136  return true;
1137  case Intrinsic::riscv_masked_strided_store:
1138  Info.opc = ISD::INTRINSIC_VOID;
1139  Info.ptrVal = I.getArgOperand(1);
1140  Info.memVT =
1141  getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1142  Info.align = Align(
1143  DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1144  8);
1147  return true;
1148  }
1149 }
1150 
1152  const AddrMode &AM, Type *Ty,
1153  unsigned AS,
1154  Instruction *I) const {
1155  // No global is ever allowed as a base.
1156  if (AM.BaseGV)
1157  return false;
1158 
1159  // Require a 12-bit signed offset.
1160  if (!isInt<12>(AM.BaseOffs))
1161  return false;
1162 
1163  switch (AM.Scale) {
1164  case 0: // "r+i" or just "i", depending on HasBaseReg.
1165  break;
1166  case 1:
1167  if (!AM.HasBaseReg) // allow "r+i".
1168  break;
1169  return false; // disallow "r+r" or "r+r+i".
1170  default:
1171  return false;
1172  }
1173 
1174  return true;
1175 }
1176 
1178  return isInt<12>(Imm);
1179 }
1180 
1182  return isInt<12>(Imm);
1183 }
1184 
1185 // On RV32, 64-bit integers are split into their high and low parts and held
1186 // in two different registers, so the trunc is free since the low register can
1187 // just be used.
1188 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1189  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1190  return false;
1191  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1192  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1193  return (SrcBits == 64 && DestBits == 32);
1194 }
1195 
1196 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1197  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1198  !SrcVT.isInteger() || !DstVT.isInteger())
1199  return false;
1200  unsigned SrcBits = SrcVT.getSizeInBits();
1201  unsigned DestBits = DstVT.getSizeInBits();
1202  return (SrcBits == 64 && DestBits == 32);
1203 }
1204 
1206  // Zexts are free if they can be combined with a load.
1207  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1208  // poorly with type legalization of compares preferring sext.
1209  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1210  EVT MemVT = LD->getMemoryVT();
1211  if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1212  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1213  LD->getExtensionType() == ISD::ZEXTLOAD))
1214  return true;
1215  }
1216 
1217  return TargetLowering::isZExtFree(Val, VT2);
1218 }
1219 
1221  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1222 }
1223 
1225  return Subtarget.hasStdExtZbb();
1226 }
1227 
1229  return Subtarget.hasStdExtZbb();
1230 }
1231 
1233  EVT VT = Y.getValueType();
1234 
1235  // FIXME: Support vectors once we have tests.
1236  if (VT.isVector())
1237  return false;
1238 
1239  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
1240  Subtarget.hasStdExtZbkb()) &&
1241  !isa<ConstantSDNode>(Y);
1242 }
1243 
1244 /// Check if sinking \p I's operands to I's basic block is profitable, because
1245 /// the operands can be folded into a target instruction, e.g.
1246 /// splats of scalars can fold into vector instructions.
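/// For example, in (add %v, (shufflevector (insertelement undef, %x, 0),
/// undef, zeroinitializer)), sinking the splat next to the add lets isel
/// select vadd.vx %v, %x instead of first materializing the splat in a
/// vector register.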
1247 bool RISCVTargetLowering::shouldSinkOperands(
1248  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1249  using namespace llvm::PatternMatch;
1250 
1251  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1252  return false;
1253 
1254  auto IsSinker = [&](Instruction *I, int Operand) {
1255  switch (I->getOpcode()) {
1256  case Instruction::Add:
1257  case Instruction::Sub:
1258  case Instruction::Mul:
1259  case Instruction::And:
1260  case Instruction::Or:
1261  case Instruction::Xor:
1262  case Instruction::FAdd:
1263  case Instruction::FSub:
1264  case Instruction::FMul:
1265  case Instruction::FDiv:
1266  case Instruction::ICmp:
1267  case Instruction::FCmp:
1268  return true;
1269  case Instruction::Shl:
1270  case Instruction::LShr:
1271  case Instruction::AShr:
1272  case Instruction::UDiv:
1273  case Instruction::SDiv:
1274  case Instruction::URem:
1275  case Instruction::SRem:
1276  return Operand == 1;
1277  case Instruction::Call:
1278  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1279  switch (II->getIntrinsicID()) {
1280  case Intrinsic::fma:
1281  return Operand == 0 || Operand == 1;
1282  // FIXME: Our patterns can only match vx/vf instructions when the splat
1283  // is on the RHS, because TableGen doesn't recognize our VP operations
1284  // as commutative.
1285  case Intrinsic::vp_add:
1286  case Intrinsic::vp_mul:
1287  case Intrinsic::vp_and:
1288  case Intrinsic::vp_or:
1289  case Intrinsic::vp_xor:
1290  case Intrinsic::vp_fadd:
1291  case Intrinsic::vp_fmul:
1292  case Intrinsic::vp_shl:
1293  case Intrinsic::vp_lshr:
1294  case Intrinsic::vp_ashr:
1295  case Intrinsic::vp_udiv:
1296  case Intrinsic::vp_sdiv:
1297  case Intrinsic::vp_urem:
1298  case Intrinsic::vp_srem:
1299  return Operand == 1;
1300  // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
1301  // explicit patterns for both LHS and RHS (as 'vr' versions).
1302  case Intrinsic::vp_sub:
1303  case Intrinsic::vp_fsub:
1304  case Intrinsic::vp_fdiv:
1305  return Operand == 0 || Operand == 1;
1306  default:
1307  return false;
1308  }
1309  }
1310  return false;
1311  default:
1312  return false;
1313  }
1314  };
1315 
1316  for (auto OpIdx : enumerate(I->operands())) {
1317  if (!IsSinker(I, OpIdx.index()))
1318  continue;
1319 
1320  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1321  // Make sure we are not already sinking this operand
1322  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1323  continue;
1324 
1325  // We are looking for a splat that can be sunk.
1326  if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1327  m_Undef(), m_ZeroMask())))
1328  continue;
1329 
1330  // All uses of the shuffle should be sunk to avoid duplicating it across gpr
1331  // and vector registers
1332  for (Use &U : Op->uses()) {
1333  Instruction *Insn = cast<Instruction>(U.getUser());
1334  if (!IsSinker(Insn, U.getOperandNo()))
1335  return false;
1336  }
1337 
1338  Ops.push_back(&Op->getOperandUse(0));
1339  Ops.push_back(&OpIdx.value());
1340  }
1341  return true;
1342 }
1343 
1345  bool ForCodeSize) const {
1346  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1347  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1348  return false;
1349  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1350  return false;
1351  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1352  return false;
1353  return Imm.isZero();
1354 }
1355 
1357  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1358  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1359  (VT == MVT::f64 && Subtarget.hasStdExtD());
1360 }
1361 
1363  CallingConv::ID CC,
1364  EVT VT) const {
1365  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1366  // We might still end up using a GPR but that will be decided based on ABI.
1367  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1368  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1369  return MVT::f32;
1370 
1372 }
1373 
1375  CallingConv::ID CC,
1376  EVT VT) const {
1377  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1378  // We might still end up using a GPR but that will be decided based on ABI.
1379  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1380  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1381  return 1;
1382 
1384 }
1385 
1386 // Changes the condition code and swaps operands if necessary, so the SetCC
1387 // operation matches one of the comparisons supported directly by branches
1388 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1389 // with 1/-1.
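 // For example, (setgt X, -1) becomes (setge X, 0), which maps onto BGE with
 // the zero register, and (setule X, Y) is rewritten as (setuge Y, X) so it
 // can use BGEU.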
1390 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1391  ISD::CondCode &CC, SelectionDAG &DAG) {
1392  // Convert X > -1 to X >= 0.
1393  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1394  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1395  CC = ISD::SETGE;
1396  return;
1397  }
1398  // Convert X < 1 to 0 >= X.
1399  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1400  RHS = LHS;
1401  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1402  CC = ISD::SETGE;
1403  return;
1404  }
1405 
1406  switch (CC) {
1407  default:
1408  break;
1409  case ISD::SETGT:
1410  case ISD::SETLE:
1411  case ISD::SETUGT:
1412  case ISD::SETULE:
1413  CC = ISD::getSetCCSwappedOperands(CC);
1414  std::swap(LHS, RHS);
1415  break;
1416  }
1417 }
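// getLMUL maps a scalable vector type to the register grouping it occupies,
// based on its known-minimum size: e.g. nxv1i64 and nxv8i8 are LMUL=1,
// nxv2i64 is LMUL=2, and nxv1i8 is the fractional LMUL=1/8. The known-minimum
// size of i1 vectors is scaled by 8 first, so nxv8i1 also maps to LMUL=1.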
1418 
1419 RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
1420  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1421  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1422  if (VT.getVectorElementType() == MVT::i1)
1423  KnownSize *= 8;
1424 
1425  switch (KnownSize) {
1426  default:
1427  llvm_unreachable("Invalid LMUL.");
1428  case 8:
1429  return RISCVII::VLMUL::LMUL_F8;
1430  case 16:
1431  return RISCVII::VLMUL::LMUL_F4;
1432  case 32:
1433  return RISCVII::VLMUL::LMUL_F2;
1434  case 64:
1435  return RISCVII::VLMUL::LMUL_1;
1436  case 128:
1437  return RISCVII::VLMUL::LMUL_2;
1438  case 256:
1439  return RISCVII::VLMUL::LMUL_4;
1440  case 512:
1441  return RISCVII::VLMUL::LMUL_8;
1442  }
1443 }
1444 
1445 static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
1446  switch (LMul) {
1447  default:
1448  llvm_unreachable("Invalid LMUL.");
1449  case RISCVII::VLMUL::LMUL_F8:
1450  case RISCVII::VLMUL::LMUL_F4:
1451  case RISCVII::VLMUL::LMUL_F2:
1452  case RISCVII::VLMUL::LMUL_1:
1453  return RISCV::VRRegClassID;
1454  case RISCVII::VLMUL::LMUL_2:
1455  return RISCV::VRM2RegClassID;
1456  case RISCVII::VLMUL::LMUL_4:
1457  return RISCV::VRM4RegClassID;
1458  case RISCVII::VLMUL::LMUL_8:
1459  return RISCV::VRM8RegClassID;
1460  }
1461 }
1462 
1463 static unsigned getSubregIndexByMVT(MVT VT, unsigned Index) {
1464  RISCVII::VLMUL LMUL = getLMUL(VT);
1465  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1466  LMUL == RISCVII::VLMUL::LMUL_F4 ||
1467  LMUL == RISCVII::VLMUL::LMUL_F2 ||
1468  LMUL == RISCVII::VLMUL::LMUL_1) {
1469  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1470  "Unexpected subreg numbering");
1471  return RISCV::sub_vrm1_0 + Index;
1472  }
1473  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1474  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1475  "Unexpected subreg numbering");
1476  return RISCV::sub_vrm2_0 + Index;
1477  }
1478  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1479  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1480  "Unexpected subreg numbering");
1481  return RISCV::sub_vrm4_0 + Index;
1482  }
1483  llvm_unreachable("Invalid vector type.");
1484 }
1485 
1486 static unsigned getRegClassIDForVecVT(MVT VT) {
1487  if (VT.getVectorElementType() == MVT::i1)
1488  return RISCV::VRRegClassID;
1489  return getRegClassIDForLMUL(getLMUL(VT));
1490 }
1491 
1492 // Attempt to decompose a subvector insert/extract between VecVT and
1493 // SubVecVT via subregister indices. Returns the subregister index that
1494 // can perform the subvector insert/extract with the given element index, as
1495 // well as the index corresponding to any leftover subvectors that must be
1496 // further inserted/extracted within the register class for SubVecVT.
1497 std::pair<unsigned, unsigned>
1498 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
1499  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1500  const RISCVRegisterInfo *TRI) {
1501  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1502  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1503  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1504  "Register classes not ordered");
1505  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1506  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1507  // Try to compose a subregister index that takes us from the incoming
1508  // LMUL>1 register class down to the outgoing one. At each step we half
1509  // the LMUL:
1510  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1511  // Note that this is not guaranteed to find a subregister index, such as
1512  // when we are extracting from one VR type to another.
1513  unsigned SubRegIdx = RISCV::NoSubRegister;
1514  for (const unsigned RCID :
1515  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1516  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1517  VecVT = VecVT.getHalfNumVectorElementsVT();
1518  bool IsHi =
1519  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1520  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1521  getSubregIndexByMVT(VecVT, IsHi));
1522  if (IsHi)
1523  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1524  }
1525  return {SubRegIdx, InsertExtractIdx};
1526 }
1527 
1528 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1529 // stores for those types.
1530 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1531  return !Subtarget.useRVVForFixedLengthVectors() ||
1533 }
1534 
1536  if (ScalarTy->isPointerTy())
1537  return true;
1538 
1539  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1540  ScalarTy->isIntegerTy(32))
1541  return true;
1542 
1543  if (ScalarTy->isIntegerTy(64))
1544  return Subtarget.hasVInstructionsI64();
1545 
1546  if (ScalarTy->isHalfTy())
1547  return Subtarget.hasVInstructionsF16();
1548  if (ScalarTy->isFloatTy())
1549  return Subtarget.hasVInstructionsF32();
1550  if (ScalarTy->isDoubleTy())
1551  return Subtarget.hasVInstructionsF64();
1552 
1553  return false;
1554 }
1555 
1557  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1558  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1559  "Unexpected opcode");
1560  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1561  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1562  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1563  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1564  if (!II)
1565  return SDValue();
1566  return Op.getOperand(II->VLOperand + 1 + HasChain);
1567 }
1568 
1569 static bool useRVVForFixedLengthVectorVT(MVT VT,
1570  const RISCVSubtarget &Subtarget) {
1571  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1572  if (!Subtarget.useRVVForFixedLengthVectors())
1573  return false;
1574 
1575  // We only support a set of vector types with a consistent maximum fixed size
1576  // across all supported vector element types to avoid legalization issues.
1577  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1578  // fixed-length vector type we support is 1024 bytes.
1579  if (VT.getFixedSizeInBits() > 1024 * 8)
1580  return false;
1581 
1582  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1583 
1584  MVT EltVT = VT.getVectorElementType();
1585 
1586  // Don't use RVV for vectors we cannot scalarize if required.
1587  switch (EltVT.SimpleTy) {
1588  // i1 is supported but has different rules.
1589  default:
1590  return false;
1591  case MVT::i1:
1592  // Masks can only use a single register.
1593  if (VT.getVectorNumElements() > MinVLen)
1594  return false;
1595  MinVLen /= 8;
1596  break;
1597  case MVT::i8:
1598  case MVT::i16:
1599  case MVT::i32:
1600  break;
1601  case MVT::i64:
1602  if (!Subtarget.hasVInstructionsI64())
1603  return false;
1604  break;
1605  case MVT::f16:
1606  if (!Subtarget.hasVInstructionsF16())
1607  return false;
1608  break;
1609  case MVT::f32:
1610  if (!Subtarget.hasVInstructionsF32())
1611  return false;
1612  break;
1613  case MVT::f64:
1614  if (!Subtarget.hasVInstructionsF64())
1615  return false;
1616  break;
1617  }
1618 
1619  // Reject elements larger than ELEN.
1620  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1621  return false;
1622 
1623  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1624  // Don't use RVV for types that don't fit.
1625  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1626  return false;
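 // For example, with a 128-bit minimum VLEN and the default maximum LMUL of 8,
 // v32i32 (1024 bits) needs LMUL=8 and is accepted, while v64i32 would need
 // LMUL=16 and is rejected here, falling back to generic legalization.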
1627 
1628  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1629  // the base fixed length RVV support in place.
1630  if (!VT.isPow2VectorType())
1631  return false;
1632 
1633  return true;
1634 }
1635 
1636 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1638 }
1639 
1640 // Return the largest legal scalable vector type that matches VT's element type.
1642  const RISCVSubtarget &Subtarget) {
1643  // This may be called before legal types are setup.
1644  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1645  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1646  "Expected legal fixed length vector!");
1647 
1648  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1649  unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1650 
1651  MVT EltVT = VT.getVectorElementType();
1652  switch (EltVT.SimpleTy) {
1653  default:
1654  llvm_unreachable("unexpected element type for RVV container");
1655  case MVT::i1:
1656  case MVT::i8:
1657  case MVT::i16:
1658  case MVT::i32:
1659  case MVT::i64:
1660  case MVT::f16:
1661  case MVT::f32:
1662  case MVT::f64: {
1663  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1664  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1665  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
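 // For example, with a 128-bit minimum VLEN: v4i32 becomes nxv2i32 (LMUL=1),
 // v2i32 becomes the fractional-LMUL container nxv1i32, and v16i32 becomes
 // nxv8i32 (LMUL=4).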
1666  unsigned NumElts =
1667  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1668  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1669  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1670  return MVT::getScalableVectorVT(EltVT, NumElts);
1671  }
1672  }
1673 }
1674 
1676  const RISCVSubtarget &Subtarget) {
1678  Subtarget);
1679 }
1680 
1683 }
1684 
1685 // Grow V to consume an entire RVV register.
1687  const RISCVSubtarget &Subtarget) {
1688  assert(VT.isScalableVector() &&
1689  "Expected to convert into a scalable vector!");
1691  "Expected a fixed length vector operand!");
1692  SDLoc DL(V);
1693  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1694  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1695 }
1696 
1697 // Shrink V so it's just big enough to maintain a VT's worth of data.
1699  const RISCVSubtarget &Subtarget) {
1700  assert(VT.isFixedLengthVector() &&
1701  "Expected to convert into a fixed length vector!");
1703  "Expected a scalable vector operand!");
1704  SDLoc DL(V);
1705  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1706  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1707 }
1708 
1709 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1710 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1711 // the vector type that it is contained in.
1712 static std::pair<SDValue, SDValue>
1713 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1714  const RISCVSubtarget &Subtarget) {
1715  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1716  MVT XLenVT = Subtarget.getXLenVT();
1717  SDValue VL = VecVT.isFixedLengthVector()
1718  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1719  : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1720  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1721  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1722  return {Mask, VL};
1723 }
1724 
1725 // As above but assuming the given type is a scalable vector type.
1726 static std::pair<SDValue, SDValue>
1728  const RISCVSubtarget &Subtarget) {
1729  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1730  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1731 }
1732 
1733 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
1734 // of either is (currently) supported. This can get us into an infinite loop
1735 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1736 // as a ..., etc.
1737 // Until either (or both) of these can reliably lower any node, reporting that
1738 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1739 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1740 // which is not desirable.
1742  EVT VT, unsigned DefinedValues) const {
1743  return false;
1744 }
1745 
1747  // Only splats are currently supported.
1748  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1749  return true;
1750 
1751  return false;
1752 }
1753 
1754 static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
1755  const RISCVSubtarget &Subtarget) {
1756  // RISCV FP-to-int conversions saturate to the destination register size, but
1757  // don't produce 0 for nan. We can use a conversion instruction and fix the
1758  // nan case with a compare and a select.
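 // For example, (fp_to_sint_sat f32 %x to i32) is lowered to an FCVT with the
 // RTZ rounding mode plus a select on (setuo %x, %x) that forces the result
 // to 0 when %x is NaN.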
1759  SDValue Src = Op.getOperand(0);
1760 
1761  EVT DstVT = Op.getValueType();
1762  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1763 
1764  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1765  unsigned Opc;
1766  if (SatVT == DstVT)
1767  Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1768  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1769  Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1770  else
1771  return SDValue();
1772  // FIXME: Support other SatVTs by clamping before or after the conversion.
1773 
1774  SDLoc DL(Op);
1775  SDValue FpToInt = DAG.getNode(
1776  Opc, DL, DstVT, Src,
1777  DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1778 
1779  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1780  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1781 }
1782 
1783 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1784 // and back. Taking care to avoid converting values that are nan or already
1785 // correct.
1786 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1787 // have FRM dependencies modeled yet.
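// For example, ftrunc of a v4f32 becomes fp_to_sint followed by sint_to_fp,
// with fcopysign to preserve -0.0 and a final fabs/setcc/select so that NaNs
// and values of magnitude >= 2^23 (already integral for f32) pass through
// unchanged.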
1788 static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
1789  MVT VT = Op.getSimpleValueType();
1790  assert(VT.isVector() && "Unexpected type");
1791 
1792  SDLoc DL(Op);
1793 
1794  // Freeze the source since we are increasing the number of uses.
1795  SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
1796 
1797  // Truncate to integer and convert back to FP.
1798  MVT IntVT = VT.changeVectorElementTypeToInteger();
1799  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
1800  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1801 
1802  MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1803 
1804  if (Op.getOpcode() == ISD::FCEIL) {
1805  // If the truncated value is the greater than or equal to the original
1806  // value, we've computed the ceil. Otherwise, we went the wrong way and
1807  // need to increase by 1.
1808  // FIXME: This should use a masked operation. Handle here or in isel?
1809  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
1810  DAG.getConstantFP(1.0, DL, VT));
1811  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
1812  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1813  } else if (Op.getOpcode() == ISD::FFLOOR) {
1814  // If the truncated value is the less than or equal to the original value,
1815  // we've computed the floor. Otherwise, we went the wrong way and need to
1816  // decrease by 1.
1817  // FIXME: This should use a masked operation. Handle here or in isel?
1818  SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
1819  DAG.getConstantFP(1.0, DL, VT));
1820  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
1821  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1822  }
1823 
1824  // Restore the original sign so that -0.0 is preserved.
1825  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1826 
1827  // Determine the largest integer that can be represented exactly. This and
1828  // values larger than it don't have any fractional bits so don't need to
1829  // be converted.
1830  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1831  unsigned Precision = APFloat::semanticsPrecision(FltSem);
1832  APFloat MaxVal = APFloat(FltSem);
1833  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1834  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1835  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1836 
1837  // If abs(Src) was larger than MaxVal or nan, keep it.
1838  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1839  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1840  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1841 }
1842 
1844  const RISCVSubtarget &Subtarget) {
1845  MVT VT = Op.getSimpleValueType();
1846  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1847 
1848  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1849 
1850  SDLoc DL(Op);
1851  SDValue Mask, VL;
1852  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1853 
1854  unsigned Opc =
1856  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1857  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1858 }
1859 
1860 struct VIDSequence {
1861  int64_t StepNumerator;
1862  unsigned StepDenominator;
1863  int64_t Addend;
1864 };
1865 
1866 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1867 // to the (non-zero) step S and start value X. This can be then lowered as the
1868 // RVV sequence (VID * S) + X, for example.
1869 // The step S is represented as an integer numerator divided by a positive
1870 // denominator. Note that the implementation currently only identifies
1871 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1872 // cannot detect 2/3, for example.
1873 // Note that this method will also match potentially unappealing index
1874 // sequences, like <i32 0, i32 50939494>, however it is left to the caller to
1875 // determine whether this is worth generating code for.
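// For example, <i32 1, i32 3, i32 5, i32 7> matches with StepNumerator=2,
// StepDenominator=1 and Addend=1, while <i32 0, i32 0, i32 1, i32 1> matches
// with StepNumerator=1, StepDenominator=2 and Addend=0.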
1876 static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
1877  unsigned NumElts = Op.getNumOperands();
1878  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1879  if (!Op.getValueType().isInteger())
1880  return None;
1881 
1882  Optional<unsigned> SeqStepDenom;
1883  Optional<int64_t> SeqStepNum, SeqAddend;
1884  Optional<std::pair<uint64_t, unsigned>> PrevElt;
1885  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1886  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1887  // Assume undef elements match the sequence; we just have to be careful
1888  // when interpolating across them.
1889  if (Op.getOperand(Idx).isUndef())
1890  continue;
1891  // The BUILD_VECTOR must be all constants.
1892  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1893  return None;
1894 
1895  uint64_t Val = Op.getConstantOperandVal(Idx) &
1896  maskTrailingOnes<uint64_t>(EltSizeInBits);
1897 
1898  if (PrevElt) {
1899  // Calculate the step since the last non-undef element, and ensure
1900  // it's consistent across the entire sequence.
1901  unsigned IdxDiff = Idx - PrevElt->second;
1902  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1903 
1904  // A value difference of zero means that we're somewhere in the middle
1905  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1906  // step change before evaluating the sequence.
1907  if (ValDiff != 0) {
1908  int64_t Remainder = ValDiff % IdxDiff;
1909  // Normalize the step if it's greater than 1.
1910  if (Remainder != ValDiff) {
1911  // The difference must cleanly divide the element span.
1912  if (Remainder != 0)
1913  return None;
1914  ValDiff /= IdxDiff;
1915  IdxDiff = 1;
1916  }
1917 
1918  if (!SeqStepNum)
1919  SeqStepNum = ValDiff;
1920  else if (ValDiff != SeqStepNum)
1921  return None;
1922 
1923  if (!SeqStepDenom)
1924  SeqStepDenom = IdxDiff;
1925  else if (IdxDiff != *SeqStepDenom)
1926  return None;
1927  }
1928  }
1929 
1930  // Record and/or check any addend.
1931  if (SeqStepNum && SeqStepDenom) {
1932  uint64_t ExpectedVal =
1933  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1934  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1935  if (!SeqAddend)
1936  SeqAddend = Addend;
1937  else if (SeqAddend != Addend)
1938  return None;
1939  }
1940 
1941  // Record this non-undef element for later.
1942  if (!PrevElt || PrevElt->first != Val)
1943  PrevElt = std::make_pair(Val, Idx);
1944  }
1945  // We need to have logged both a step and an addend for this to count as
1946  // a legal index sequence.
1947  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1948  return None;
1949 
1950  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1951 }
1952 
1954  const RISCVSubtarget &Subtarget) {
1955  MVT VT = Op.getSimpleValueType();
1956  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1957 
1958  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1959 
1960  SDLoc DL(Op);
1961  SDValue Mask, VL;
1962  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1963 
1964  MVT XLenVT = Subtarget.getXLenVT();
1965  unsigned NumElts = Op.getNumOperands();
1966 
1967  if (VT.getVectorElementType() == MVT::i1) {
1968  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1969  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1970  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1971  }
1972 
1973  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1974  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1975  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1976  }
1977 
1978  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1979  // scalar integer chunks whose bit-width depends on the number of mask
1980  // bits and XLEN.
1981  // First, determine the most appropriate scalar integer type to use. This
1982  // is at most XLenVT, but may be shrunk to a smaller vector element type
1983  // according to the size of the final vector - use i8 chunks rather than
1984  // XLenVT if we're producing a v8i1. This results in more consistent
1985  // codegen across RV32 and RV64.
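 // For example, a constant v8i1 mask is materialized as a single i8 constant
 // inserted into a v1i8 vector and bitcast back to v8i1, rather than as eight
 // separate element insertions.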
1986  unsigned NumViaIntegerBits =
1987  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1988  NumViaIntegerBits = std::min(NumViaIntegerBits,
1989  Subtarget.getMaxELENForFixedLengthVectors());
1990  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1991  // If we have to use more than one INSERT_VECTOR_ELT then this
1992  // optimization is likely to increase code size; avoid performing it in
1993  // such a case. We can use a load from a constant pool in this case.
1994  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1995  return SDValue();
1996  // Now we can create our integer vector type. Note that it may be larger
1997  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1998  MVT IntegerViaVecVT =
1999  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2000  divideCeil(NumElts, NumViaIntegerBits));
2001 
2002  uint64_t Bits = 0;
2003  unsigned BitPos = 0, IntegerEltIdx = 0;
2004  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2005 
2006  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2007  // Once we accumulate enough bits to fill our scalar type, insert into
2008  // our vector and clear our accumulated data.
2009  if (I != 0 && I % NumViaIntegerBits == 0) {
2010  if (NumViaIntegerBits <= 32)
2011  Bits = SignExtend64(Bits, 32);
2012  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2013  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2014  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2015  Bits = 0;
2016  BitPos = 0;
2017  IntegerEltIdx++;
2018  }
2019  SDValue V = Op.getOperand(I);
2020  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2021  Bits |= ((uint64_t)BitValue << BitPos);
2022  }
2023 
2024  // Insert the (remaining) scalar value into position in our integer
2025  // vector type.
2026  if (NumViaIntegerBits <= 32)
2027  Bits = SignExtend64(Bits, 32);
2028  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2029  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2030  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2031 
2032  if (NumElts < NumViaIntegerBits) {
2033  // If we're producing a smaller vector than our minimum legal integer
2034  // type, bitcast to the equivalent (known-legal) mask type, and extract
2035  // our final mask.
2036  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2037  Vec = DAG.getBitcast(MVT::v8i1, Vec);
2038  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2039  DAG.getConstant(0, DL, XLenVT));
2040  } else {
2041  // Else we must have produced an integer type with the same size as the
2042  // mask type; bitcast for the final result.
2043  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2044  Vec = DAG.getBitcast(VT, Vec);
2045  }
2046 
2047  return Vec;
2048  }
2049 
2050  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2051  // vector type, we have a legal equivalently-sized i8 type, so we can use
2052  // that.
2053  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2054  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2055 
2056  SDValue WideVec;
2057  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2058  // For a splat, perform a scalar truncate before creating the wider
2059  // vector.
2060  assert(Splat.getValueType() == XLenVT &&
2061  "Unexpected type for i1 splat value");
2062  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2063  DAG.getConstant(1, DL, XLenVT));
2064  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2065  } else {
2066  SmallVector<SDValue, 8> Ops(Op->op_values());
2067  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2068  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2069  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2070  }
2071 
2072  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2073  }
2074 
2075  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2076  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2078  Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
2079  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2080  }
2081 
2082  // Try and match index sequences, which we can lower to the vid instruction
2083  // with optional modifications. An all-undef vector is matched by
2084  // getSplatValue, above.
2085  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2086  int64_t StepNumerator = SimpleVID->StepNumerator;
2087  unsigned StepDenominator = SimpleVID->StepDenominator;
2088  int64_t Addend = SimpleVID->Addend;
2089 
2090  assert(StepNumerator != 0 && "Invalid step");
2091  bool Negate = false;
2092  int64_t SplatStepVal = StepNumerator;
2093  unsigned StepOpcode = ISD::MUL;
2094  if (StepNumerator != 1) {
2095  if (isPowerOf2_64(std::abs(StepNumerator))) {
2096  Negate = StepNumerator < 0;
2097  StepOpcode = ISD::SHL;
2098  SplatStepVal = Log2_64(std::abs(StepNumerator));
2099  }
2100  }
2101 
2102  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2103  // threshold since it's the immediate value many RVV instructions accept.
2104  // There is no vmul.vi instruction so ensure multiply constant can fit in
2105  // a single addi instruction.
2106  if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2107  (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2108  isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
2109  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2110  // Convert right out of the scalable type so we can use standard ISD
2111  // nodes for the rest of the computation. If we used scalable types with
2112  // these, we'd lose the fixed-length vector info and generate worse
2113  // vsetvli code.
2114  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2115  if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2116  (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2117  SDValue SplatStep = DAG.getSplatVector(
2118  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2119  VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2120  }
2121  if (StepDenominator != 1) {
2122  SDValue SplatStep = DAG.getSplatVector(
2123  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2124  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2125  }
2126  if (Addend != 0 || Negate) {
2127  SDValue SplatAddend =
2128  DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2129  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2130  }
2131  return VID;
2132  }
2133  }
2134 
2135  // Attempt to detect "hidden" splats, which only reveal themselves as splats
2136  // when re-interpreted as a vector with a larger element type. For example,
2137  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2138  // could be instead splat as
2139  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2140  // TODO: This optimization could also work on non-constant splats, but it
2141  // would require bit-manipulation instructions to construct the splat value.
2143  unsigned EltBitSize = VT.getScalarSizeInBits();
2144  const auto *BV = cast<BuildVectorSDNode>(Op);
2145  if (VT.isInteger() && EltBitSize < 64 &&
2147  BV->getRepeatedSequence(Sequence) &&
2148  (Sequence.size() * EltBitSize) <= 64) {
2149  unsigned SeqLen = Sequence.size();
2150  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2151  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2152  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2153  ViaIntVT == MVT::i64) &&
2154  "Unexpected sequence type");
2155 
2156  unsigned EltIdx = 0;
2157  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2158  uint64_t SplatValue = 0;
2159  // Construct the amalgamated value which can be splatted as this larger
2160  // vector type.
2161  for (const auto &SeqV : Sequence) {
2162  if (!SeqV.isUndef())
2163  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2164  << (EltIdx * EltBitSize));
2165  EltIdx++;
2166  }
2167 
2168  // On RV64, sign-extend from 32 to 64 bits where possible in order to
2169  // achieve better constant materialization.
2170  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2171  SplatValue = SignExtend64(SplatValue, 32);
2172 
2173  // Since we can't introduce illegal i64 types at this stage, we can only
2174  // perform an i64 splat on RV32 if it is its own sign-extended value. That
2175  // way we can use RVV instructions to splat.
2176  assert((ViaIntVT.bitsLE(XLenVT) ||
2177  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2178  "Unexpected bitcast sequence");
2179  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2180  SDValue ViaVL =
2181  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2182  MVT ViaContainerVT =
2183  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2184  SDValue Splat =
2185  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2186  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2187  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2188  return DAG.getBitcast(VT, Splat);
2189  }
2190  }
2191 
2192  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2193  // which constitute a large proportion of the elements. In such cases we can
2194  // splat a vector with the dominant element and make up the shortfall with
2195  // INSERT_VECTOR_ELTs.
2196  // Note that this includes vectors of 2 elements by association. The
2197  // upper-most element is the "dominant" one, allowing us to use a splat to
2198  // "insert" the upper element, and an insert of the lower element at position
2199  // 0, which improves codegen.
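  // For example, in <2, 2, 7, 2> the value 2 is dominant (3 of 4 defined
  // elements), so we splat 2 and then insert the single 7 at index 2, assuming
  // we are not optimizing for size.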
2200  SDValue DominantValue;
2201  unsigned MostCommonCount = 0;
2202  DenseMap<SDValue, unsigned> ValueCounts;
2203  unsigned NumUndefElts =
2204  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2205 
2206  // Track the number of scalar loads we know we'd be inserting, estimated as
2207  // any non-zero floating-point constant. Other kinds of element are either
2208  // already in registers or are materialized on demand. The threshold at which
2209  // a vector load is more desirable than several scalar materialization and
2210  // vector-insertion instructions is not known.
2211  unsigned NumScalarLoads = 0;
2212 
2213  for (SDValue V : Op->op_values()) {
2214  if (V.isUndef())
2215  continue;
2216 
2217  ValueCounts.insert(std::make_pair(V, 0));
2218  unsigned &Count = ValueCounts[V];
2219 
2220  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2221  NumScalarLoads += !CFP->isExactlyValue(+0.0);
2222 
2223  // Is this value dominant? In case of a tie, prefer the highest element as
2224  // it's cheaper to insert near the beginning of a vector than it is at the
2225  // end.
2226  if (++Count >= MostCommonCount) {
2227  DominantValue = V;
2228  MostCommonCount = Count;
2229  }
2230  }
2231 
2232  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2233  unsigned NumDefElts = NumElts - NumUndefElts;
2234  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2235 
2236  // Don't perform this optimization when optimizing for size, since
2237  // materializing elements and inserting them tends to cause code bloat.
2238  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2239  ((MostCommonCount > DominantValueCountThreshold) ||
2240  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2241  // Start by splatting the most common element.
2242  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2243 
2244  DenseSet<SDValue> Processed{DominantValue};
2245  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2246  for (const auto &OpIdx : enumerate(Op->ops())) {
2247  const SDValue &V = OpIdx.value();
2248  if (V.isUndef() || !Processed.insert(V).second)
2249  continue;
2250  if (ValueCounts[V] == 1) {
2251  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2252  DAG.getConstant(OpIdx.index(), DL, XLenVT));
2253  } else {
2254  // Blend in all instances of this value using a VSELECT, using a
2255  // mask where each bit signals whether that element is the one
2256  // we're after.
2257  SmallVector<SDValue> Ops;
2258  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2259  return DAG.getConstant(V == V1, DL, XLenVT);
2260  });
2261  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2262  DAG.getBuildVector(SelMaskTy, DL, Ops),
2263  DAG.getSplatBuildVector(VT, DL, V), Vec);
2264  }
2265  }
2266 
2267  return Vec;
2268  }
2269 
2270  return SDValue();
2271 }
2272 
2273 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
2274  SDValue Hi, SDValue VL, SelectionDAG &DAG) {
2275  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2276  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2277  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2278  // If the Hi constant is just the sign bits of Lo (a sign-extended i32),
2279  // lower this as a custom node to try and match RVV vector/scalar instructions.
2280  if ((LoC >> 31) == HiC)
2281  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
2282 
2283  // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
2284  // vmv.v.x whose EEW = 32 to lower it.
2285  auto *Const = dyn_cast<ConstantSDNode>(VL);
2286  if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
2287  MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2288  // TODO: if vl <= min(VLMAX), we can also do this. But we could not
2289  // access the subtarget here now.
2290  auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
2291  return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2292  }
2293  }
2294 
2295  // Fall back to a stack store and stride x0 vector load.
2296  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
2297 }
2298 
2299 // Called by type legalization to handle splat of i64 on RV32.
2300 // FIXME: We can optimize this when the type has sign or zero bits in one
2301 // of the halves.
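// For example, splatting the i64 constant -2 on RV32 splits into Lo = -2 and
// Hi = -1; Hi matches the sign bits of Lo, so this becomes a single vmv.v.x of
// Lo. A constant such as 0x0000000100000002 (Lo = 2, Hi = 1) has to fall back
// to the stack-store + x0-strided-load expansion instead.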
2302 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
2303  SDValue VL, SelectionDAG &DAG) {
2304  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2305  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2306  DAG.getConstant(0, DL, MVT::i32));
2307  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2308  DAG.getConstant(1, DL, MVT::i32));
2309  return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
2310 }
2311 
2312 // This function lowers a splat of a scalar operand Splat with the vector
2313 // length VL. It ensures the final sequence is type legal, which is useful when
2314 // lowering a splat after type legalization.
2315 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
2316  SelectionDAG &DAG,
2317  const RISCVSubtarget &Subtarget) {
2318  if (VT.isFloatingPoint()) {
2319  // If VL is 1, we could use vfmv.s.f.
2320  if (isOneConstant(VL))
2321  return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
2322  Scalar, VL);
2323  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
2324  }
2325 
2326  MVT XLenVT = Subtarget.getXLenVT();
2327 
2328  // Simplest case is that the operand needs to be promoted to XLenVT.
2329  if (Scalar.getValueType().bitsLE(XLenVT)) {
2330  // If the operand is a constant, sign extend to increase our chances
2331  // of being able to use a .vi instruction. ANY_EXTEND would become a
2332  // zero extend and the simm5 check in isel would fail.
2333  // FIXME: Should we ignore the upper bits in isel instead?
2334  unsigned ExtOpc =
2335  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2336  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2337  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2338  // If VL is 1 and the scalar value won't benefit from immediate, we could
2339  // use vmv.s.x.
2340  if (isOneConstant(VL) &&
2341  (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2342  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
2343  VL);
2344  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
2345  }
2346 
2347  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2348  "Unexpected scalar for splat lowering!");
2349 
2350  if (isOneConstant(VL) && isNullConstant(Scalar))
2351  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
2352  DAG.getConstant(0, DL, XLenVT), VL);
2353 
2354  // Otherwise use the more complicated splatting algorithm.
2355  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
2356 }
2357 
2358 // Is the mask a slidedown that shifts in undefs?
2359 static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
2360  int Size = Mask.size();
2361 
2362  // Elements shifted in should be undef.
2363  auto CheckUndefs = [&](int Shift) {
2364  for (int i = Size - Shift; i != Size; ++i)
2365  if (Mask[i] >= 0)
2366  return false;
2367  return true;
2368  };
2369 
2370  // Elements should be shifted or undef.
2371  auto MatchShift = [&](int Shift) {
2372  for (int i = 0; i != Size - Shift; ++i)
2373  if (Mask[i] >= 0 && Mask[i] != Shift + i)
2374  return false;
2375  return true;
2376  };
2377 
2378  // Try all possible shifts.
2379  for (int Shift = 1; Shift != Size; ++Shift)
2380  if (CheckUndefs(Shift) && MatchShift(Shift))
2381  return Shift;
2382 
2383  // No match.
2384  return -1;
2385 }
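// For example, matchShuffleAsSlideDown(<2, 3, -1, -1>) returns 2: elements 2
// and 3 move down to positions 0 and 1 and undefs are shifted in at the top,
// which is then lowered via RISCVISD::VSLIDEDOWN_VL with a slide amount of 2.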
2386 
2387 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2388  const RISCVSubtarget &Subtarget) {
2389  // We need to be able to widen elements to the next larger integer type.
2390  if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
2391  return false;
2392 
2393  int Size = Mask.size();
2394  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2395 
2396  int Srcs[] = {-1, -1};
2397  for (int i = 0; i != Size; ++i) {
2398  // Ignore undef elements.
2399  if (Mask[i] < 0)
2400  continue;
2401 
2402  // Is this an even or odd element.
2403  int Pol = i % 2;
2404 
2405  // Ensure we consistently use the same source for this element polarity.
2406  int Src = Mask[i] / Size;
2407  if (Srcs[Pol] < 0)
2408  Srcs[Pol] = Src;
2409  if (Srcs[Pol] != Src)
2410  return false;
2411 
2412  // Make sure the element within the source is appropriate for this element
2413  // in the destination.
2414  int Elt = Mask[i] % Size;
2415  if (Elt != i / 2)
2416  return false;
2417  }
2418 
2419  // We need to find a source for each polarity and they can't be the same.
2420  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2421  return false;
2422 
2423  // Swap the sources if the second source was in the even polarity.
2424  SwapSources = Srcs[0] > Srcs[1];
2425 
2426  return true;
2427 }
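// As an example of the check above, the v8i8 mask <0, 8, 1, 9, 2, 10, 3, 11>
// is an interleave of the low halves of the two sources: even result elements
// come from source 0 and odd ones from source 1, so Srcs = {0, 1} and
// SwapSources is false.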
2428 
2429 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2430  const RISCVSubtarget &Subtarget) {
2431  SDValue V1 = Op.getOperand(0);
2432  SDValue V2 = Op.getOperand(1);
2433  SDLoc DL(Op);
2434  MVT XLenVT = Subtarget.getXLenVT();
2435  MVT VT = Op.getSimpleValueType();
2436  unsigned NumElts = VT.getVectorNumElements();
2437  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2438 
2439  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2440 
2441  SDValue TrueMask, VL;
2442  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2443 
2444  if (SVN->isSplat()) {
2445  const int Lane = SVN->getSplatIndex();
2446  if (Lane >= 0) {
2447  MVT SVT = VT.getVectorElementType();
2448 
2449  // Turn splatted vector load into a strided load with an X0 stride.
2450  SDValue V = V1;
2451  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2452  // with undef.
2453  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2454  int Offset = Lane;
2455  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2456  int OpElements =
2457  V.getOperand(0).getSimpleValueType().getVectorNumElements();
2458  V = V.getOperand(Offset / OpElements);
2459  Offset %= OpElements;
2460  }
2461 
2462  // We need to ensure the load isn't atomic or volatile.
2463  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2464  auto *Ld = cast<LoadSDNode>(V);
2465  Offset *= SVT.getStoreSize();
2466  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2467  TypeSize::Fixed(Offset), DL);
2468 
2469  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2470  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2471  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2472  SDValue IntID =
2473  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2474  SDValue Ops[] = {Ld->getChain(),
2475  IntID,
2476  DAG.getUNDEF(ContainerVT),
2477  NewAddr,
2478  DAG.getRegister(RISCV::X0, XLenVT),
2479  VL};
2480  SDValue NewLoad = DAG.getMemIntrinsicNode(
2481  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2482  DAG.getMachineFunction().getMachineMemOperand(
2483  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2484  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2485  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2486  }
2487 
2488  // Otherwise use a scalar load and splat. This will give the best
2489  // opportunity to fold a splat into the operation. ISel can turn it into
2490  // the x0 strided load if we aren't able to fold away the select.
2491  if (SVT.isFloatingPoint())
2492  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2493  Ld->getPointerInfo().getWithOffset(Offset),
2494  Ld->getOriginalAlign(),
2495  Ld->getMemOperand()->getFlags());
2496  else
2497  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2498  Ld->getPointerInfo().getWithOffset(Offset), SVT,
2499  Ld->getOriginalAlign(),
2500  Ld->getMemOperand()->getFlags());
2501  DAG.makeEquivalentMemoryOrdering(Ld, V);
2502 
2503  unsigned Opc =
2504  VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2505  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2506  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2507  }
2508 
2509  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2510  assert(Lane < (int)NumElts && "Unexpected lane!");
2511  SDValue Gather =
2512  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2513  DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2514  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2515  }
2516  }
2517 
2518  ArrayRef<int> Mask = SVN->getMask();
2519 
2520  // Try to match as a slidedown.
2521  int SlideAmt = matchShuffleAsSlideDown(Mask);
2522  if (SlideAmt >= 0) {
2523  // TODO: Should we reduce the VL to account for the upper undef elements?
2524  // Requires additional vsetvlis, but might be faster to execute.
2525  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2526  SDValue SlideDown =
2527  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
2528  DAG.getUNDEF(ContainerVT), V1,
2529  DAG.getConstant(SlideAmt, DL, XLenVT),
2530  TrueMask, VL);
2531  return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
2532  }
2533 
2534  // Detect an interleave shuffle and lower to
2535  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
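  // This works because zext(V1) + zext(V2) + (2^eltbits - 1) * zext(V2) equals
  // zext(V1) + (zext(V2) << eltbits): each widened element then holds an
  // element of V1 in its low half and the corresponding element of V2 in its
  // high half, which is the interleaved result when viewed in the original
  // element type.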
2536  bool SwapSources;
2537  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2538  // Swap sources if needed.
2539  if (SwapSources)
2540  std::swap(V1, V2);
2541 
2542  // Extract the lower half of the vectors.
2543  MVT HalfVT = VT.getHalfNumVectorElementsVT();
2544  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2545  DAG.getConstant(0, DL, XLenVT));
2546  V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2547  DAG.getConstant(0, DL, XLenVT));
2548 
2549  // Double the element width and halve the number of elements in an int type.
2550  unsigned EltBits = VT.getScalarSizeInBits();
2551  MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2552  MVT WideIntVT =
2553  MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2554  // Convert this to a scalable vector. We need to base this on the
2555  // destination size to ensure there's always a type with a smaller LMUL.
2556  MVT WideIntContainerVT =
2557  getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2558 
2559  // Convert sources to scalable vectors with the same element count as the
2560  // larger type.
2561  MVT HalfContainerVT = MVT::getVectorVT(
2562  VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2563  V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2564  V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2565 
2566  // Cast sources to integer.
2567  MVT IntEltVT = MVT::getIntegerVT(EltBits);
2568  MVT IntHalfVT =
2569  MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2570  V1 = DAG.getBitcast(IntHalfVT, V1);
2571  V2 = DAG.getBitcast(IntHalfVT, V2);
2572 
2573  // Freeze V2 since we use it twice and we need to be sure that the add and
2574  // multiply see the same value.
2575  V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
2576 
2577  // Recreate TrueMask using the widened type's element count.
2578  MVT MaskVT =
2579  MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
2580  TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2581 
2582  // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2583  SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
2584  V2, TrueMask, VL);
2585  // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2586  SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2587  DAG.getAllOnesConstant(DL, XLenVT));
2588  SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
2589  V2, Multiplier, TrueMask, VL);
2590  // Add the new copies to our previous addition giving us 2^eltbits copies of
2591  // V2. This is equivalent to shifting V2 left by eltbits. This should
2592  // combine with the vwmulu.vv above to form vwmaccu.vv.
2593  Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2594  TrueMask, VL);
2595  // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
2596  // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
2597  // vector VT.
2598  ContainerVT =
2599  MVT::getVectorVT(VT.getVectorElementType(),
2600  WideIntContainerVT.getVectorElementCount() * 2);
2601  Add = DAG.getBitcast(ContainerVT, Add);
2602  return convertFromScalableVector(VT, Add, DAG, Subtarget);
2603  }
2604 
2605  // Detect shuffles which can be re-expressed as vector selects; these are
2606  // shuffles in which each element in the destination is taken from an element
2607  // at the corresponding index in either source vector.
2608  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2609  int MaskIndex = MaskIdx.value();
2610  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2611  });
2612 
2613  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2614 
2615  SmallVector<SDValue> MaskVals;
2616  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2617  // merged with a second vrgather.
2618  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2619 
2620  // By default we preserve the original operand order, and use a mask to
2621  // select LHS as true and RHS as false. However, since RVV vector selects may
2622  // feature splats but only on the LHS, we may choose to invert our mask and
2623  // instead select between RHS and LHS.
2624  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2625  bool InvertMask = IsSelect == SwapOps;
2626 
2627  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
2628  // half.
2629  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2630 
2631  // Now construct the mask that will be used by the vselect or blended
2632  // vrgather operation. For vrgathers, construct the appropriate indices into
2633  // each vector.
2634  for (int MaskIndex : Mask) {
2635  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2636  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2637  if (!IsSelect) {
2638  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2639  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2640  ? DAG.getConstant(MaskIndex, DL, XLenVT)
2641  : DAG.getUNDEF(XLenVT));
2642  GatherIndicesRHS.push_back(
2643  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2644  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2645  if (IsLHSOrUndefIndex && MaskIndex >= 0)
2646  ++LHSIndexCounts[MaskIndex];
2647  if (!IsLHSOrUndefIndex)
2648  ++RHSIndexCounts[MaskIndex - NumElts];
2649  }
2650  }
2651 
2652  if (SwapOps) {
2653  std::swap(V1, V2);
2654  std::swap(GatherIndicesLHS, GatherIndicesRHS);
2655  }
2656 
2657  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2658  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2659  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2660 
2661  if (IsSelect)
2662  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2663 
2664  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2665  // On such a large vector we're unable to use i8 as the index type.
2666  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2667  // may involve vector splitting if we're already at LMUL=8, or our
2668  // user-supplied maximum fixed-length LMUL.
2669  return SDValue();
2670  }
2671 
2672  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2673  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2674  MVT IndexVT = VT.changeTypeToInteger();
2675  // Since we can't introduce illegal index types at this stage, use i16 and
2676  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2677  // than XLenVT.
2678  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2679  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2680  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2681  }
2682 
2683  MVT IndexContainerVT =
2684  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2685 
2686  SDValue Gather;
2687  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2688  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2689  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2690  Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2691  } else {
2692  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2693  // If only one index is used, we can use a "splat" vrgather.
2694  // TODO: We can splat the most-common index and fix-up any stragglers, if
2695  // that's beneficial.
2696  if (LHSIndexCounts.size() == 1) {
2697  int SplatIndex = LHSIndexCounts.begin()->getFirst();
2698  Gather =
2699  DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2700  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2701  } else {
2702  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2703  LHSIndices =
2704  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2705 
2706  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2707  TrueMask, VL);
2708  }
2709  }
2710 
2711  // If a second vector operand is used by this shuffle, blend it in with an
2712  // additional vrgather.
2713  if (!V2.isUndef()) {
2714  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2715  // If only one index is used, we can use a "splat" vrgather.
2716  // TODO: We can splat the most-common index and fix-up any stragglers, if
2717  // that's beneficial.
2718  if (RHSIndexCounts.size() == 1) {
2719  int SplatIndex = RHSIndexCounts.begin()->getFirst();
2720  V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2721  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2722  } else {
2723  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2724  RHSIndices =
2725  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2726  V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2727  VL);
2728  }
2729 
2730  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2731  SelectMask =
2732  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2733 
2734  Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2735  Gather, VL);
2736  }
2737 
2738  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2739 }
2740 
2741 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2742  SDLoc DL, SelectionDAG &DAG,
2743  const RISCVSubtarget &Subtarget) {
2744  if (VT.isScalableVector())
2745  return DAG.getFPExtendOrRound(Op, DL, VT);
2746  assert(VT.isFixedLengthVector() &&
2747  "Unexpected value type for RVV FP extend/round lowering");
2748  SDValue Mask, VL;
2749  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2750  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2751  ? RISCVISD::FP_EXTEND_VL
2752  : RISCVISD::FP_ROUND_VL;
2753  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2754 }
2755 
2756 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2757 // the exponent.
2759  MVT VT = Op.getSimpleValueType();
2760  unsigned EltSize = VT.getScalarSizeInBits();
2761  SDValue Src = Op.getOperand(0);
2762  SDLoc DL(Op);
2763 
2764  // We need a FP type that can represent the value.
2765  // TODO: Use f16 for i8 when possible?
2766  MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2767  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2768 
2769  // Legal types should have been checked in the RISCVTargetLowering
2770  // constructor.
2771  // TODO: Splitting may make sense in some cases.
2772  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2773  "Expected legal float type!");
2774 
2775  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2776  // The trailing zero count is equal to log2 of this single bit value.
2777  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2778  SDValue Neg =
2779  DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2780  Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2781  }
2782 
2783  // We have a legal FP type, convert to it.
2784  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2785  // Bitcast to integer and shift the exponent to the LSB.
2786  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2787  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2788  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2789  SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2790  DAG.getConstant(ShiftAmt, DL, IntVT));
2791  // Truncate back to original type to allow vnsrl.
2792  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2793  // The exponent contains log2 of the value in biased form.
2794  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2795 
2796  // For trailing zeros, we just need to subtract the bias.
2797  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2798  return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2799  DAG.getConstant(ExponentBias, DL, VT));
2800 
2801  // For leading zeros, we need to remove the bias and convert from log2 to
2802  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
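  // For example, with i16 elements and a source value of 8, the f32 conversion
  // yields a biased exponent of 130: the trailing zero count is 130 - 127 = 3
  // and the leading zero count is (127 + 15) - 130 = 12.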
2803  unsigned Adjust = ExponentBias + (EltSize - 1);
2804  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2805 }
2806 
2807 // While RVV has alignment restrictions, we should always be able to load as a
2808 // legal equivalently-sized byte-typed vector instead. This method is
2809 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2810 // the load is already correctly-aligned, it returns SDValue().
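// For example, a misaligned load of v4i32 is re-expressed as a load of v16i8
// of the same total size, followed by a bitcast back to the original type.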
2811 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2812  SelectionDAG &DAG) const {
2813  auto *Load = cast<LoadSDNode>(Op);
2814  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2815 
2816  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2817  Load->getMemoryVT(),
2818  *Load->getMemOperand()))
2819  return SDValue();
2820 
2821  SDLoc DL(Op);
2822  MVT VT = Op.getSimpleValueType();
2823  unsigned EltSizeBits = VT.getScalarSizeInBits();
2824  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2825  "Unexpected unaligned RVV load type");
2826  MVT NewVT =
2827  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2828  assert(NewVT.isValid() &&
2829  "Expecting equally-sized RVV vector types to be legal");
2830  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2831  Load->getPointerInfo(), Load->getOriginalAlign(),
2832  Load->getMemOperand()->getFlags());
2833  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2834 }
2835 
2836 // While RVV has alignment restrictions, we should always be able to store as a
2837 // legal equivalently-sized byte-typed vector instead. This method is
2838 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2839 // returns SDValue() if the store is already correctly aligned.
2840 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2841  SelectionDAG &DAG) const {
2842  auto *Store = cast<StoreSDNode>(Op);
2843  assert(Store && Store->getValue().getValueType().isVector() &&
2844  "Expected vector store");
2845 
2846  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2847  Store->getMemoryVT(),
2848  *Store->getMemOperand()))
2849  return SDValue();
2850 
2851  SDLoc DL(Op);
2852  SDValue StoredVal = Store->getValue();
2853  MVT VT = StoredVal.getSimpleValueType();
2854  unsigned EltSizeBits = VT.getScalarSizeInBits();
2855  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2856  "Unexpected unaligned RVV store type");
2857  MVT NewVT =
2858  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2859  assert(NewVT.isValid() &&
2860  "Expecting equally-sized RVV vector types to be legal");
2861  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2862  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2863  Store->getPointerInfo(), Store->getOriginalAlign(),
2864  Store->getMemOperand()->getFlags());
2865 }
2866 
2867 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2868  SelectionDAG &DAG) const {
2869  switch (Op.getOpcode()) {
2870  default:
2871  report_fatal_error("unimplemented operand");
2872  case ISD::GlobalAddress:
2873  return lowerGlobalAddress(Op, DAG);
2874  case ISD::BlockAddress:
2875  return lowerBlockAddress(Op, DAG);
2876  case ISD::ConstantPool:
2877  return lowerConstantPool(Op, DAG);
2878  case ISD::JumpTable:
2879  return lowerJumpTable(Op, DAG);
2880  case ISD::GlobalTLSAddress:
2881  return lowerGlobalTLSAddress(Op, DAG);
2882  case ISD::SELECT:
2883  return lowerSELECT(Op, DAG);
2884  case ISD::BRCOND:
2885  return lowerBRCOND(Op, DAG);
2886  case ISD::VASTART:
2887  return lowerVASTART(Op, DAG);
2888  case ISD::FRAMEADDR:
2889  return lowerFRAMEADDR(Op, DAG);
2890  case ISD::RETURNADDR:
2891  return lowerRETURNADDR(Op, DAG);
2892  case ISD::SHL_PARTS:
2893  return lowerShiftLeftParts(Op, DAG);
2894  case ISD::SRA_PARTS:
2895  return lowerShiftRightParts(Op, DAG, true);
2896  case ISD::SRL_PARTS:
2897  return lowerShiftRightParts(Op, DAG, false);
2898  case ISD::BITCAST: {
2899  SDLoc DL(Op);
2900  EVT VT = Op.getValueType();
2901  SDValue Op0 = Op.getOperand(0);
2902  EVT Op0VT = Op0.getValueType();
2903  MVT XLenVT = Subtarget.getXLenVT();
2904  if (VT.isFixedLengthVector()) {
2905  // We can handle fixed length vector bitcasts with a simple replacement
2906  // in isel.
2907  if (Op0VT.isFixedLengthVector())
2908  return Op;
2909  // When bitcasting from scalar to fixed-length vector, insert the scalar
2910  // into a one-element vector of the result type, and perform a vector
2911  // bitcast.
2912  if (!Op0VT.isVector()) {
2913  EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2914  if (!isTypeLegal(BVT))
2915  return SDValue();
2916  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2917  DAG.getUNDEF(BVT), Op0,
2918  DAG.getConstant(0, DL, XLenVT)));
2919  }
2920  return SDValue();
2921  }
2922  // Custom-legalize bitcasts from fixed-length vector types to scalar types
2923  // thus: bitcast the vector to a one-element vector type whose element type
2924  // is the same as the result type, and extract the first element.
2925  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2926  EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
2927  if (!isTypeLegal(BVT))
2928  return SDValue();
2929  SDValue BVec = DAG.getBitcast(BVT, Op0);
2930  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2931  DAG.getConstant(0, DL, XLenVT));
2932  }
2933  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2934  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2935  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2936  return FPConv;
2937  }
2938  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2939  Subtarget.hasStdExtF()) {
2940  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2941  SDValue FPConv =
2942  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2943  return FPConv;
2944  }
2945  return SDValue();
2946  }
2947  case ISD::INTRINSIC_WO_CHAIN:
2948  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2949  case ISD::INTRINSIC_W_CHAIN:
2950  return LowerINTRINSIC_W_CHAIN(Op, DAG);
2951  case ISD::INTRINSIC_VOID:
2952  return LowerINTRINSIC_VOID(Op, DAG);
2953  case ISD::BSWAP:
2954  case ISD::BITREVERSE: {
2955  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2956  assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2957  MVT VT = Op.getSimpleValueType();
2958  SDLoc DL(Op);
2959  // Start with the maximum immediate value which is the bitwidth - 1.
2960  unsigned Imm = VT.getSizeInBits() - 1;
2961  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2962  if (Op.getOpcode() == ISD::BSWAP)
2963  Imm &= ~0x7U;
2964  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2965  DAG.getConstant(Imm, DL, VT));
2966  }
2967  case ISD::FSHL:
2968  case ISD::FSHR: {
2969  MVT VT = Op.getSimpleValueType();
2970  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2971  SDLoc DL(Op);
2972  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2973  // use log2(XLen) bits. Mask the shift amount accordingly to prevent
2974  // accidentally setting the extra bit.
2975  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2976  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2977  DAG.getConstant(ShAmtWidth, DL, VT));
2978  // fshl and fshr concatenate their operands in the same order. fsr and fsl
2979  // instruction use different orders. fshl will return its first operand for
2980  // shift of zero, fshr will return its second operand. fsl and fsr both
2981  // return rs1 so the ISD nodes need to have different operand orders.
2982  // Shift amount is in rs2.
2983  SDValue Op0 = Op.getOperand(0);
2984  SDValue Op1 = Op.getOperand(1);
2985  unsigned Opc = RISCVISD::FSL;
2986  if (Op.getOpcode() == ISD::FSHR) {
2987  std::swap(Op0, Op1);
2988  Opc = RISCVISD::FSR;
2989  }
2990  return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
2991  }
2992  case ISD::TRUNCATE: {
2993  SDLoc DL(Op);
2994  MVT VT = Op.getSimpleValueType();
2995  // Only custom-lower vector truncates
2996  if (!VT.isVector())
2997  return Op;
2998 
2999  // Truncates to mask types are handled differently
3000  if (VT.getVectorElementType() == MVT::i1)
3001  return lowerVectorMaskTrunc(Op, DAG);
3002 
3003  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
3004  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
3005  // truncate by one power of two at a time.
3006  MVT DstEltVT = VT.getVectorElementType();
3007 
3008  SDValue Src = Op.getOperand(0);
3009  MVT SrcVT = Src.getSimpleValueType();
3010  MVT SrcEltVT = SrcVT.getVectorElementType();
3011 
3012  assert(DstEltVT.bitsLT(SrcEltVT) &&
3013  isPowerOf2_64(DstEltVT.getSizeInBits()) &&
3014  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
3015  "Unexpected vector truncate lowering");
3016 
3017  MVT ContainerVT = SrcVT;
3018  if (SrcVT.isFixedLengthVector()) {
3019  ContainerVT = getContainerForFixedLengthVector(SrcVT);
3020  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3021  }
3022 
3023  SDValue Result = Src;
3024  SDValue Mask, VL;
3025  std::tie(Mask, VL) =
3026  getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
3027  LLVMContext &Context = *DAG.getContext();
3028  const ElementCount Count = ContainerVT.getVectorElementCount();
3029  do {
3030  SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
3031  EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
3032  Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
3033  Mask, VL);
3034  } while (SrcEltVT != DstEltVT);
3035 
3036  if (SrcVT.isFixedLengthVector())
3037  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
3038 
3039  return Result;
3040  }
3041  case ISD::ANY_EXTEND:
3042  case ISD::ZERO_EXTEND:
3043  if (Op.getOperand(0).getValueType().isVector() &&
3044  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3045  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3046  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3047  case ISD::SIGN_EXTEND:
3048  if (Op.getOperand(0).getValueType().isVector() &&
3049  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3050  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3051  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3052  case ISD::SPLAT_VECTOR_PARTS:
3053  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3054  case ISD::INSERT_VECTOR_ELT:
3055  return lowerINSERT_VECTOR_ELT(Op, DAG);
3056  case ISD::EXTRACT_VECTOR_ELT:
3057  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3058  case ISD::VSCALE: {
3059  MVT VT = Op.getSimpleValueType();
3060  SDLoc DL(Op);
3061  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3062  // We define our scalable vector types for lmul=1 to use a 64 bit known
3063  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3064  // vscale as VLENB / 8.
3065  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3066  if (isa<ConstantSDNode>(Op.getOperand(0))) {
3067  // We assume VLENB is a multiple of 8. We manually choose the best shift
3068  // here because SimplifyDemandedBits isn't always able to simplify it.
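  // For example, vscale * 4 becomes VLENB >> 1 (Log2 = 2), vscale * 8 is
  // VLENB itself, and vscale * 16 becomes VLENB << 1.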
3069  uint64_t Val = Op.getConstantOperandVal(0);
3070  if (isPowerOf2_64(Val)) {
3071  uint64_t Log2 = Log2_64(Val);
3072  if (Log2 < 3)
3073  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3074  DAG.getConstant(3 - Log2, DL, VT));
3075  if (Log2 > 3)
3076  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3077  DAG.getConstant(Log2 - 3, DL, VT));
3078  return VLENB;
3079  }
3080  // If the multiplier is a multiple of 8, scale it down to avoid needing
3081  // to shift the VLENB value.
3082  if ((Val % 8) == 0)
3083  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3084  DAG.getConstant(Val / 8, DL, VT));
3085  }
3086 
3087  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3088  DAG.getConstant(3, DL, VT));
3089  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3090  }
3091  case ISD::FPOWI: {
3092  // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3093  // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3094  if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3095  Op.getOperand(1).getValueType() == MVT::i32) {
3096  SDLoc DL(Op);
3097  SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3098  SDValue Powi =
3099  DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3100  return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3101  DAG.getIntPtrConstant(0, DL));
3102  }
3103  return SDValue();
3104  }
3105  case ISD::FP_EXTEND: {
3106  // RVV can only do fp_extend to types double the size as the source. We
3107  // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
3108  // via f32.
3109  SDLoc DL(Op);
3110  MVT VT = Op.getSimpleValueType();
3111  SDValue Src = Op.getOperand(0);
3112  MVT SrcVT = Src.getSimpleValueType();
3113 
3114  // Prepare any fixed-length vector operands.
3115  MVT ContainerVT = VT;
3116  if (SrcVT.isFixedLengthVector()) {
3117  ContainerVT = getContainerForFixedLengthVector(VT);
3118  MVT SrcContainerVT =
3119  ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
3120  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3121  }
3122 
3123  if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
3124  SrcVT.getVectorElementType() != MVT::f16) {
3125  // For scalable vectors, we only need to close the gap between
3126  // vXf16->vXf64.
3127  if (!VT.isFixedLengthVector())
3128  return Op;
3129  // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
3130  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3131  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3132  }
3133 
3134  MVT InterVT = VT.changeVectorElementType(MVT::f32);
3135  MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
3136  SDValue IntermediateExtend = getRVVFPExtendOrRound(
3137  Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
3138 
3139  SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
3140  DL, DAG, Subtarget);
3141  if (VT.isFixedLengthVector())
3142  return convertFromScalableVector(VT, Extend, DAG, Subtarget);
3143  return Extend;
3144  }
3145  case ISD::FP_ROUND: {
3146  // RVV can only do fp_round to types half the size as the source. We
3147  // custom-lower f64->f16 rounds via RVV's round-to-odd float
3148  // conversion instruction.
3149  SDLoc DL(Op);
3150  MVT VT = Op.getSimpleValueType();
3151  SDValue Src = Op.getOperand(0);
3152  MVT SrcVT = Src.getSimpleValueType();
3153 
3154  // Prepare any fixed-length vector operands.
3155  MVT ContainerVT = VT;
3156  if (VT.isFixedLengthVector()) {
3157  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3158  ContainerVT =
3159  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3160  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3161  }
3162 
3163  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
3164  SrcVT.getVectorElementType() != MVT::f64) {
3165  // For scalable vectors, we only need to close the gap between
3166  // vXf64<->vXf16.
3167  if (!VT.isFixedLengthVector())
3168  return Op;
3169  // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
3170  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
3171  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3172  }
3173 
3174  SDValue Mask, VL;
3175  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3176 
3177  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
3178  SDValue IntermediateRound =
3179  DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
3180  SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
3181  DL, DAG, Subtarget);
3182 
3183  if (VT.isFixedLengthVector())
3184  return convertFromScalableVector(VT, Round, DAG, Subtarget);
3185  return Round;
3186  }
3187  case ISD::FP_TO_SINT:
3188  case ISD::FP_TO_UINT:
3189  case ISD::SINT_TO_FP:
3190  case ISD::UINT_TO_FP: {
3191  // RVV can only do fp<->int conversions to types half/double the size as
3192  // the source. We custom-lower any conversions that do two hops into
3193  // sequences.
3194  MVT VT = Op.getSimpleValueType();
3195  if (!VT.isVector())
3196  return Op;
3197  SDLoc DL(Op);
3198  SDValue Src = Op.getOperand(0);
3199  MVT EltVT = VT.getVectorElementType();
3200  MVT SrcVT = Src.getSimpleValueType();
3201  MVT SrcEltVT = SrcVT.getVectorElementType();
3202  unsigned EltSize = EltVT.getSizeInBits();
3203  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3204  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3205  "Unexpected vector element types");
3206 
3207  bool IsInt2FP = SrcEltVT.isInteger();
3208  // Widening conversions
3209  if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
3210  if (IsInt2FP) {
3211  // Do a regular integer sign/zero extension then convert to float.
3212  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
3213  VT.getVectorElementCount());
3214  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3215  ? ISD::ZERO_EXTEND
3216  : ISD::SIGN_EXTEND;
3217  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3218  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3219  }
3220  // FP2Int
3221  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3222  // Do one doubling fp_extend then complete the operation by converting
3223  // to int.
3224  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3225  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3226  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3227  }
3228 
3229  // Narrowing conversions
3230  if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
3231  if (IsInt2FP) {
3232  // One narrowing int_to_fp, then an fp_round.
3233  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3234  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3235  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3236  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3237  }
3238  // FP2Int
3239  // One narrowing fp_to_int, then truncate the integer. If the float isn't
3240  // representable by the integer, the result is poison.
3241  MVT IVecVT =
3242  MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
3243  VT.getVectorElementCount());
3244  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3245  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3246  }
3247 
3248  // Scalable vectors can exit here. Patterns will handle equally-sized
3249  // conversions halving/doubling ones.
3250  if (!VT.isFixedLengthVector())
3251  return Op;
3252 
3253  // For fixed-length vectors we lower to a custom "VL" node.
3254  unsigned RVVOpc = 0;
3255  switch (Op.getOpcode()) {
3256  default:
3257  llvm_unreachable("Impossible opcode");
3258  case ISD::FP_TO_SINT:
3259  RVVOpc = RISCVISD::FP_TO_SINT_VL;
3260  break;
3261  case ISD::FP_TO_UINT:
3262  RVVOpc = RISCVISD::FP_TO_UINT_VL;
3263  break;
3264  case ISD::SINT_TO_FP:
3265  RVVOpc = RISCVISD::SINT_TO_FP_VL;
3266  break;
3267  case ISD::UINT_TO_FP:
3268  RVVOpc = RISCVISD::UINT_TO_FP_VL;
3269  break;
3270  }
3271 
3272  MVT ContainerVT, SrcContainerVT;
3273  // Derive the reference container type from the larger vector type.
3274  if (SrcEltSize > EltSize) {
3275  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3276  ContainerVT =
3277  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3278  } else {
3279  ContainerVT = getContainerForFixedLengthVector(VT);
3280  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3281  }
3282 
3283  SDValue Mask, VL;
3284  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3285 
3286  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3287  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3288  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3289  }
3290  case ISD::FP_TO_SINT_SAT:
3291  case ISD::FP_TO_UINT_SAT:
3292  return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3293  case ISD::FTRUNC:
3294  case ISD::FCEIL:
3295  case ISD::FFLOOR:
3296  return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
3297  case ISD::VECREDUCE_ADD:
3298  case ISD::VECREDUCE_UMAX:
3299  case ISD::VECREDUCE_SMAX:
3300  case ISD::VECREDUCE_UMIN:
3301  case ISD::VECREDUCE_SMIN:
3302  return lowerVECREDUCE(Op, DAG);
3303  case ISD::VECREDUCE_AND:
3304  case ISD::VECREDUCE_OR:
3305  case ISD::VECREDUCE_XOR:
3306  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3307  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3308  return lowerVECREDUCE(Op, DAG);
3309  case ISD::VECREDUCE_FADD:
3310  case ISD::VECREDUCE_SEQ_FADD:
3311  case ISD::VECREDUCE_FMIN:
3312  case ISD::VECREDUCE_FMAX:
3313  return lowerFPVECREDUCE(Op, DAG);
3314  case ISD::VP_REDUCE_ADD:
3315  case ISD::VP_REDUCE_UMAX:
3316  case ISD::VP_REDUCE_SMAX:
3317  case ISD::VP_REDUCE_UMIN:
3318  case ISD::VP_REDUCE_SMIN:
3319  case ISD::VP_REDUCE_FADD:
3320  case ISD::VP_REDUCE_SEQ_FADD:
3321  case ISD::VP_REDUCE_FMIN:
3322  case ISD::VP_REDUCE_FMAX:
3323  return lowerVPREDUCE(Op, DAG);
3324  case ISD::VP_REDUCE_AND:
3325  case ISD::VP_REDUCE_OR:
3326  case ISD::VP_REDUCE_XOR:
3327  if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3328  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3329  return lowerVPREDUCE(Op, DAG);
3330  case ISD::INSERT_SUBVECTOR:
3331  return lowerINSERT_SUBVECTOR(Op, DAG);
3332  case ISD::EXTRACT_SUBVECTOR:
3333  return lowerEXTRACT_SUBVECTOR(Op, DAG);
3334  case ISD::STEP_VECTOR:
3335  return lowerSTEP_VECTOR(Op, DAG);
3336  case ISD::VECTOR_REVERSE:
3337  return lowerVECTOR_REVERSE(Op, DAG);
3338  case ISD::BUILD_VECTOR:
3339  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3340  case ISD::SPLAT_VECTOR:
3341  if (Op.getValueType().getVectorElementType() == MVT::i1)
3342  return lowerVectorMaskSplat(Op, DAG);
3343  return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
3344  case ISD::VECTOR_SHUFFLE:
3345  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3346  case ISD::CONCAT_VECTORS: {
3347  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3348  // better than going through the stack, as the default expansion does.
3349  SDLoc DL(Op);
3350  MVT VT = Op.getSimpleValueType();
3351  unsigned NumOpElts =
3352  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3353  SDValue Vec = DAG.getUNDEF(VT);
3354  for (const auto &OpIdx : enumerate(Op->ops())) {
3355  SDValue SubVec = OpIdx.value();
3356  // Don't insert undef subvectors.
3357  if (SubVec.isUndef())
3358  continue;
3359  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3360  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3361  }
3362  return Vec;
3363  }
3364  case ISD::LOAD:
3365  if (auto V = expandUnalignedRVVLoad(Op, DAG))
3366  return V;
3367  if (Op.getValueType().isFixedLengthVector())
3368  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3369  return Op;
3370  case ISD::STORE:
3371  if (auto V = expandUnalignedRVVStore(Op, DAG))
3372  return V;
3373  if (Op.getOperand(1).getValueType().isFixedLengthVector())
3374  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3375  return Op;
3376  case ISD::MLOAD:
3377  case ISD::VP_LOAD:
3378  return lowerMaskedLoad(Op, DAG);
3379  case ISD::MSTORE:
3380  case ISD::VP_STORE:
3381  return lowerMaskedStore(Op, DAG);
3382  case ISD::SETCC:
3383  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3384  case ISD::ADD:
3385  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
3386  case ISD::SUB:
3387  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
3388  case ISD::MUL:
3389  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
3390  case ISD::MULHS:
3391  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
3392  case ISD::MULHU:
3393  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
3394  case ISD::AND:
3395  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3396  RISCVISD::AND_VL);
3397  case ISD::OR:
3398  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3399  RISCVISD::OR_VL);
3400  case ISD::XOR:
3401  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3402  RISCVISD::XOR_VL);
3403  case ISD::SDIV:
3404  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
3405  case ISD::SREM:
3406  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
3407  case ISD::UDIV:
3408  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
3409  case ISD::UREM:
3410  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
3411  case ISD::SHL:
3412  case ISD::SRA:
3413  case ISD::SRL:
3414  if (Op.getSimpleValueType().isFixedLengthVector())
3415  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3416  // This can be called for an i32 shift amount that needs to be promoted.
3417  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3418  "Unexpected custom legalisation");
3419  return SDValue();
3420  case ISD::SADDSAT:
3421  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
3422  case ISD::UADDSAT:
3423  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
3424  case ISD::SSUBSAT:
3425  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
3426  case ISD::USUBSAT:
3427  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
3428  case ISD::FADD:
3429  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
3430  case ISD::FSUB:
3431  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
3432  case ISD::FMUL:
3433  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
3434  case ISD::FDIV:
3435  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
3436  case ISD::FNEG:
3437  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3438  case ISD::FABS:
3439  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3440  case ISD::FSQRT:
3441  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3442  case ISD::FMA:
3443  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
3444  case ISD::SMIN:
3445  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
3446  case ISD::SMAX:
3447  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
3448  case ISD::UMIN:
3449  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
3450  case ISD::UMAX:
3451  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
3452  case ISD::FMINNUM:
3453  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
3454  case ISD::FMAXNUM:
3455  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
3456  case ISD::ABS:
3457  return lowerABS(Op, DAG);
3458  case ISD::CTLZ_ZERO_UNDEF:
3459  case ISD::CTTZ_ZERO_UNDEF:
3460  return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3461  case ISD::VSELECT:
3462  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3463  case ISD::FCOPYSIGN:
3464  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3465  case ISD::MGATHER:
3466  case ISD::VP_GATHER:
3467  return lowerMaskedGather(Op, DAG);
3468  case ISD::MSCATTER:
3469  case ISD::VP_SCATTER:
3470  return lowerMaskedScatter(Op, DAG);
3471  case ISD::FLT_ROUNDS_:
3472  return lowerGET_ROUNDING(Op, DAG);
3473  case ISD::SET_ROUNDING:
3474  return lowerSET_ROUNDING(Op, DAG);
3475  case ISD::VP_SELECT:
3476  return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3477  case ISD::VP_MERGE:
3478  return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
3479  case ISD::VP_ADD:
3480  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
3481  case ISD::VP_SUB:
3482  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
3483  case ISD::VP_MUL:
3484  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
3485  case ISD::VP_SDIV:
3486  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
3487  case ISD::VP_UDIV:
3488  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
3489  case ISD::VP_SREM:
3490  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
3491  case ISD::VP_UREM:
3492  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
3493  case ISD::VP_AND:
3494  return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3495  case ISD::VP_OR:
3496  return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3497  case ISD::VP_XOR:
3498  return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3499  case ISD::VP_ASHR:
3500  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
3501  case ISD::VP_LSHR:
3502  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
3503  case ISD::VP_SHL:
3504  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
3505  case ISD::VP_FADD:
3506  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
3507  case ISD::VP_FSUB:
3508  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
3509  case ISD::VP_FMUL:
3510  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
3511  case ISD::VP_FDIV:
3512  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
3513  }
3514 }
3515 
3516 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3517  SelectionDAG &DAG, unsigned Flags) {
3518  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3519 }
3520 
3521 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3522  SelectionDAG &DAG, unsigned Flags) {
3523  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3524  Flags);
3525 }
3526 
3527 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3528  SelectionDAG &DAG, unsigned Flags) {
3529  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3530  N->getOffset(), Flags);
3531 }
3532 
3533 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3534  SelectionDAG &DAG, unsigned Flags) {
3535  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3536 }
3537 
3538 template <class NodeTy>
3539 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3540  bool IsLocal) const {
3541  SDLoc DL(N);
3542  EVT Ty = getPointerTy(DAG.getDataLayout());
3543 
3544  if (isPositionIndependent()) {
3545  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3546  if (IsLocal)
3547  // Use PC-relative addressing to access the symbol. This generates the
3548  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3549  // %pcrel_lo(auipc)).
3550  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3551 
3552  // Use PC-relative addressing to access the GOT for this symbol, then load
3553  // the address from the GOT. This generates the pattern (PseudoLA sym),
3554  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3555  return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
3556  }
3557 
3558  switch (getTargetMachine().getCodeModel()) {
3559  default:
3560  report_fatal_error("Unsupported code model for lowering");
3561  case CodeModel::Small: {
3562  // Generate a sequence for accessing addresses within the first 2 GiB of
3563  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
3564  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3565  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3566  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3567  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
3568  }
3569  case CodeModel::Medium: {
3570  // Generate a sequence for accessing addresses within any 2 GiB range of
3571  // the address space. This generates the pattern (PseudoLLA sym), which
3572  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3573  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3574  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
3575  }
3576  }
3577 }
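// For illustration only (the symbol name `sym` and register a0 are
// placeholders, not taken from this file), the cases above correspond
// roughly to:
//   CodeModel::Small:       lui   a0, %hi(sym)
//                           addi  a0, a0, %lo(sym)
//   CodeModel::Medium:      auipc a0, %pcrel_hi(sym)       ; PseudoLLA
//                           addi  a0, a0, %pcrel_lo(...)
//   PIC, local symbol:      same PseudoLLA expansion as above
//   PIC, non-local symbol:  auipc a0, %got_pcrel_hi(sym)   ; PseudoLA
//                           ld    a0, %pcrel_lo(...)(a0)   ; lw on RV32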
3578 
3579 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3580  SelectionDAG &DAG) const {
3581  SDLoc DL(Op);
3582  EVT Ty = Op.getValueType();
3583  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3584  int64_t Offset = N->getOffset();
3585  MVT XLenVT = Subtarget.getXLenVT();
3586 
3587  const GlobalValue *GV = N->getGlobal();
3588  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3589  SDValue Addr = getAddr(N, DAG, IsLocal);
3590 
3591  // In order to maximise the opportunity for common subexpression elimination,
3592  // emit a separate ADD node for the global address offset instead of folding
3593  // it in the global address node. Later peephole optimisations may choose to
3594  // fold it back in when profitable.
3595  if (Offset != 0)
3596  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3597  DAG.getConstant(Offset, DL, XLenVT));
3598  return Addr;
3599 }
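// Illustrative consequence of keeping the offset separate (hypothetical IR,
// for illustration only): loads from sym+8 and sym+16 can share one CSE'd
// address sequence for sym and differ only in a trailing ADD of the constant
// offset.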
3600 
3601 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3602  SelectionDAG &DAG) const {
3603  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3604 
3605  return getAddr(N, DAG);
3606 }
3607 
3608 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3609  SelectionDAG &DAG) const {
3610  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3611 
3612  return getAddr(N, DAG);
3613 }
3614 
3615 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3616  SelectionDAG &DAG) const {
3617  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3618 
3619  return getAddr(N, DAG);
3620 }
3621 
3622 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3623  SelectionDAG &DAG,
3624  bool UseGOT) const {
3625  SDLoc DL(N);
3626  EVT Ty = getPointerTy(DAG.getDataLayout());
3627  const GlobalValue *GV = N->getGlobal();
3628  MVT XLenVT = Subtarget.getXLenVT();
3629 
3630  if (UseGOT) {
3631  // Use PC-relative addressing to access the GOT for this TLS symbol, then
3632  // load the address from the GOT and add the thread pointer. This generates
3633  // the pattern (PseudoLA_TLS_IE sym), which expands to
3634  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3635  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3636  SDValue Load =
3637  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
3638 
3639  // Add the thread pointer.
3640  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3641  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3642  }
3643 
3644  // Generate a sequence for accessing the address relative to the thread
3645  // pointer, with the appropriate adjustment for the thread pointer offset.
3646  // This generates the pattern
3647  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
3648  SDValue AddrHi =
3649  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3650  SDValue AddrAdd =
3651  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3652  SDValue AddrLo =
3653  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3654 
3655  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
3656  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3657  SDValue MNAdd = SDValue(
3658  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
3659  0);
3660  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
3661 }
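// For illustration only (TLS symbol `sym` and register a0 are placeholders):
//   Initial-exec (UseGOT):  auipc a0, %tls_ie_pcrel_hi(sym)   ; PseudoLA_TLS_IE
//                           ld    a0, %pcrel_lo(...)(a0)      ; lw on RV32
//                           add   a0, a0, tp
//   Local-exec:             lui   a0, %tprel_hi(sym)
//                           add   a0, a0, tp, %tprel_add(sym)
//                           addi  a0, a0, %tprel_lo(sym)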
3662 
3663 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3664  SelectionDAG &DAG) const {
3665  SDLoc DL(N);
3666  EVT Ty = getPointerTy(DAG.getDataLayout());
3667  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3668  const GlobalValue *GV = N->getGlobal();
3669 
3670  // Use a PC-relative addressing mode to access the global dynamic GOT address.
3671  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3672  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3673  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3674  SDValue Load =
3675  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3676 
3677  // Prepare argument list to generate call.
3678  ArgListTy Args;
3679  ArgListEntry Entry;
3680  Entry.Node = Load;
3681  Entry.Ty = CallTy;
3682  Args.push_back(Entry);
3683 
3684  // Setup call to __tls_get_addr.
3685  TargetLowering::CallLoweringInfo CLI(DAG);
3686  CLI.setDebugLoc(DL)
3687  .setChain(DAG.getEntryNode())
3688  .setLibCallee(CallingConv::C, CallTy,
3689  DAG.getExternalSymbol("__tls_get_addr", Ty),
3690  std::move(Args));
3691 
3692  return LowerCallTo(CLI).first;
3693 }
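// For illustration only (`sym` is a placeholder): the general-dynamic access
// expands to roughly
//   auipc a0, %tls_gd_pcrel_hi(sym)   ; PseudoLA_TLS_GD
//   addi  a0, a0, %pcrel_lo(...)
//   call  __tls_get_addr@plt
// with the resulting address returned in a0.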
3694 
3695 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3696  SelectionDAG &DAG) const {
3697  SDLoc DL(Op);
3698  EVT Ty = Op.getValueType();
3699  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3700  int64_t Offset = N->getOffset();
3701  MVT XLenVT = Subtarget.getXLenVT();
3702 
3703  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3704 
3705  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3706  CallingConv::GHC)
3707  report_fatal_error("In GHC calling convention TLS is not supported");
3708 
3709  SDValue Addr;
3710  switch (Model) {
3711  case TLSModel::LocalExec:
3712  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3713  break;
3714  case TLSModel::InitialExec:
3715  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3716  break;
3717  case TLSModel::LocalDynamic:
3718  case TLSModel::GeneralDynamic:
3719  Addr = getDynamicTLSAddr(N, DAG);
3720  break;
3721  }
3722 
3723  // In order to maximise the opportunity for common subexpression elimination,
3724  // emit a separate ADD node for the global address offset instead of folding
3725  // it in the global address node. Later peephole optimisations may choose to
3726  // fold it back in when profitable.
3727  if (Offset != 0)
3728  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3729  DAG.getConstant(Offset, DL, XLenVT));
3730  return Addr;
3731 }
3732 
3733 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3734  SDValue CondV = Op.getOperand(0);
3735  SDValue TrueV = Op.getOperand(1);
3736  SDValue FalseV = Op.getOperand(2);
3737  SDLoc DL(Op);
3738  MVT VT = Op.getSimpleValueType();
3739  MVT XLenVT = Subtarget.getXLenVT();
3740 
3741  // Lower vector SELECTs to VSELECTs by splatting the condition.
3742  if (VT.isVector()) {
3743  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3744  SDValue CondSplat = VT.isScalableVector()
3745  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3746  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3747  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3748  }
3749 
3750  // If the result type is XLenVT and CondV is the output of a SETCC node
3751  // which also operated on XLenVT inputs, then merge the SETCC node into the
3752  // lowered RISCVISD::SELECT_CC to take advantage of the integer
3753  // compare+branch instructions. i.e.:
3754  // (select (setcc lhs, rhs, cc), truev, falsev)
3755  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3756  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3757  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3758  SDValue LHS = CondV.getOperand(0);
3759  SDValue RHS = CondV.getOperand(1);
3760  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3761  ISD::CondCode CCVal = CC->get();
3762 
3763  // Special case for a select of 2 constants that have a difference of 1.
3764  // Normally this is done by DAGCombine, but if the select is introduced by
3765  // type legalization or op legalization, we miss it. Restricting to the
3766  // SETLT case for now because that is what signed saturating add/sub need.
3767  // FIXME: We don't need the condition to be SETLT or even a SETCC,
3768  // but we would probably want to swap the true/false values if the condition
3769  // is SETGE/SETLE to avoid an XORI.
3770  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3771  CCVal == ISD::SETLT) {
3772  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3773  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3774  if (TrueVal - 1 == FalseVal)
3775  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3776  if (TrueVal + 1 == FalseVal)
3777  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3778  }
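// Worked example of the special case above (constants chosen for
// illustration): (select (setlt a, b), 5, 4) satisfies TrueVal - 1 == FalseVal
// and becomes (add (setlt a, b), 4), while (select (setlt a, b), 4, 5)
// satisfies TrueVal + 1 == FalseVal and becomes (sub 5, (setlt a, b)).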
3779 
3780  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3781 
3782  SDValue TargetCC = DAG.getCondCode(CCVal);
3783  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3784  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3785  }
3786 
3787  // Otherwise:
3788  // (select condv, truev, falsev)
3789  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3790  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3791  SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3792 
3793  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3794 
3795  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3796 }
3797 
3798 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3799  SDValue CondV = Op.getOperand(1);
3800  SDLoc DL(Op);
3801  MVT XLenVT = Subtarget.getXLenVT();
3802 
3803  if (CondV.getOpcode() == ISD::SETCC &&
3804  CondV.getOperand(0).getValueType() == XLenVT) {
3805  SDValue LHS = CondV.getOperand(0);
3806  SDValue RHS = CondV.getOperand(1);
3807  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3808 
3809  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3810 
3811  SDValue TargetCC = DAG.getCondCode(CCVal);
3812  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3813  LHS, RHS, TargetCC, Op.getOperand(2));
3814  }
3815 
3816  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3817  CondV, DAG.getConstant(0, DL, XLenVT),
3818  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3819 }
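// For illustration (operands a, b and block dest are placeholders):
// (brcond (setcc a, b, setlt), dest) lowers to (riscvisd::br_cc a, b, setlt,
// dest), which can select to a single BLT, while a bare i1 condition falls
// through to the SETNE-against-zero form above.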
3820 
3821 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3822  MachineFunction &MF = DAG.getMachineFunction();
3823  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3824 
3825  SDLoc DL(Op);
3826  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3827  getPointerTy(MF.getDataLayout()));
3828 
3829  // vastart just stores the address of the VarArgsFrameIndex slot into the
3830  // memory location argument.
3831  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3832  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3833  MachinePointerInfo(SV));
3834 }
3835 
3836 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3837  SelectionDAG &DAG) const {
3838  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3839  MachineFunction &MF = DAG.getMachineFunction();
3840  MachineFrameInfo &MFI = MF.getFrameInfo();
3841  MFI.setFrameAddressIsTaken(true);
3842  Register FrameReg = RI.getFrameRegister(MF);
3843  int XLenInBytes = Subtarget.getXLen() / 8;
3844 
3845  EVT VT = Op.getValueType();
3846  SDLoc DL(Op);
3847  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3848  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3849  while (Depth--) {
3850  int Offset = -(XLenInBytes * 2);
3851  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3852  DAG.getIntPtrConstant(Offset, DL));
3853  FrameAddr =
3854  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3855  }
3856  return FrameAddr;
3857 }
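// Layout assumed by the Depth loop above (standard RISC-V frame record when a
// frame pointer is in use): the return address is spilled at fp - XLenInBytes
// and the caller's frame pointer at fp - 2 * XLenInBytes, so each iteration
// reloads the next frame address from FrameAddr - 2 * XLenInBytes.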
3858 
3859 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3860  SelectionDAG &DAG) const {
3861  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3862  MachineFunction &MF = DAG.getMachineFunction();
3863  MachineFrameInfo &MFI = MF.getFrameInfo();
3864  MFI.setReturnAddressIsTaken(true);
3865  MVT XLenVT = Subtarget.getXLenVT();
3866  int XLenInBytes = Subtarget.getXLen() / 8;
3867 
3867 
3868  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3869  return SDValue();
3870 
3871  EVT VT = Op.getValueType();
3872  SDLoc DL(Op);
3873  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3874  if (Depth) {
3875  int Off = -XLenInBytes;
3876  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3877  SDValue Offset = DAG.getConstant(Off, DL, VT);
3878  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3879  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3880  MachinePointerInfo());
3881  }
3882 
3883  // Return the value of the return address register, marking it an implicit
3884  // live-in.
3885  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3886  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3887 }
3888 
3889 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3890  SelectionDAG &DAG) const {
3891  SDLoc DL(Op);
3892  SDValue Lo = Op.getOperand(0);
3893  SDValue Hi = Op.getOperand(1);
3894  SDValue Shamt = Op.getOperand(2);
3895  EVT VT = Lo.getValueType();
3896 
3897  // if Shamt-XLEN < 0: // Shamt < XLEN
3898  // Lo = Lo << Shamt
3899  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3900  // else:
3901  // Lo = 0
3902  // Hi = Lo << (Shamt-XLEN)
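// Worked example of the pseudocode above (XLEN = 32, for illustration):
// Shamt = 40 takes the else branch, giving Lo = 0 and Hi = Lo << 8, while
// Shamt = 8 takes the if branch, giving Hi = (Hi << 8) | (Lo >>u 24).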
3903 
3904  SDValue Zero = DAG.getConstant(0, DL, VT);
3905  SDValue One = DAG.getConstant(1, DL, VT);
3906  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.