RISCVISelLowering.cpp
1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
29 #include "llvm/IR/DiagnosticInfo.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/IntrinsicsRISCV.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/KnownBits.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "riscv-lower"
43 
44 STATISTIC(NumTailCalls, "Number of tail calls");
45 
46 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
47  const RISCVSubtarget &STI)
48  : TargetLowering(TM), Subtarget(STI) {
49 
50  if (Subtarget.isRV32E())
51  report_fatal_error("Codegen not yet implemented for RV32E");
52 
53  RISCVABI::ABI ABI = Subtarget.getTargetABI();
54  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
55 
56  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
57  !Subtarget.hasStdExtF()) {
58  errs() << "Hard-float 'f' ABI can't be used for a target that "
59  "doesn't support the F instruction set extension (ignoring "
60  "target-abi)\n";
62  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
63  !Subtarget.hasStdExtD()) {
64  errs() << "Hard-float 'd' ABI can't be used for a target that "
65  "doesn't support the D instruction set extension (ignoring "
66  "target-abi)\n";
68  }
69 
70  switch (ABI) {
71  default:
72  report_fatal_error("Don't know how to lower this ABI");
76  case RISCVABI::ABI_LP64:
79  break;
80  }
81 
82  MVT XLenVT = Subtarget.getXLenVT();
83 
84  // Set up the register classes.
85  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
86 
87  if (Subtarget.hasStdExtZfh())
88  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
89  if (Subtarget.hasStdExtF())
90  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
91  if (Subtarget.hasStdExtD())
92  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
93 
94  static const MVT::SimpleValueType BoolVecVTs[] = {
97  static const MVT::SimpleValueType IntVecVTs[] = {
103  static const MVT::SimpleValueType F16VecVTs[] = {
106  static const MVT::SimpleValueType F32VecVTs[] = {
108  static const MVT::SimpleValueType F64VecVTs[] = {
110 
111  if (Subtarget.hasStdExtV()) {
112  auto addRegClassForRVV = [this](MVT VT) {
113  unsigned Size = VT.getSizeInBits().getKnownMinValue();
114  assert(Size <= 512 && isPowerOf2_32(Size));
115  const TargetRegisterClass *RC;
116  if (Size <= 64)
117  RC = &RISCV::VRRegClass;
118  else if (Size == 128)
119  RC = &RISCV::VRM2RegClass;
120  else if (Size == 256)
121  RC = &RISCV::VRM4RegClass;
122  else
123  RC = &RISCV::VRM8RegClass;
124 
125  addRegisterClass(VT, RC);
126  };
127 
128  for (MVT VT : BoolVecVTs)
129  addRegClassForRVV(VT);
130  for (MVT VT : IntVecVTs)
131  addRegClassForRVV(VT);
132 
133  if (Subtarget.hasStdExtZfh())
134  for (MVT VT : F16VecVTs)
135  addRegClassForRVV(VT);
136 
137  if (Subtarget.hasStdExtF())
138  for (MVT VT : F32VecVTs)
139  addRegClassForRVV(VT);
140 
141  if (Subtarget.hasStdExtD())
142  for (MVT VT : F64VecVTs)
143  addRegClassForRVV(VT);
144 
145  if (Subtarget.useRVVForFixedLengthVectors()) {
146  auto addRegClassForFixedVectors = [this](MVT VT) {
147  MVT ContainerVT = getContainerForFixedLengthVector(VT);
148  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
149  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
150  addRegisterClass(VT, TRI.getRegClass(RCID));
151  };
153  if (useRVVForFixedLengthVectorVT(VT))
154  addRegClassForFixedVectors(VT);
155 
157  if (useRVVForFixedLengthVectorVT(VT))
158  addRegClassForFixedVectors(VT);
159  }
160  }
161 
162  // Compute derived properties from the register classes.
164 
166 
167  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
168  setLoadExtAction(N, XLenVT, MVT::i1, Promote);
169 
170  // TODO: add all necessary setOperationAction calls.
172 
177 
180 
185 
187  if (!Subtarget.hasStdExtZbb()) {
190  }
191 
192  if (Subtarget.is64Bit()) {
198 
203  } else {
204  setLibcallName(RTLIB::SHL_I128, nullptr);
205  setLibcallName(RTLIB::SRL_I128, nullptr);
206  setLibcallName(RTLIB::SRA_I128, nullptr);
207  setLibcallName(RTLIB::MUL_I128, nullptr);
208  setLibcallName(RTLIB::MULO_I64, nullptr);
209  }
210 
211  if (!Subtarget.hasStdExtM()) {
219  } else {
220  if (Subtarget.is64Bit()) {
223 
233  } else {
235  }
236  }
237 
242 
246 
247  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
248  if (Subtarget.is64Bit()) {
251  }
252  } else {
255  }
256 
257  if (Subtarget.hasStdExtZbp()) {
258  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
259  // more combining.
263  // BSWAP i8 doesn't exist.
266 
267  if (Subtarget.is64Bit()) {
270  }
271  } else {
272  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
273  // pattern match it directly in isel.
275  Subtarget.hasStdExtZbb() ? Legal : Expand);
276  }
277 
278  if (Subtarget.hasStdExtZbb()) {
283 
284  if (Subtarget.is64Bit()) {
289  }
290  } else {
294  }
295 
296  if (Subtarget.hasStdExtZbt()) {
300 
301  if (Subtarget.is64Bit()) {
304  }
305  } else {
307  }
308 
309  ISD::CondCode FPCCToExpand[] = {
313 
314  ISD::NodeType FPOpToExpand[] = {
317 
318  if (Subtarget.hasStdExtZfh())
320 
321  if (Subtarget.hasStdExtZfh()) {
328  for (auto CC : FPCCToExpand)
333  for (auto Op : FPOpToExpand)
335  }
336 
337  if (Subtarget.hasStdExtF()) {
344  for (auto CC : FPCCToExpand)
349  for (auto Op : FPOpToExpand)
353  }
354 
355  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
357 
358  if (Subtarget.hasStdExtD()) {
365  for (auto CC : FPCCToExpand)
372  for (auto Op : FPOpToExpand)
376  }
377 
378  if (Subtarget.is64Bit()) {
383  }
384 
385  if (Subtarget.hasStdExtF()) {
388 
391  }
392 
397 
399 
400  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
401  // Unfortunately this can't be determined just from the ISA naming string.
403  Subtarget.is64Bit() ? Legal : Custom);
404 
408  if (Subtarget.is64Bit())
410 
411  if (Subtarget.hasStdExtA()) {
414  } else {
416  }
417 
419 
420  if (Subtarget.hasStdExtV()) {
422 
424 
425  // RVV intrinsics may have illegal operands.
426  // We also need to custom legalize vmv.x.s.
431  if (Subtarget.is64Bit()) {
433  } else {
436  }
437 
439 
440  static unsigned IntegerVPOps[] = {
441  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
442  ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,
443  ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
444 
445  static unsigned FloatingPointVPOps[] = {ISD::VP_FADD, ISD::VP_FSUB,
446  ISD::VP_FMUL, ISD::VP_FDIV};
447 
448  if (!Subtarget.is64Bit()) {
449  // We must custom-lower certain vXi64 operations on RV32 due to the vector
450  // element type being illegal.
453 
462  }
463 
464  for (MVT VT : BoolVecVTs) {
466 
467  // Mask VTs are custom-expanded into a series of standard nodes
472 
475 
479 
483 
484  // RVV has native int->float & float->int conversions where the
485  // element type sizes are within one power-of-two of each other. Any
486  // wider distances between type sizes have to be lowered as sequences
487  // which progressively narrow the gap in stages.
492 
493  // Expand all extending loads to types larger than this, and truncating
494  // stores from types larger than this.
495  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
496  setTruncStoreAction(OtherVT, VT, Expand);
497  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
498  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
499  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
500  }
501  }
502 
503  for (MVT VT : IntVecVTs) {
506 
511 
514 
515  // Custom-lower extensions and truncations from/to mask types.
519 
520  // RVV has native int->float & float->int conversions where the
521  // element type sizes are within one power-of-two of each other. Any
522  // wider distances between type sizes have to be lowered as sequences
523  // which progressively narrow the gap in stages.
528 
533 
534  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
535  // nodes which truncate by one power of two at a time.
537 
538  // Custom-lower insert/extract operations to simplify patterns.
541 
542  // Custom-lower reduction operations to set up the corresponding custom
543  // nodes' operands.
552 
553  for (unsigned VPOpc : IntegerVPOps)
554  setOperationAction(VPOpc, VT, Custom);
555 
558 
563 
564  setOperationAction(ISD::VP_LOAD, VT, Custom);
565  setOperationAction(ISD::VP_STORE, VT, Custom);
566  setOperationAction(ISD::VP_GATHER, VT, Custom);
567  setOperationAction(ISD::VP_SCATTER, VT, Custom);
568 
572 
575 
578 
579  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
580  setTruncStoreAction(VT, OtherVT, Expand);
581  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
582  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
583  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
584  }
585  }
586 
587  // Expand various CCs to best match the RVV ISA, which natively supports UNE
588  // but no other unordered comparisons, and supports all ordered comparisons
589  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
590  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
591  // and we pattern-match those back to the "original", swapping operands once
592  // more. This way we catch both operations and both "vf" and "fv" forms with
593  // fewer patterns.
594  ISD::CondCode VFPCCToExpand[] = {
598  };
599 
600  // Sets common operation actions on RVV floating-point vector types.
601  const auto SetCommonVFPActions = [&](MVT VT) {
603  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
604  // sizes are within one power-of-two of each other. Therefore conversions
605  // between vXf16 and vXf64 must be lowered as sequences which convert via
606  // vXf32.
609  // Custom-lower insert/extract operations to simplify patterns.
612  // Expand various condition codes (explained above).
613  for (auto CC : VFPCCToExpand)
614  setCondCodeAction(CC, VT, Expand);
615 
618 
624 
627 
632 
633  setOperationAction(ISD::VP_LOAD, VT, Custom);
634  setOperationAction(ISD::VP_STORE, VT, Custom);
635  setOperationAction(ISD::VP_GATHER, VT, Custom);
636  setOperationAction(ISD::VP_SCATTER, VT, Custom);
637 
640 
644 
646 
647  for (unsigned VPOpc : FloatingPointVPOps)
648  setOperationAction(VPOpc, VT, Custom);
649  };
650 
651  // Sets common extload/truncstore actions on RVV floating-point vector
652  // types.
653  const auto SetCommonVFPExtLoadTruncStoreActions =
654  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
655  for (auto SmallVT : SmallerVTs) {
656  setTruncStoreAction(VT, SmallVT, Expand);
657  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
658  }
659  };
660 
661  if (Subtarget.hasStdExtZfh())
662  for (MVT VT : F16VecVTs)
663  SetCommonVFPActions(VT);
664 
665  for (MVT VT : F32VecVTs) {
666  if (Subtarget.hasStdExtF())
667  SetCommonVFPActions(VT);
668  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
669  }
670 
671  for (MVT VT : F64VecVTs) {
672  if (Subtarget.hasStdExtD())
673  SetCommonVFPActions(VT);
674  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
675  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
676  }
677 
678  if (Subtarget.useRVVForFixedLengthVectors()) {
680  if (!useRVVForFixedLengthVectorVT(VT))
681  continue;
682 
683  // By default everything must be expanded.
684  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
686  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
687  setTruncStoreAction(VT, OtherVT, Expand);
688  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
689  setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
690  setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
691  }
692 
693  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
696 
699 
702 
705 
707 
709 
711 
713 
717 
722 
723  // Operations below are different between masks and other vectors.
724  if (VT.getVectorElementType() == MVT::i1) {
728  continue;
729  }
730 
731  // Use SPLAT_VECTOR to prevent type legalization from destroying the
732  // splats when type legalizing i64 scalar on RV32.
733  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
734  // improvements first.
735  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
738  }
739 
742 
747 
748  setOperationAction(ISD::VP_LOAD, VT, Custom);
749  setOperationAction(ISD::VP_STORE, VT, Custom);
750  setOperationAction(ISD::VP_GATHER, VT, Custom);
751  setOperationAction(ISD::VP_SCATTER, VT, Custom);
752 
766 
772 
775 
780 
783 
787 
788  // Custom-lower reduction operations to set up the corresponding custom
789  // nodes' operands.
795 
796  for (unsigned VPOpc : IntegerVPOps)
797  setOperationAction(VPOpc, VT, Custom);
798  }
799 
801  if (!useRVVForFixedLengthVectorVT(VT))
802  continue;
803 
804  // By default everything must be expanded.
805  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
807  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
808  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
809  setTruncStoreAction(VT, OtherVT, Expand);
810  }
811 
812  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
815 
821 
828 
829  setOperationAction(ISD::VP_LOAD, VT, Custom);
830  setOperationAction(ISD::VP_STORE, VT, Custom);
831  setOperationAction(ISD::VP_GATHER, VT, Custom);
832  setOperationAction(ISD::VP_SCATTER, VT, Custom);
833 
845 
848 
849  for (auto CC : VFPCCToExpand)
850  setCondCodeAction(CC, VT, Expand);
851 
855 
857 
862 
863  for (unsigned VPOpc : FloatingPointVPOps)
864  setOperationAction(VPOpc, VT, Custom);
865  }
866 
867  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
875  }
876  }
877 
878  // Function alignments.
879  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
880  setMinFunctionAlignment(FunctionAlignment);
881  setPrefFunctionAlignment(FunctionAlignment);
882 
884 
885  // Jumps are expensive, compared to logic
887 
888  // We can use any register for comparisons
890 
898  if (Subtarget.hasStdExtV()) {
902  setTargetDAGCombine(ISD::VP_GATHER);
903  setTargetDAGCombine(ISD::VP_SCATTER);
907  }
908 }
909 
912  EVT VT) const {
913  if (!VT.isVector())
914  return getPointerTy(DL);
915  if (Subtarget.hasStdExtV() &&
916  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
919 }
920 
921 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
922  return Subtarget.getXLenVT();
923 }
924 
926  const CallInst &I,
927  MachineFunction &MF,
928  unsigned Intrinsic) const {
929  switch (Intrinsic) {
930  default:
931  return false;
932  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
933  case Intrinsic::riscv_masked_atomicrmw_add_i32:
934  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
935  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
936  case Intrinsic::riscv_masked_atomicrmw_max_i32:
937  case Intrinsic::riscv_masked_atomicrmw_min_i32:
938  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
939  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
940  case Intrinsic::riscv_masked_cmpxchg_i32: {
941  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
943  Info.memVT = MVT::getVT(PtrTy->getElementType());
944  Info.ptrVal = I.getArgOperand(0);
945  Info.offset = 0;
946  Info.align = Align(4);
949  return true;
950  }
951  }
952 }
953 
955  const AddrMode &AM, Type *Ty,
956  unsigned AS,
957  Instruction *I) const {
958  // No global is ever allowed as a base.
959  if (AM.BaseGV)
960  return false;
961 
962  // Require a 12-bit signed offset.
963  if (!isInt<12>(AM.BaseOffs))
964  return false;
965 
966  switch (AM.Scale) {
967  case 0: // "r+i" or just "i", depending on HasBaseReg.
968  break;
969  case 1:
970  if (!AM.HasBaseReg) // allow "r+i".
971  break;
972  return false; // disallow "r+r" or "r+r+i".
973  default:
974  return false;
975  }
976 
977  return true;
978 }
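// Illustrative examples of what the checks above accept (informal sketch):
// RISC-V load/store addressing is limited to a base register plus a signed
// 12-bit immediate, so:
//   [reg]             -> legal ("r" with zero offset)
//   [reg + 2047]      -> legal (largest signed 12-bit offset)
//   [reg + 2048]      -> not legal (offset does not fit in 12 bits)
//   [reg + reg]       -> not legal (no register-register addressing)
//   [reg + 4*reg]     -> not legal (no scaled index)
//   [global + offset] -> not legal (no global allowed as a base)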
979 
981  return isInt<12>(Imm);
982 }
983 
985  return isInt<12>(Imm);
986 }
987 
988 // On RV32, 64-bit integers are split into their high and low parts and held
989 // in two different registers, so the trunc is free since the low register can
990 // just be used.
991 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
992  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
993  return false;
994  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
995  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
996  return (SrcBits == 64 && DestBits == 32);
997 }
998 
999 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1000  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1001  !SrcVT.isInteger() || !DstVT.isInteger())
1002  return false;
1003  unsigned SrcBits = SrcVT.getSizeInBits();
1004  unsigned DestBits = DstVT.getSizeInBits();
1005  return (SrcBits == 64 && DestBits == 32);
1006 }
1007 
1009  // Zexts are free if they can be combined with a load.
1010  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1011  EVT MemVT = LD->getMemoryVT();
1012  if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
1013  (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
1014  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1015  LD->getExtensionType() == ISD::ZEXTLOAD))
1016  return true;
1017  }
1018 
1019  return TargetLowering::isZExtFree(Val, VT2);
1020 }
1021 
1023  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1024 }
1025 
1027  return Subtarget.hasStdExtZbb();
1028 }
1029 
1031  return Subtarget.hasStdExtZbb();
1032 }
1033 
1034 /// Check if sinking \p I's operands to I's basic block is profitable, because
1035 /// the operands can be folded into a target instruction, e.g.
1036 /// splats of scalars can fold into vector instructions.
1038  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1039  using namespace llvm::PatternMatch;
1040 
1041  if (!I->getType()->isVectorTy() || !Subtarget.hasStdExtV())
1042  return false;
1043 
1044  auto IsSinker = [&](Instruction *I, int Operand) {
1045  switch (I->getOpcode()) {
1046  case Instruction::Add:
1047  case Instruction::Sub:
1048  case Instruction::Mul:
1049  return true;
1050  case Instruction::Shl:
1051  case Instruction::LShr:
1052  case Instruction::AShr:
1053  return Operand == 1;
1054  default:
1055  return false;
1056  }
1057  };
1058 
1059  for (auto OpIdx : enumerate(I->operands())) {
1060  if (!IsSinker(I, OpIdx.index()))
1061  continue;
1062 
1063  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1064  // Make sure we are not already sinking this operand
1065  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1066  continue;
1067 
1068  // We are looking for a splat that can be sunk.
1070  m_Undef(), m_ZeroMask())))
1071  continue;
1072 
1073  // All uses of the shuffle should be sunk to avoid duplicating it across
1074  // GPR and vector registers.
1075  for (Use &U : Op->uses()) {
1076  Instruction *Insn = cast<Instruction>(U.getUser());
1077  if (!IsSinker(Insn, U.getOperandNo()))
1078  return false;
1079  }
1080 
1081  Ops.push_back(&Op->getOperandUse(0));
1082  Ops.push_back(&OpIdx.value());
1083  }
1084  return true;
1085 }
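// A rough IR-level sketch of what this enables (names are illustrative):
//   %ins = insertelement <4 x i32> undef, i32 %x, i32 0
//   %spl = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   ...
//   %add = add <4 x i32> %v, %spl        ; in a different basic block
// Sinking %ins/%spl next to %add lets instruction selection fold the splat
// into a .vx form (e.g. vadd.vx with the scalar %x) instead of materializing
// the splat in a vector register and keeping it live across blocks.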
1086 
1088  bool ForCodeSize) const {
1089  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1090  return false;
1091  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1092  return false;
1093  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1094  return false;
1095  if (Imm.isNegZero())
1096  return false;
1097  return Imm.isZero();
1098 }
1099 
1101  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1102  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1103  (VT == MVT::f64 && Subtarget.hasStdExtD());
1104 }
1105 
1107  CallingConv::ID CC,
1108  EVT VT) const {
1109  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1110  // end up using a GPR but that will be decided based on ABI.
1111  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1112  return MVT::f32;
1113 
1115 }
1116 
1118  CallingConv::ID CC,
1119  EVT VT) const {
1120  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
1121  // end up using a GPR but that will be decided based on ABI.
1122  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1123  return 1;
1124 
1126 }
1127 
1128 // Changes the condition code and swaps operands if necessary, so the SetCC
1129 // operation matches one of the comparisons supported directly by branches
1130 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1131 // with 1/-1.
1132 static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
1133  ISD::CondCode &CC, SelectionDAG &DAG) {
1134  // Convert X > -1 to X >= 0.
1135  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1136  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1137  CC = ISD::SETGE;
1138  return;
1139  }
1140  // Convert X < 1 to 0 >= X.
1141  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1142  RHS = LHS;
1143  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1144  CC = ISD::SETGE;
1145  return;
1146  }
1147 
1148  switch (CC) {
1149  default:
1150  break;
1151  case ISD::SETGT:
1152  case ISD::SETLE:
1153  case ISD::SETUGT:
1154  case ISD::SETULE:
1156  std::swap(LHS, RHS);
1157  break;
1158  }
1159 }
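// Informal examples of the rewrites above, written as (setcc LHS, RHS, cc):
//   (setcc x, -1, setgt)  ->  (setcc x, 0, setge)   // favour compare with 0
//   (setcc x,  1, setlt)  ->  (setcc 0, x, setge)
//   (setcc x,  y, setle)  ->  (setcc y, x, setge)   // swap to a supported cc
//   (setcc x,  y, setugt) ->  (setcc y, x, setult)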
1160 
1162  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1163  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1164  if (VT.getVectorElementType() == MVT::i1)
1165  KnownSize *= 8;
1166 
1167  switch (KnownSize) {
1168  default:
1169  llvm_unreachable("Invalid LMUL.");
1170  case 8:
1171  return RISCVII::VLMUL::LMUL_F8;
1172  case 16:
1173  return RISCVII::VLMUL::LMUL_F4;
1174  case 32:
1175  return RISCVII::VLMUL::LMUL_F2;
1176  case 64:
1177  return RISCVII::VLMUL::LMUL_1;
1178  case 128:
1179  return RISCVII::VLMUL::LMUL_2;
1180  case 256:
1181  return RISCVII::VLMUL::LMUL_4;
1182  case 512:
1183  return RISCVII::VLMUL::LMUL_8;
1184  }
1185 }
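// Examples: RVVBitsPerBlock is 64, so one vector register holds 64 known-min
// bits, and i1 vectors are scaled up by 8 above before the switch:
//   nxv1i32 (32 bits)  -> LMUL_F2
//   nxv2i32 (64 bits)  -> LMUL_1
//   nxv8i32 (256 bits) -> LMUL_4
//   nxv8i1  (8 * 8)    -> LMUL_1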
1186 
1188  switch (LMul) {
1189  default:
1190  llvm_unreachable("Invalid LMUL.");
1195  return RISCV::VRRegClassID;
1197  return RISCV::VRM2RegClassID;
1199  return RISCV::VRM4RegClassID;
1201  return RISCV::VRM8RegClassID;
1202  }
1203 }
1204 
1206  RISCVII::VLMUL LMUL = getLMUL(VT);
1207  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1211  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1212  "Unexpected subreg numbering");
1213  return RISCV::sub_vrm1_0 + Index;
1214  }
1215  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1216  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1217  "Unexpected subreg numbering");
1218  return RISCV::sub_vrm2_0 + Index;
1219  }
1220  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1221  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1222  "Unexpected subreg numbering");
1223  return RISCV::sub_vrm4_0 + Index;
1224  }
1225  llvm_unreachable("Invalid vector type.");
1226 }
1227 
1229  if (VT.getVectorElementType() == MVT::i1)
1230  return RISCV::VRRegClassID;
1231  return getRegClassIDForLMUL(getLMUL(VT));
1232 }
1233 
1234 // Attempt to decompose a subvector insert/extract between VecVT and
1235 // SubVecVT via subregister indices. Returns the subregister index that
1236 // can perform the subvector insert/extract with the given element index, as
1237 // well as the index corresponding to any leftover subvectors that must be
1238 // further inserted/extracted within the register class for SubVecVT.
1239 std::pair<unsigned, unsigned>
1241  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1242  const RISCVRegisterInfo *TRI) {
1243  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1244  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1245  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1246  "Register classes not ordered");
1247  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1248  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1249  // Try to compose a subregister index that takes us from the incoming
1250  // LMUL>1 register class down to the outgoing one. At each step we halve
1251  // the LMUL:
1252  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1253  // Note that this is not guaranteed to find a subregister index, such as
1254  // when we are extracting from one VR type to another.
1255  unsigned SubRegIdx = RISCV::NoSubRegister;
1256  for (const unsigned RCID :
1257  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1258  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1259  VecVT = VecVT.getHalfNumVectorElementsVT();
1260  bool IsHi =
1261  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1262  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1263  getSubregIndexByMVT(VecVT, IsHi));
1264  if (IsHi)
1265  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1266  }
1267  return {SubRegIdx, InsertExtractIdx};
1268 }
1269 
1270 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1271 // stores for those types.
1272 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1273  return !Subtarget.useRVVForFixedLengthVectors() ||
1275 }
1276 
1277 static bool useRVVForFixedLengthVectorVT(MVT VT,
1278  const RISCVSubtarget &Subtarget) {
1279  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1280  if (!Subtarget.useRVVForFixedLengthVectors())
1281  return false;
1282 
1283  // We only support a set of vector types with a consistent maximum fixed size
1284  // across all supported vector element types to avoid legalization issues.
1285  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1286  // fixed-length vector type we support is 1024 bytes.
1287  if (VT.getFixedSizeInBits() > 1024 * 8)
1288  return false;
1289 
1290  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1291 
1292  MVT EltVT = VT.getVectorElementType();
1293 
1294  // Don't use RVV for vectors we cannot scalarize if required.
1295  switch (EltVT.SimpleTy) {
1296  // i1 is supported but has different rules.
1297  default:
1298  return false;
1299  case MVT::i1:
1300  // Masks can only use a single register.
1301  if (VT.getVectorNumElements() > MinVLen)
1302  return false;
1303  MinVLen /= 8;
1304  break;
1305  case MVT::i8:
1306  case MVT::i16:
1307  case MVT::i32:
1308  case MVT::i64:
1309  break;
1310  case MVT::f16:
1311  if (!Subtarget.hasStdExtZfh())
1312  return false;
1313  break;
1314  case MVT::f32:
1315  if (!Subtarget.hasStdExtF())
1316  return false;
1317  break;
1318  case MVT::f64:
1319  if (!Subtarget.hasStdExtD())
1320  return false;
1321  break;
1322  }
1323 
1324  // Reject elements larger than ELEN.
1325  if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
1326  return false;
1327 
1328  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1329  // Don't use RVV for types that don't fit.
1330  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1331  return false;
1332 
1333  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1334  // the base fixed length RVV support in place.
1335  if (!VT.isPow2VectorType())
1336  return false;
1337 
1338  return true;
1339 }
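// Informal examples, assuming MinVLen=128, the default LMUL/ELEN limits, and
// the required FP extensions (these are illustrative assumptions only):
//   v4i32, v16i8, v2f64 -> lowered via RVV (fit within the LMUL budget)
//   v3i32               -> rejected (not a power-of-two element count)
//   v64i64              -> rejected (would need LMUL > 8)
//   v8f16 without Zfh   -> rejected (element type cannot be scalarized)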
1340 
1341 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1343 }
1344 
1345 // Return the largest legal scalable vector type that matches VT's element type.
1347  const RISCVSubtarget &Subtarget) {
1348  // This may be called before legal types are setup.
1349  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1350  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1351  "Expected legal fixed length vector!");
1352 
1353  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
1354  unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
1355 
1356  MVT EltVT = VT.getVectorElementType();
1357  switch (EltVT.SimpleTy) {
1358  default:
1359  llvm_unreachable("unexpected element type for RVV container");
1360  case MVT::i1:
1361  case MVT::i8:
1362  case MVT::i16:
1363  case MVT::i32:
1364  case MVT::i64:
1365  case MVT::f16:
1366  case MVT::f32:
1367  case MVT::f64: {
1368  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1369  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1370  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
1371  unsigned NumElts =
1372  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1373  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1374  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1375  return MVT::getScalableVectorVT(EltVT, NumElts);
1376  }
1377  }
1378 }
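// Examples, assuming MinVLen=128 and MaxELen=64 (RVVBitsPerBlock is 64):
//   v16i8 -> nxv8i8    (16 * 64 / 128 = 8)
//   v4i32 -> nxv2i32   ( 4 * 64 / 128 = 2)
//   v2f64 -> nxv1f64   ( 2 * 64 / 128 = 1)
//   v1i8  -> nxv1i8    (clamped up to RVVBitsPerBlock / MaxELen = 1)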
1379 
1381  const RISCVSubtarget &Subtarget) {
1383  Subtarget);
1384 }
1385 
1388 }
1389 
1390 // Grow V to consume an entire RVV register.
1392  const RISCVSubtarget &Subtarget) {
1393  assert(VT.isScalableVector() &&
1394  "Expected to convert into a scalable vector!");
1396  "Expected a fixed length vector operand!");
1397  SDLoc DL(V);
1398  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1399  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1400 }
1401 
1402 // Shrink V so it's just big enough to maintain a VT's worth of data.
1404  const RISCVSubtarget &Subtarget) {
1405  assert(VT.isFixedLengthVector() &&
1406  "Expected to convert into a fixed length vector!");
1408  "Expected a scalable vector operand!");
1409  SDLoc DL(V);
1410  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1411  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1412 }
1413 
1414 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1415 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1416 // the vector type that it is contained in.
1417 static std::pair<SDValue, SDValue>
1418 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1419  const RISCVSubtarget &Subtarget) {
1420  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1421  MVT XLenVT = Subtarget.getXLenVT();
1422  SDValue VL = VecVT.isFixedLengthVector()
1423  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1424  : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
1425  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1426  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1427  return {Mask, VL};
1428 }
1429 
1430 // As above but assuming the given type is a scalable vector type.
1431 static std::pair<SDValue, SDValue>
1433  const RISCVSubtarget &Subtarget) {
1434  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1435  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1436 }
1437 
1438 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
1439 // little of either is (currently) supported. This can get us into an infinite loop
1440 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1441 // as a ..., etc.
1442 // Until either (or both) of these can reliably lower any node, reporting that
1443 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1444 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1445 // which is not desirable.
1447  EVT VT, unsigned DefinedValues) const {
1448  return false;
1449 }
1450 
1452  // Only splats are currently supported.
1453  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
1454  return true;
1455 
1456  return false;
1457 }
1458 
1460  // RISCV FP-to-int conversions saturate to the destination register size, but
1461  // don't produce 0 for nan. We can use a conversion instruction and fix the
1462  // nan case with a compare and a select.
1463  SDValue Src = Op.getOperand(0);
1464 
1465  EVT DstVT = Op.getValueType();
1466  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1467 
1468  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1469  unsigned Opc;
1470  if (SatVT == DstVT)
1471  Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
1472  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1474  else
1475  return SDValue();
1476  // FIXME: Support other SatVTs by clamping before or after the conversion.
1477 
1478  SDLoc DL(Op);
1479  SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
1480 
1481  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1482  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1483 }
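// Sketch of the resulting DAG for a same-width saturating conversion:
//   FpToInt = FCVT_X_RTZ Src                  // saturates to the integer range
//   Result  = select (Src uno Src), 0, FpToInt
// The unordered self-compare is only true for NaN, so the select supplies the
// 0 result that the hardware conversion does not.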
1484 
1486  const RISCVSubtarget &Subtarget) {
1487  MVT VT = Op.getSimpleValueType();
1488  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1489 
1490  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1491 
1492  SDLoc DL(Op);
1493  SDValue Mask, VL;
1494  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1495 
1496  unsigned Opc =
1498  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
1499  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1500 }
1501 
1502 struct VIDSequence {
1503  int64_t StepNumerator;
1505  int64_t Addend;
1506 };
1507 
1508 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1509 // to the (non-zero) step S and start value X. This can be then lowered as the
1510 // RVV sequence (VID * S) + X, for example.
1511 // The step S is represented as an integer numerator divided by a positive
1512 // denominator. Note that the implementation currently only identifies
1513 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1514 // cannot detect 2/3, for example.
1515 // Note that this method will also match potentially unappealing index
1516 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
1517 // determine whether this is worth generating code for.
1519  unsigned NumElts = Op.getNumOperands();
1520  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1521  if (!Op.getValueType().isInteger())
1522  return None;
1523 
1524  Optional<unsigned> SeqStepDenom;
1525  Optional<int64_t> SeqStepNum, SeqAddend;
1527  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1528  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1529  // Assume undef elements match the sequence; we just have to be careful
1530  // when interpolating across them.
1531  if (Op.getOperand(Idx).isUndef())
1532  continue;
1533  // The BUILD_VECTOR must be all constants.
1534  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1535  return None;
1536 
1537  uint64_t Val = Op.getConstantOperandVal(Idx) &
1538  maskTrailingOnes<uint64_t>(EltSizeInBits);
1539 
1540  if (PrevElt) {
1541  // Calculate the step since the last non-undef element, and ensure
1542  // it's consistent across the entire sequence.
1543  unsigned IdxDiff = Idx - PrevElt->second;
1544  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1545 
1546  // A zero value difference means that we're somewhere in the middle
1547  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1548  // step change before evaluating the sequence.
1549  if (ValDiff != 0) {
1550  int64_t Remainder = ValDiff % IdxDiff;
1551  // Normalize the step if it's greater than 1.
1552  if (Remainder != ValDiff) {
1553  // The difference must cleanly divide the element span.
1554  if (Remainder != 0)
1555  return None;
1556  ValDiff /= IdxDiff;
1557  IdxDiff = 1;
1558  }
1559 
1560  if (!SeqStepNum)
1561  SeqStepNum = ValDiff;
1562  else if (ValDiff != SeqStepNum)
1563  return None;
1564 
1565  if (!SeqStepDenom)
1566  SeqStepDenom = IdxDiff;
1567  else if (IdxDiff != *SeqStepDenom)
1568  return None;
1569  }
1570  }
1571 
1572  // Record and/or check any addend.
1573  if (SeqStepNum && SeqStepDenom) {
1574  uint64_t ExpectedVal =
1575  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1576  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1577  if (!SeqAddend)
1578  SeqAddend = Addend;
1579  else if (SeqAddend != Addend)
1580  return None;
1581  }
1582 
1583  // Record this non-undef element for later.
1584  if (!PrevElt || PrevElt->first != Val)
1585  PrevElt = std::make_pair(Val, Idx);
1586  }
1587  // We need to have logged both a step and an addend for this to count as
1588  // a legal index sequence.
1589  if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
1590  return None;
1591 
1592  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1593 }
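// Informal examples of sequences this recognizes, written as
// (step numerator/denominator, addend):
//   <0, 1, 2, 3>  -> step  1/1, addend 0
//   <1, 3, 5, 7>  -> step  2/1, addend 1
//   <0, 0, 1, 1>  -> step  1/2, addend 0
//   <3, 2, 1, 0>  -> step -1/1, addend 3
//   <0, 2, 3>     -> no match (inconsistent step)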
1594 
1596  const RISCVSubtarget &Subtarget) {
1597  MVT VT = Op.getSimpleValueType();
1598  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1599 
1600  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1601 
1602  SDLoc DL(Op);
1603  SDValue Mask, VL;
1604  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1605 
1606  MVT XLenVT = Subtarget.getXLenVT();
1607  unsigned NumElts = Op.getNumOperands();
1608 
1609  if (VT.getVectorElementType() == MVT::i1) {
1610  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
1611  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
1612  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
1613  }
1614 
1615  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
1616  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
1617  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
1618  }
1619 
1620  // Lower constant mask BUILD_VECTORs via an integer vector type, in
1621  // scalar integer chunks whose bit-width depends on the number of mask
1622  // bits and XLEN.
1623  // First, determine the most appropriate scalar integer type to use. This
1624  // is at most XLenVT, but may be shrunk to a smaller vector element type
1625  // according to the size of the final vector - use i8 chunks rather than
1626  // XLenVT if we're producing a v8i1. This results in more consistent
1627  // codegen across RV32 and RV64.
1628  unsigned NumViaIntegerBits =
1629  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
1630  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
1631  // If we have to use more than one INSERT_VECTOR_ELT then this
1632  // optimization is likely to increase code size; avoid performing it in
1633  // such a case. We can use a load from a constant pool in this case.
1634  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
1635  return SDValue();
1636  // Now we can create our integer vector type. Note that it may be larger
1637  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
1638  MVT IntegerViaVecVT =
1639  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
1640  divideCeil(NumElts, NumViaIntegerBits));
1641 
1642  uint64_t Bits = 0;
1643  unsigned BitPos = 0, IntegerEltIdx = 0;
1644  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
1645 
1646  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
1647  // Once we accumulate enough bits to fill our scalar type, insert into
1648  // our vector and clear our accumulated data.
1649  if (I != 0 && I % NumViaIntegerBits == 0) {
1650  if (NumViaIntegerBits <= 32)
1651  Bits = SignExtend64(Bits, 32);
1652  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1653  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
1654  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1655  Bits = 0;
1656  BitPos = 0;
1657  IntegerEltIdx++;
1658  }
1659  SDValue V = Op.getOperand(I);
1660  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
1661  Bits |= ((uint64_t)BitValue << BitPos);
1662  }
1663 
1664  // Insert the (remaining) scalar value into position in our integer
1665  // vector type.
1666  if (NumViaIntegerBits <= 32)
1667  Bits = SignExtend64(Bits, 32);
1668  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
1669  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
1670  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
1671 
1672  if (NumElts < NumViaIntegerBits) {
1673  // If we're producing a smaller vector than our minimum legal integer
1674  // type, bitcast to the equivalent (known-legal) mask type, and extract
1675  // our final mask.
1676  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
1677  Vec = DAG.getBitcast(MVT::v8i1, Vec);
1678  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
1679  DAG.getConstant(0, DL, XLenVT));
1680  } else {
1681  // Else we must have produced an integer type with the same size as the
1682  // mask type; bitcast for the final result.
1683  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
1684  Vec = DAG.getBitcast(VT, Vec);
1685  }
1686 
1687  return Vec;
1688  }
1689 
1690  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
1691  // vector type, we have a legal equivalently-sized i8 type, so we can use
1692  // that.
1693  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
1694  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
1695 
1696  SDValue WideVec;
1697  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1698  // For a splat, perform a scalar truncate before creating the wider
1699  // vector.
1700  assert(Splat.getValueType() == XLenVT &&
1701  "Unexpected type for i1 splat value");
1702  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
1703  DAG.getConstant(1, DL, XLenVT));
1704  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
1705  } else {
1706  SmallVector<SDValue, 8> Ops(Op->op_values());
1707  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
1708  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
1709  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
1710  }
1711 
1712  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
1713  }
1714 
1715  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
1716  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
1718  Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
1719  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
1720  }
1721 
1722  // Try and match index sequences, which we can lower to the vid instruction
1723  // with optional modifications. An all-undef vector is matched by
1724  // getSplatValue, above.
1725  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
1726  int64_t StepNumerator = SimpleVID->StepNumerator;
1727  unsigned StepDenominator = SimpleVID->StepDenominator;
1728  int64_t Addend = SimpleVID->Addend;
1729  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
1730  // threshold since it's the immediate value many RVV instructions accept.
1731  if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
1732  isInt<5>(Addend)) {
1733  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
1734  // Convert right out of the scalable type so we can use standard ISD
1735  // nodes for the rest of the computation. If we used scalable types with
1736  // these, we'd lose the fixed-length vector info and generate worse
1737  // vsetvli code.
1738  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
1739  assert(StepNumerator != 0 && "Invalid step");
1740  bool Negate = false;
1741  if (StepNumerator != 1) {
1742  int64_t SplatStepVal = StepNumerator;
1743  unsigned Opcode = ISD::MUL;
1744  if (isPowerOf2_64(std::abs(StepNumerator))) {
1745  Negate = StepNumerator < 0;
1746  Opcode = ISD::SHL;
1747  SplatStepVal = Log2_64(std::abs(StepNumerator));
1748  }
1749  SDValue SplatStep = DAG.getSplatVector(
1750  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
1751  VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
1752  }
1753  if (StepDenominator != 1) {
1754  SDValue SplatStep = DAG.getSplatVector(
1755  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
1756  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
1757  }
1758  if (Addend != 0 || Negate) {
1759  SDValue SplatAddend =
1760  DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
1761  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
1762  }
1763  return VID;
1764  }
1765  }
1766 
1767  // Attempt to detect "hidden" splats, which only reveal themselves as splats
1768  // when re-interpreted as a vector with a larger element type. For example,
1769  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
1770  // could be instead splat as
1771  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
1772  // TODO: This optimization could also work on non-constant splats, but it
1773  // would require bit-manipulation instructions to construct the splat value.
1775  unsigned EltBitSize = VT.getScalarSizeInBits();
1776  const auto *BV = cast<BuildVectorSDNode>(Op);
1777  if (VT.isInteger() && EltBitSize < 64 &&
1779  BV->getRepeatedSequence(Sequence) &&
1780  (Sequence.size() * EltBitSize) <= 64) {
1781  unsigned SeqLen = Sequence.size();
1782  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
1783  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
1784  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
1785  ViaIntVT == MVT::i64) &&
1786  "Unexpected sequence type");
1787 
1788  unsigned EltIdx = 0;
1789  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
1790  uint64_t SplatValue = 0;
1791  // Construct the amalgamated value which can be splatted as this larger
1792  // vector type.
1793  for (const auto &SeqV : Sequence) {
1794  if (!SeqV.isUndef())
1795  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
1796  << (EltIdx * EltBitSize));
1797  EltIdx++;
1798  }
1799 
1800  // On RV64, sign-extend from 32 to 64 bits where possible in order to
1801  // achieve better constant materialization.
1802  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
1803  SplatValue = SignExtend64(SplatValue, 32);
1804 
1805  // Since we can't introduce illegal i64 types at this stage, we can only
1806  // perform an i64 splat on RV32 if it is its own sign-extended value. That
1807  // way we can use RVV instructions to splat.
1808  assert((ViaIntVT.bitsLE(XLenVT) ||
1809  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
1810  "Unexpected bitcast sequence");
1811  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
1812  SDValue ViaVL =
1813  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
1814  MVT ViaContainerVT =
1815  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
1816  SDValue Splat =
1817  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
1818  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
1819  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
1820  return DAG.getBitcast(VT, Splat);
1821  }
1822  }
1823 
1824  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
1825  // which constitute a large proportion of the elements. In such cases we can
1826  // splat a vector with the dominant element and make up the shortfall with
1827  // INSERT_VECTOR_ELTs.
1828  // Note that this includes vectors of 2 elements by association. The
1829  // upper-most element is the "dominant" one, allowing us to use a splat to
1830  // "insert" the upper element, and an insert of the lower element at position
1831  // 0, which improves codegen.
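// For example, in <i32 0, i32 7, i32 0, i32 0> the value 0 is dominant: the
// lowering below splats 0 and then inserts the single 7, rather than
// inserting all four elements individually.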
1832  SDValue DominantValue;
1833  unsigned MostCommonCount = 0;
1834  DenseMap<SDValue, unsigned> ValueCounts;
1835  unsigned NumUndefElts =
1836  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
1837 
1838  // Track the number of scalar loads we know we'd be inserting, estimated as
1839  // any non-zero floating-point constant. Other kinds of element are either
1840  // already in registers or are materialized on demand. The threshold at which
1841  // a vector load is more desirable than several scalar materialization and
1842  // vector-insertion instructions is not known.
1843  unsigned NumScalarLoads = 0;
1844 
1845  for (SDValue V : Op->op_values()) {
1846  if (V.isUndef())
1847  continue;
1848 
1849  ValueCounts.insert(std::make_pair(V, 0));
1850  unsigned &Count = ValueCounts[V];
1851 
1852  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
1853  NumScalarLoads += !CFP->isExactlyValue(+0.0);
1854 
1855  // Is this value dominant? In case of a tie, prefer the highest element as
1856  // it's cheaper to insert near the beginning of a vector than it is at the
1857  // end.
1858  if (++Count >= MostCommonCount) {
1859  DominantValue = V;
1860  MostCommonCount = Count;
1861  }
1862  }
1863 
1864  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
1865  unsigned NumDefElts = NumElts - NumUndefElts;
1866  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
1867 
1868  // Don't perform this optimization when optimizing for size, since
1869  // materializing elements and inserting them tends to cause code bloat.
1870  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
1871  ((MostCommonCount > DominantValueCountThreshold) ||
1872  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
1873  // Start by splatting the most common element.
1874  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
1875 
1876  DenseSet<SDValue> Processed{DominantValue};
1877  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
1878  for (const auto &OpIdx : enumerate(Op->ops())) {
1879  const SDValue &V = OpIdx.value();
1880  if (V.isUndef() || !Processed.insert(V).second)
1881  continue;
1882  if (ValueCounts[V] == 1) {
1883  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
1884  DAG.getConstant(OpIdx.index(), DL, XLenVT));
1885  } else {
1886  // Blend in all instances of this value using a VSELECT, using a
1887  // mask where each bit signals whether that element is the one
1888  // we're after.
1890  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
1891  return DAG.getConstant(V == V1, DL, XLenVT);
1892  });
1893  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
1894  DAG.getBuildVector(SelMaskTy, DL, Ops),
1895  DAG.getSplatBuildVector(VT, DL, V), Vec);
1896  }
1897  }
1898 
1899  return Vec;
1900  }
1901 
1902  return SDValue();
1903 }
1904 
1906  SDValue Hi, SDValue VL, SelectionDAG &DAG) {
1907  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
1908  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
1909  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
1910  // If the Hi constant is the sign extension of Lo, lower this as a custom
1911  // node in order to try and match RVV vector/scalar instructions.
1912  if ((LoC >> 31) == HiC)
1913  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
1914  }
1915 
1916  // Fall back to a stack store and stride x0 vector load.
1917  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
1918 }
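// For example, on RV32 a splat of the i64 constant -1 has Lo == Hi == -1, so
// the sign-bit check above holds and a single VMV_V_X_VL of the low word
// suffices. A splat of 0x0000000100000000 (Lo = 0, Hi = 1) fails the check and
// falls back to the stack-store + stride-x0 vector load sequence.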
1919 
1920 // Called by type legalization to handle splat of i64 on RV32.
1921 // FIXME: We can optimize this when the type has sign or zero bits in one
1922 // of the halves.
1923 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1924  SDValue VL, SelectionDAG &DAG) {
1925  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
1927  DAG.getConstant(0, DL, MVT::i32));
1929  DAG.getConstant(1, DL, MVT::i32));
1930  return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
1931 }
1932 
1933 // This function lowers a splat of a scalar operand Splat with the vector
1934 // length VL. It ensures the final sequence is type legal, which is useful when
1935 // lowering a splat after type legalization.
1937  SelectionDAG &DAG,
1938  const RISCVSubtarget &Subtarget) {
1939  if (VT.isFloatingPoint())
1940  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
1941 
1942  MVT XLenVT = Subtarget.getXLenVT();
1943 
1944  // Simplest case is that the operand needs to be promoted to XLenVT.
1945  if (Scalar.getValueType().bitsLE(XLenVT)) {
1946  // If the operand is a constant, sign extend to increase our chances
1947  // of being able to use a .vi instruction. ANY_EXTEND would become a
1948  // zero extend and the simm5 check in isel would fail.
1949  // FIXME: Should we ignore the upper bits in isel instead?
1950  unsigned ExtOpc =
1951  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
1952  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
1953  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
1954  }
1955 
1956  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
1957  "Unexpected scalar for splat lowering!");
1958 
1959  // Otherwise use the more complicated splatting algorithm.
1960  return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
1961 }
1962 
1964  const RISCVSubtarget &Subtarget) {
1965  SDValue V1 = Op.getOperand(0);
1966  SDValue V2 = Op.getOperand(1);
1967  SDLoc DL(Op);
1968  MVT XLenVT = Subtarget.getXLenVT();
1969  MVT VT = Op.getSimpleValueType();
1970  unsigned NumElts = VT.getVectorNumElements();
1971  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
1972 
1973  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1974 
1975  SDValue TrueMask, VL;
1976  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1977 
1978  if (SVN->isSplat()) {
1979  const int Lane = SVN->getSplatIndex();
1980  if (Lane >= 0) {
1981  MVT SVT = VT.getVectorElementType();
1982 
1983  // Turn splatted vector load into a strided load with an X0 stride.
1984  SDValue V = V1;
1985  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
1986  // with undef.
1987  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
1988  int Offset = Lane;
1989  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
1990  int OpElements =
1992  V = V.getOperand(Offset / OpElements);
1993  Offset %= OpElements;
1994  }
1995 
1996  // We need to ensure the load isn't atomic or volatile.
1997  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
1998  auto *Ld = cast<LoadSDNode>(V);
1999  Offset *= SVT.getStoreSize();
2000  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2002 
2003  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2004  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2005  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2006  SDValue IntID =
2007  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2008  SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
2009  DAG.getRegister(RISCV::X0, XLenVT), VL};
2010  SDValue NewLoad = DAG.getMemIntrinsicNode(
2011  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2013  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2014  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2015  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2016  }
2017 
2018  // Otherwise use a scalar load and splat. This will give the best
2019  // opportunity to fold a splat into the operation. ISel can turn it into
2020  // the x0 strided load if we aren't able to fold away the select.
2021  if (SVT.isFloatingPoint())
2022  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2023  Ld->getPointerInfo().getWithOffset(Offset),
2024  Ld->getOriginalAlign(),
2025  Ld->getMemOperand()->getFlags());
2026  else
2027  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2028  Ld->getPointerInfo().getWithOffset(Offset), SVT,
2029  Ld->getOriginalAlign(),
2030  Ld->getMemOperand()->getFlags());
2031  DAG.makeEquivalentMemoryOrdering(Ld, V);
2032 
2033  unsigned Opc =
2035  SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
2036  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2037  }
2038 
2039  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2040  assert(Lane < (int)NumElts && "Unexpected lane!");
2041  SDValue Gather =
2042  DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
2043  DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
2044  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2045  }
2046  }
2047 
2048  // Detect shuffles which can be re-expressed as vector selects; these are
2049  // shuffles in which each element in the destination is taken from an element
2050  // at the corresponding index in either source vector.
2051  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
2052  int MaskIndex = MaskIdx.value();
2053  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2054  });
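 // Worked example (illustrative, not from the source): for 4-element operands,
 // the mask <0, 5, 2, 7> takes lane i from V1 when Mask[i] < 4 and from V2
 // otherwise, so it satisfies the check above and can be lowered as a vselect
 // with mask <1, 0, 1, 0> instead of a pair of vrgathers.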
2055 
2056  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2057 
2058  SmallVector<SDValue> MaskVals;
2059  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2060  // merged with a second vrgather.
2061  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2062 
2063  // By default we preserve the original operand order, and use a mask to
2064  // select LHS as true and RHS as false. However, since RVV vector selects may
2065  // feature splats but only on the LHS, we may choose to invert our mask and
2066  // instead select between RHS and LHS.
2067  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2068  bool InvertMask = IsSelect == SwapOps;
2069 
2070  // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
2071  // half.
2072  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2073 
2074  // Now construct the mask that will be used by the vselect or blended
2075  // vrgather operation. For vrgathers, construct the appropriate indices into
2076  // each vector.
2077  for (int MaskIndex : SVN->getMask()) {
2078  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2079  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2080  if (!IsSelect) {
2081  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2082  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2083  ? DAG.getConstant(MaskIndex, DL, XLenVT)
2084  : DAG.getUNDEF(XLenVT));
2085  GatherIndicesRHS.push_back(
2086  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2087  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2088  if (IsLHSOrUndefIndex && MaskIndex >= 0)
2089  ++LHSIndexCounts[MaskIndex];
2090  if (!IsLHSOrUndefIndex)
2091  ++RHSIndexCounts[MaskIndex - NumElts];
2092  }
2093  }
2094 
2095  if (SwapOps) {
2096  std::swap(V1, V2);
2097  std::swap(GatherIndicesLHS, GatherIndicesRHS);
2098  }
2099 
2100  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2101  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2102  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2103 
2104  if (IsSelect)
2105  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2106 
2107  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2108  // On such a large vector we're unable to use i8 as the index type.
2109  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2110  // may involve vector splitting if we're already at LMUL=8, or our
2111  // user-supplied maximum fixed-length LMUL.
2112  return SDValue();
2113  }
2114 
2115  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2116  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2117  MVT IndexVT = VT.changeTypeToInteger();
2118  // Since we can't introduce illegal index types at this stage, use i16 and
2119  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2120  // than XLenVT.
2121  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2122  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2123  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2124  }
2125 
2126  MVT IndexContainerVT =
2127  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2128 
2129  SDValue Gather;
2130  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2131  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2132  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2133  Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
2134  } else {
2135  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2136  // If only one index is used, we can use a "splat" vrgather.
2137  // TODO: We can splat the most-common index and fix-up any stragglers, if
2138  // that's beneficial.
2139  if (LHSIndexCounts.size() == 1) {
2140  int SplatIndex = LHSIndexCounts.begin()->getFirst();
2141  Gather =
2142  DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2143  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2144  } else {
2145  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2146  LHSIndices =
2147  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2148 
2149  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2150  TrueMask, VL);
2151  }
2152  }
2153 
2154  // If a second vector operand is used by this shuffle, blend it in with an
2155  // additional vrgather.
2156  if (!V2.isUndef()) {
2157  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2158  // If only one index is used, we can use a "splat" vrgather.
2159  // TODO: We can splat the most-common index and fix-up any stragglers, if
2160  // that's beneficial.
2161  if (RHSIndexCounts.size() == 1) {
2162  int SplatIndex = RHSIndexCounts.begin()->getFirst();
2163  V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2164  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
2165  } else {
2166  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2167  RHSIndices =
2168  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2169  V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
2170  VL);
2171  }
2172 
2173  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2174  SelectMask =
2175  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2176 
2177  Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
2178  Gather, VL);
2179  }
2180 
2181  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2182 }
2183 
2184 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
2185  SDLoc DL, SelectionDAG &DAG,
2186  const RISCVSubtarget &Subtarget) {
2187  if (VT.isScalableVector())
2188  return DAG.getFPExtendOrRound(Op, DL, VT);
2189  assert(VT.isFixedLengthVector() &&
2190  "Unexpected value type for RVV FP extend/round lowering");
2191  SDValue Mask, VL;
2192  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2193  unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
2194  ? RISCVISD::FP_EXTEND_VL
2195  : RISCVISD::FP_ROUND_VL;
2196  return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
2197 }
2198 
2199 // While RVV has alignment restrictions, we should always be able to load as a
2200 // legal equivalently-sized byte-typed vector instead. This method is
2201 // responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
2202 // the load is already correctly-aligned, it returns SDValue().
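 // For example (an illustrative sketch, not from the source): an under-aligned
 // load of <vscale x 2 x i32> is re-expressed as a load of <vscale x 8 x i8>
 // followed by a bitcast back to the original type.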
2203 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2204  SelectionDAG &DAG) const {
2205  auto *Load = cast<LoadSDNode>(Op);
2206  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2207 
2208  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2209  Load->getMemoryVT(),
2210  *Load->getMemOperand()))
2211  return SDValue();
2212 
2213  SDLoc DL(Op);
2214  MVT VT = Op.getSimpleValueType();
2215  unsigned EltSizeBits = VT.getScalarSizeInBits();
2216  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2217  "Unexpected unaligned RVV load type");
2218  MVT NewVT =
2219  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2220  assert(NewVT.isValid() &&
2221  "Expecting equally-sized RVV vector types to be legal");
2222  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2223  Load->getPointerInfo(), Load->getOriginalAlign(),
2224  Load->getMemOperand()->getFlags());
2225  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2226 }
2227 
2228 // While RVV has alignment restrictions, we should always be able to store as a
2229 // legal equivalently-sized byte-typed vector instead. This method is
2230 // responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
2231 // returns SDValue() if the store is already correctly aligned.
2232 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2233  SelectionDAG &DAG) const {
2234  auto *Store = cast<StoreSDNode>(Op);
2235  assert(Store && Store->getValue().getValueType().isVector() &&
2236  "Expected vector store");
2237 
2238  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2239  Store->getMemoryVT(),
2240  *Store->getMemOperand()))
2241  return SDValue();
2242 
2243  SDLoc DL(Op);
2244  SDValue StoredVal = Store->getValue();
2245  MVT VT = StoredVal.getSimpleValueType();
2246  unsigned EltSizeBits = VT.getScalarSizeInBits();
2247  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2248  "Unexpected unaligned RVV store type");
2249  MVT NewVT =
2250  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2251  assert(NewVT.isValid() &&
2252  "Expecting equally-sized RVV vector types to be legal");
2253  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2254  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2255  Store->getPointerInfo(), Store->getOriginalAlign(),
2256  Store->getMemOperand()->getFlags());
2257 }
2258 
2259 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
2260  SelectionDAG &DAG) const {
2261  switch (Op.getOpcode()) {
2262  default:
2263  report_fatal_error("unimplemented operand");
2264  case ISD::GlobalAddress:
2265  return lowerGlobalAddress(Op, DAG);
2266  case ISD::BlockAddress:
2267  return lowerBlockAddress(Op, DAG);
2268  case ISD::ConstantPool:
2269  return lowerConstantPool(Op, DAG);
2270  case ISD::JumpTable:
2271  return lowerJumpTable(Op, DAG);
2272  case ISD::GlobalTLSAddress:
2273  return lowerGlobalTLSAddress(Op, DAG);
2274  case ISD::SELECT:
2275  return lowerSELECT(Op, DAG);
2276  case ISD::BRCOND:
2277  return lowerBRCOND(Op, DAG);
2278  case ISD::VASTART:
2279  return lowerVASTART(Op, DAG);
2280  case ISD::FRAMEADDR:
2281  return lowerFRAMEADDR(Op, DAG);
2282  case ISD::RETURNADDR:
2283  return lowerRETURNADDR(Op, DAG);
2284  case ISD::SHL_PARTS:
2285  return lowerShiftLeftParts(Op, DAG);
2286  case ISD::SRA_PARTS:
2287  return lowerShiftRightParts(Op, DAG, true);
2288  case ISD::SRL_PARTS:
2289  return lowerShiftRightParts(Op, DAG, false);
2290  case ISD::BITCAST: {
2291  SDLoc DL(Op);
2292  EVT VT = Op.getValueType();
2293  SDValue Op0 = Op.getOperand(0);
2294  EVT Op0VT = Op0.getValueType();
2295  MVT XLenVT = Subtarget.getXLenVT();
2296  if (VT.isFixedLengthVector()) {
2297  // We can handle fixed length vector bitcasts with a simple replacement
2298  // in isel.
2299  if (Op0VT.isFixedLengthVector())
2300  return Op;
2301  // When bitcasting from scalar to fixed-length vector, insert the scalar
2302  // into a one-element vector of the result type, and perform a vector
2303  // bitcast.
2304  if (!Op0VT.isVector()) {
2305  auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
2306  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
2307  DAG.getUNDEF(BVT), Op0,
2308  DAG.getConstant(0, DL, XLenVT)));
2309  }
2310  return SDValue();
2311  }
2312  // Custom-legalize bitcasts from fixed-length vector types to scalar types
2313  // thus: bitcast the vector to a one-element vector type whose element type
2314  // is the same as the result type, and extract the first element.
2315  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
2316  LLVMContext &Context = *DAG.getContext();
2317  SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
2318  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
2319  DAG.getConstant(0, DL, XLenVT));
2320  }
2321  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
2322  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
2323  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
2324  return FPConv;
2325  }
2326  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
2327  Subtarget.hasStdExtF()) {
2328  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
2329  SDValue FPConv =
2330  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
2331  return FPConv;
2332  }
2333  return SDValue();
2334  }
2335  case ISD::INTRINSIC_WO_CHAIN:
2336  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2337  case ISD::INTRINSIC_W_CHAIN:
2338  return LowerINTRINSIC_W_CHAIN(Op, DAG);
2339  case ISD::BSWAP:
2340  case ISD::BITREVERSE: {
2341  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
2342  assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
2343  MVT VT = Op.getSimpleValueType();
2344  SDLoc DL(Op);
2345  // Start with the maximum immediate value which is the bitwidth - 1.
2346  unsigned Imm = VT.getSizeInBits() - 1;
2347  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
2348  if (Op.getOpcode() == ISD::BSWAP)
2349  Imm &= ~0x7U;
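 // Illustrative example (not from the source): for a 32-bit value, BITREVERSE
 // keeps Imm == 31 (a full bit reversal) while BSWAP clears the low three bits
 // to get Imm == 24 (a byte swap).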
2350  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
2351  DAG.getConstant(Imm, DL, VT));
2352  }
2353  case ISD::FSHL:
2354  case ISD::FSHR: {
2355  MVT VT = Op.getSimpleValueType();
2356  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
2357  SDLoc DL(Op);
2358  if (Op.getOperand(2).getOpcode() == ISD::Constant)
2359  return Op;
2360  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
2361  // use log2(XLen) bits. Mask the shift amount accordingly.
2362  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
2363  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
2364  DAG.getConstant(ShAmtWidth, DL, VT));
2365  unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
2366  return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
2367  }
2368  case ISD::TRUNCATE: {
2369  SDLoc DL(Op);
2370  MVT VT = Op.getSimpleValueType();
2371  // Only custom-lower vector truncates
2372  if (!VT.isVector())
2373  return Op;
2374 
2375  // Truncates to mask types are handled differently
2376  if (VT.getVectorElementType() == MVT::i1)
2377  return lowerVectorMaskTrunc(Op, DAG);
2378 
2379  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
2380  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
2381  // truncate by one power of two at a time.
2382  MVT DstEltVT = VT.getVectorElementType();
2383 
2384  SDValue Src = Op.getOperand(0);
2385  MVT SrcVT = Src.getSimpleValueType();
2386  MVT SrcEltVT = SrcVT.getVectorElementType();
2387 
2388  assert(DstEltVT.bitsLT(SrcEltVT) &&
2389  isPowerOf2_64(DstEltVT.getSizeInBits()) &&
2390  isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
2391  "Unexpected vector truncate lowering");
2392 
2393  MVT ContainerVT = SrcVT;
2394  if (SrcVT.isFixedLengthVector()) {
2395  ContainerVT = getContainerForFixedLengthVector(SrcVT);
2396  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2397  }
2398 
2399  SDValue Result = Src;
2400  SDValue Mask, VL;
2401  std::tie(Mask, VL) =
2402  getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
2403  LLVMContext &Context = *DAG.getContext();
2404  const ElementCount Count = ContainerVT.getVectorElementCount();
2405  do {
2406  SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2407  EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
2408  Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
2409  Mask, VL);
2410  } while (SrcEltVT != DstEltVT);
2411 
2412  if (SrcVT.isFixedLengthVector())
2413  Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
2414 
2415  return Result;
2416  }
2417  case ISD::ANY_EXTEND:
2418  case ISD::ZERO_EXTEND:
2419  if (Op.getOperand(0).getValueType().isVector() &&
2420  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2421  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
2422  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
2423  case ISD::SIGN_EXTEND:
2424  if (Op.getOperand(0).getValueType().isVector() &&
2425  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2426  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
2427  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
2428  case ISD::SPLAT_VECTOR_PARTS:
2429  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
2430  case ISD::INSERT_VECTOR_ELT:
2431  return lowerINSERT_VECTOR_ELT(Op, DAG);
2432  case ISD::EXTRACT_VECTOR_ELT:
2433  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
2434  case ISD::VSCALE: {
2435  MVT VT = Op.getSimpleValueType();
2436  SDLoc DL(Op);
2437  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
2438  // We define our scalable vector types for lmul=1 to use a 64 bit known
2439  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
2440  // vscale as VLENB / 8.
2441  assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
2442  if (isa<ConstantSDNode>(Op.getOperand(0))) {
2443  // We assume VLENB is a multiple of 8. We manually choose the best shift
2444  // here because SimplifyDemandedBits isn't always able to simplify it.
2445  uint64_t Val = Op.getConstantOperandVal(0);
2446  if (isPowerOf2_64(Val)) {
2447  uint64_t Log2 = Log2_64(Val);
2448  if (Log2 < 3)
2449  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
2450  DAG.getConstant(3 - Log2, DL, VT));
2451  if (Log2 > 3)
2452  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
2453  DAG.getConstant(Log2 - 3, DL, VT));
2454  return VLENB;
2455  }
2456  // If the multiplier is a multiple of 8, scale it down to avoid needing
2457  // to shift the VLENB value.
2458  if ((Val % 8) == 0)
2459  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
2460  DAG.getConstant(Val / 8, DL, VT));
2461  }
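 // Worked example (illustrative, not from the source): with
 // RISCV::RVVBitsPerBlock == 64, vscale equals VLENB / 8, so vscale * 2 lowers
 // to (srl VLENB, 2), vscale * 8 is VLENB itself, vscale * 32 lowers to
 // (shl VLENB, 2), and vscale * 24 is scaled down to (mul VLENB, 3).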
2462 
2463  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
2464  DAG.getConstant(3, DL, VT));
2465  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
2466  }
2467  case ISD::FP_EXTEND: {
2468  // RVV can only do fp_extend to types double the size as the source. We
2469  // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
2470  // via f32.
2471  SDLoc DL(Op);
2472  MVT VT = Op.getSimpleValueType();
2473  SDValue Src = Op.getOperand(0);
2474  MVT SrcVT = Src.getSimpleValueType();
2475 
2476  // Prepare any fixed-length vector operands.
2477  MVT ContainerVT = VT;
2478  if (SrcVT.isFixedLengthVector()) {
2479  ContainerVT = getContainerForFixedLengthVector(VT);
2480  MVT SrcContainerVT =
2481  ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
2482  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2483  }
2484 
2485  if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
2486  SrcVT.getVectorElementType() != MVT::f16) {
2487  // For scalable vectors, we only need to close the gap between
2488  // vXf16->vXf64.
2489  if (!VT.isFixedLengthVector())
2490  return Op;
2491  // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
2492  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2493  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2494  }
2495 
2496  MVT InterVT = VT.changeVectorElementType(MVT::f32);
2497  MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
2498  SDValue IntermediateExtend = getRVVFPExtendOrRound(
2499  Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
2500 
2501  SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
2502  DL, DAG, Subtarget);
2503  if (VT.isFixedLengthVector())
2504  return convertFromScalableVector(VT, Extend, DAG, Subtarget);
2505  return Extend;
2506  }
2507  case ISD::FP_ROUND: {
2508  // RVV can only do fp_round to types half the size as the source. We
2509  // custom-lower f64->f16 rounds via RVV's round-to-odd float
2510  // conversion instruction.
2511  SDLoc DL(Op);
2512  MVT VT = Op.getSimpleValueType();
2513  SDValue Src = Op.getOperand(0);
2514  MVT SrcVT = Src.getSimpleValueType();
2515 
2516  // Prepare any fixed-length vector operands.
2517  MVT ContainerVT = VT;
2518  if (VT.isFixedLengthVector()) {
2519  MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2520  ContainerVT =
2521  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2522  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2523  }
2524 
2525  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
2526  SrcVT.getVectorElementType() != MVT::f64) {
2527  // For scalable vectors, we only need to close the gap between
2528  // vXf64<->vXf16.
2529  if (!VT.isFixedLengthVector())
2530  return Op;
2531  // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
2532  Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
2533  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2534  }
2535 
2536  SDValue Mask, VL;
2537  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2538 
2539  MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
2540  SDValue IntermediateRound =
2541  DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
2542  SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
2543  DL, DAG, Subtarget);
2544 
2545  if (VT.isFixedLengthVector())
2546  return convertFromScalableVector(VT, Round, DAG, Subtarget);
2547  return Round;
2548  }
2549  case ISD::FP_TO_SINT:
2550  case ISD::FP_TO_UINT:
2551  case ISD::SINT_TO_FP:
2552  case ISD::UINT_TO_FP: {
2553  // RVV can only do fp<->int conversions to types half/double the size as
2554  // the source. We custom-lower any conversions that do two hops into
2555  // sequences.
2556  MVT VT = Op.getSimpleValueType();
2557  if (!VT.isVector())
2558  return Op;
2559  SDLoc DL(Op);
2560  SDValue Src = Op.getOperand(0);
2561  MVT EltVT = VT.getVectorElementType();
2562  MVT SrcVT = Src.getSimpleValueType();
2563  MVT SrcEltVT = SrcVT.getVectorElementType();
2564  unsigned EltSize = EltVT.getSizeInBits();
2565  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2566  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
2567  "Unexpected vector element types");
2568 
2569  bool IsInt2FP = SrcEltVT.isInteger();
2570  // Widening conversions
2571  if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
2572  if (IsInt2FP) {
2573  // Do a regular integer sign/zero extension then convert to float.
2574  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
2575  VT.getVectorElementCount());
2576  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
2577  ? ISD::ZERO_EXTEND
2578  : ISD::SIGN_EXTEND;
2579  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
2580  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
2581  }
2582  // FP2Int
2583  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
2584  // Do one doubling fp_extend then complete the operation by converting
2585  // to int.
2586  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2587  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
2588  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
2589  }
2590 
2591  // Narrowing conversions
2592  if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
2593  if (IsInt2FP) {
2594  // One narrowing int_to_fp, then an fp_round.
2595  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
2596  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
2597  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
2598  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
2599  }
2600  // FP2Int
2601  // One narrowing fp_to_int, then truncate the integer. If the float isn't
2602  // representable by the integer, the result is poison.
2603  MVT IVecVT =
2604  MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
2605  VT.getVectorElementCount());
2606  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
2607  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
2608  }
2609 
2610  // Scalable vectors can exit here. Patterns will handle equally-sized
2611  // conversions as well as halving/doubling ones.
2612  if (!VT.isFixedLengthVector())
2613  return Op;
2614 
2615  // For fixed-length vectors we lower to a custom "VL" node.
2616  unsigned RVVOpc = 0;
2617  switch (Op.getOpcode()) {
2618  default:
2619  llvm_unreachable("Impossible opcode");
2620  case ISD::FP_TO_SINT:
2621  RVVOpc = RISCVISD::FP_TO_SINT_VL;
2622  break;
2623  case ISD::FP_TO_UINT:
2624  RVVOpc = RISCVISD::FP_TO_UINT_VL;
2625  break;
2626  case ISD::SINT_TO_FP:
2627  RVVOpc = RISCVISD::SINT_TO_FP_VL;
2628  break;
2629  case ISD::UINT_TO_FP:
2630  RVVOpc = RISCVISD::UINT_TO_FP_VL;
2631  break;
2632  }
2633 
2634  MVT ContainerVT, SrcContainerVT;
2635  // Derive the reference container type from the larger vector type.
2636  if (SrcEltSize > EltSize) {
2637  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
2638  ContainerVT =
2639  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
2640  } else {
2641  ContainerVT = getContainerForFixedLengthVector(VT);
2642  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
2643  }
2644 
2645  SDValue Mask, VL;
2646  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2647 
2648  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2649  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
2650  return convertFromScalableVector(VT, Src, DAG, Subtarget);
2651  }
2652  case ISD::FP_TO_SINT_SAT:
2653  case ISD::FP_TO_UINT_SAT:
2654  return lowerFP_TO_INT_SAT(Op, DAG);
2655  case ISD::VECREDUCE_ADD:
2656  case ISD::VECREDUCE_UMAX:
2657  case ISD::VECREDUCE_SMAX:
2658  case ISD::VECREDUCE_UMIN:
2659  case ISD::VECREDUCE_SMIN:
2660  return lowerVECREDUCE(Op, DAG);
2661  case ISD::VECREDUCE_AND:
2662  case ISD::VECREDUCE_OR:
2663  case ISD::VECREDUCE_XOR:
2664  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
2665  return lowerVectorMaskVECREDUCE(Op, DAG);
2666  return lowerVECREDUCE(Op, DAG);
2667  case ISD::VECREDUCE_FADD:
2668  case ISD::VECREDUCE_SEQ_FADD:
2669  case ISD::VECREDUCE_FMIN:
2670  case ISD::VECREDUCE_FMAX:
2671  return lowerFPVECREDUCE(Op, DAG);
2672  case ISD::INSERT_SUBVECTOR:
2673  return lowerINSERT_SUBVECTOR(Op, DAG);
2674  case ISD::EXTRACT_SUBVECTOR:
2675  return lowerEXTRACT_SUBVECTOR(Op, DAG);
2676  case ISD::STEP_VECTOR:
2677  return lowerSTEP_VECTOR(Op, DAG);
2678  case ISD::VECTOR_REVERSE:
2679  return lowerVECTOR_REVERSE(Op, DAG);
2680  case ISD::BUILD_VECTOR:
2681  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
2682  case ISD::SPLAT_VECTOR:
2683  if (Op.getValueType().getVectorElementType() == MVT::i1)
2684  return lowerVectorMaskSplat(Op, DAG);
2685  return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
2686  case ISD::VECTOR_SHUFFLE:
2687  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
2688  case ISD::CONCAT_VECTORS: {
2689  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
2690  // better than going through the stack, as the default expansion does.
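 // E.g. (illustrative, not from the source): (concat_vectors v4i32:A, v4i32:B)
 // becomes (insert_subvector (insert_subvector undef, A, 0), B, 4).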
2691  SDLoc DL(Op);
2692  MVT VT = Op.getSimpleValueType();
2693  unsigned NumOpElts =
2694  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
2695  SDValue Vec = DAG.getUNDEF(VT);
2696  for (const auto &OpIdx : enumerate(Op->ops()))
2697  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
2698  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
2699  return Vec;
2700  }
2701  case ISD::LOAD:
2702  if (auto V = expandUnalignedRVVLoad(Op, DAG))
2703  return V;
2704  if (Op.getValueType().isFixedLengthVector())
2705  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
2706  return Op;
2707  case ISD::STORE:
2708  if (auto V = expandUnalignedRVVStore(Op, DAG))
2709  return V;
2710  if (Op.getOperand(1).getValueType().isFixedLengthVector())
2711  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
2712  return Op;
2713  case ISD::MLOAD:
2714  case ISD::VP_LOAD:
2715  return lowerMaskedLoad(Op, DAG);
2716  case ISD::MSTORE:
2717  case ISD::VP_STORE:
2718  return lowerMaskedStore(Op, DAG);
2719  case ISD::SETCC:
2720  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
2721  case ISD::ADD:
2722  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
2723  case ISD::SUB:
2724  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
2725  case ISD::MUL:
2726  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
2727  case ISD::MULHS:
2728  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
2729  case ISD::MULHU:
2730  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
2731  case ISD::AND:
2732  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
2733  RISCVISD::AND_VL);
2734  case ISD::OR:
2735  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
2736  RISCVISD::OR_VL);
2737  case ISD::XOR:
2738  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
2739  RISCVISD::XOR_VL);
2740  case ISD::SDIV:
2741  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
2742  case ISD::SREM:
2743  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
2744  case ISD::UDIV:
2745  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
2746  case ISD::UREM:
2747  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
2748  case ISD::SHL:
2749  case ISD::SRA:
2750  case ISD::SRL:
2751  if (Op.getSimpleValueType().isFixedLengthVector())
2752  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
2753  // This can be called for an i32 shift amount that needs to be promoted.
2754  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
2755  "Unexpected custom legalisation");
2756  return SDValue();
2757  case ISD::SADDSAT:
2758  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
2759  case ISD::UADDSAT:
2760  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
2761  case ISD::SSUBSAT:
2762  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
2763  case ISD::USUBSAT:
2764  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
2765  case ISD::FADD:
2766  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
2767  case ISD::FSUB:
2768  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
2769  case ISD::FMUL:
2770  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
2771  case ISD::FDIV:
2772  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
2773  case ISD::FNEG:
2774  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
2775  case ISD::FABS:
2776  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
2777  case ISD::FSQRT:
2778  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
2779  case ISD::FMA:
2780  return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
2781  case ISD::SMIN:
2782  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
2783  case ISD::SMAX:
2784  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
2785  case ISD::UMIN:
2786  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
2787  case ISD::UMAX:
2788  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
2789  case ISD::FMINNUM:
2790  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
2791  case ISD::FMAXNUM:
2792  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
2793  case ISD::ABS:
2794  return lowerABS(Op, DAG);
2795  case ISD::VSELECT:
2796  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
2797  case ISD::FCOPYSIGN:
2798  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
2799  case ISD::MGATHER:
2800  case ISD::VP_GATHER:
2801  return lowerMaskedGather(Op, DAG);
2802  case ISD::MSCATTER:
2803  case ISD::VP_SCATTER:
2804  return lowerMaskedScatter(Op, DAG);
2805  case ISD::FLT_ROUNDS_:
2806  return lowerGET_ROUNDING(Op, DAG);
2807  case ISD::SET_ROUNDING:
2808  return lowerSET_ROUNDING(Op, DAG);
2809  case ISD::VP_ADD:
2810  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
2811  case ISD::VP_SUB:
2812  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
2813  case ISD::VP_MUL:
2814  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
2815  case ISD::VP_SDIV:
2816  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
2817  case ISD::VP_UDIV:
2818  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
2819  case ISD::VP_SREM:
2820  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
2821  case ISD::VP_UREM:
2822  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
2823  case ISD::VP_AND:
2824  return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
2825  case ISD::VP_OR:
2826  return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
2827  case ISD::VP_XOR:
2828  return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
2829  case ISD::VP_ASHR:
2830  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
2831  case ISD::VP_LSHR:
2832  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
2833  case ISD::VP_SHL:
2834  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
2835  case ISD::VP_FADD:
2836  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
2837  case ISD::VP_FSUB:
2838  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
2839  case ISD::VP_FMUL:
2840  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
2841  case ISD::VP_FDIV:
2842  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
2843  }
2844 }
2845 
2846 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
2847  SelectionDAG &DAG, unsigned Flags) {
2848  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
2849 }
2850 
2851 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
2852  SelectionDAG &DAG, unsigned Flags) {
2853  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
2854  Flags);
2855 }
2856 
2857 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
2858  SelectionDAG &DAG, unsigned Flags) {
2859  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
2860  N->getOffset(), Flags);
2861 }
2862 
2863 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
2864  SelectionDAG &DAG, unsigned Flags) {
2865  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
2866 }
2867 
2868 template <class NodeTy>
2869 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
2870  bool IsLocal) const {
2871  SDLoc DL(N);
2872  EVT Ty = getPointerTy(DAG.getDataLayout());
2873 
2874  if (isPositionIndependent()) {
2875  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2876  if (IsLocal)
2877  // Use PC-relative addressing to access the symbol. This generates the
2878  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
2879  // %pcrel_lo(auipc)).
2880  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2881 
2882  // Use PC-relative addressing to access the GOT for this symbol, then load
2883  // the address from the GOT. This generates the pattern (PseudoLA sym),
2884  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
2885  return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
2886  }
2887 
2888  switch (getTargetMachine().getCodeModel()) {
2889  default:
2890  report_fatal_error("Unsupported code model for lowering");
2891  case CodeModel::Small: {
2892  // Generate a sequence for accessing addresses within the first 2 GiB of
2893  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
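 // In assembly this is roughly (illustrative):
 //   lui  a0, %hi(sym)
 //   addi a0, a0, %lo(sym)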
2894  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
2895  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
2896  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2897  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
2898  }
2899  case CodeModel::Medium: {
2900  // Generate a sequence for accessing addresses within any 2GiB range within
2901  // the address space. This generates the pattern (PseudoLLA sym), which
2902  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
2903  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
2904  return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
2905  }
2906  }
2907 }
2908 
2909 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
2910  SelectionDAG &DAG) const {
2911  SDLoc DL(Op);
2912  EVT Ty = Op.getValueType();
2913  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
2914  int64_t Offset = N->getOffset();
2915  MVT XLenVT = Subtarget.getXLenVT();
2916 
2917  const GlobalValue *GV = N->getGlobal();
2918  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
2919  SDValue Addr = getAddr(N, DAG, IsLocal);
2920 
2921  // In order to maximise the opportunity for common subexpression elimination,
2922  // emit a separate ADD node for the global address offset instead of folding
2923  // it into the global address node. Later peephole optimisations may choose to
2924  // fold it back in when profitable.
2925  if (Offset != 0)
2926  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
2927  DAG.getConstant(Offset, DL, XLenVT));
2928  return Addr;
2929 }
2930 
2931 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
2932  SelectionDAG &DAG) const {
2933  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
2934 
2935  return getAddr(N, DAG);
2936 }
2937 
2938 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
2939  SelectionDAG &DAG) const {
2940  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
2941 
2942  return getAddr(N, DAG);
2943 }
2944 
2945 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
2946  SelectionDAG &DAG) const {
2947  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
2948 
2949  return getAddr(N, DAG);
2950 }
2951 
2952 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
2953  SelectionDAG &DAG,
2954  bool UseGOT) const {
2955  SDLoc DL(N);
2956  EVT Ty = getPointerTy(DAG.getDataLayout());
2957  const GlobalValue *GV = N->getGlobal();
2958  MVT XLenVT = Subtarget.getXLenVT();
2959 
2960  if (UseGOT) {
2961  // Use PC-relative addressing to access the GOT for this TLS symbol, then
2962  // load the address from the GOT and add the thread pointer. This generates
2963  // the pattern (PseudoLA_TLS_IE sym), which expands to
2964  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
2965  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
2966  SDValue Load =
2967  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
2968 
2969  // Add the thread pointer.
2970  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2971  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
2972  }
2973 
2974  // Generate a sequence for accessing the address relative to the thread
2975  // pointer, with the appropriate adjustment for the thread pointer offset.
2976  // This generates the pattern
2977  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
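 // In assembly this is roughly (illustrative):
 //   lui  a0, %tprel_hi(sym)
 //   add  a0, a0, tp, %tprel_add(sym)
 //   addi a0, a0, %tprel_lo(sym)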
2978  SDValue AddrHi =
2979  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
2980  SDValue AddrAdd =
2981  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
2982  SDValue AddrLo =
2983  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
2984 
2985  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
2986  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
2987  SDValue MNAdd = SDValue(
2988  DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
2989  0);
2990  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
2991 }
2992 
2993 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
2994  SelectionDAG &DAG) const {
2995  SDLoc DL(N);
2996  EVT Ty = getPointerTy(DAG.getDataLayout());
2997  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
2998  const GlobalValue *GV = N->getGlobal();
2999 
3000  // Use a PC-relative addressing mode to access the global dynamic GOT address.
3001  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3002  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
3003  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3004  SDValue Load =
3005  SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
3006 
3007  // Prepare argument list to generate call.
3008  ArgListTy Args;
3009  ArgListEntry Entry;
3010  Entry.Node = Load;
3011  Entry.Ty = CallTy;
3012  Args.push_back(Entry);
3013 
3014  // Setup call to __tls_get_addr.
3015  TargetLowering::CallLoweringInfo CLI(DAG);
3016  CLI.setDebugLoc(DL)
3017  .setChain(DAG.getEntryNode())
3018  .setLibCallee(CallingConv::C, CallTy,
3019  DAG.getExternalSymbol("__tls_get_addr", Ty),
3020  std::move(Args));
3021 
3022  return LowerCallTo(CLI).first;
3023 }
3024 
3025 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3026  SelectionDAG &DAG) const {
3027  SDLoc DL(Op);
3028  EVT Ty = Op.getValueType();
3029  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3030  int64_t Offset = N->getOffset();
3031  MVT XLenVT = Subtarget.getXLenVT();
3032 
3033  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3034 
3035  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3036  CallingConv::GHC)
3037  report_fatal_error("In GHC calling convention TLS is not supported");
3038 
3039  SDValue Addr;
3040  switch (Model) {
3041  case TLSModel::LocalExec:
3042  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3043  break;
3044  case TLSModel::InitialExec:
3045  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3046  break;
3047  case TLSModel::LocalDynamic:
3048  case TLSModel::GeneralDynamic:
3049  Addr = getDynamicTLSAddr(N, DAG);
3050  break;
3051  }
3052 
3053  // In order to maximise the opportunity for common subexpression elimination,
3054  // emit a separate ADD node for the global address offset instead of folding
3055  // it into the global address node. Later peephole optimisations may choose to
3056  // fold it back in when profitable.
3057  if (Offset != 0)
3058  return DAG.getNode(ISD::ADD, DL, Ty, Addr,
3059  DAG.getConstant(Offset, DL, XLenVT));
3060  return Addr;
3061 }
3062 
3063 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3064  SDValue CondV = Op.getOperand(0);
3065  SDValue TrueV = Op.getOperand(1);
3066  SDValue FalseV = Op.getOperand(2);
3067  SDLoc DL(Op);
3068  MVT VT = Op.getSimpleValueType();
3069  MVT XLenVT = Subtarget.getXLenVT();
3070 
3071  // Lower vector SELECTs to VSELECTs by splatting the condition.
3072  if (VT.isVector()) {
3073  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3074  SDValue CondSplat = VT.isScalableVector()
3075  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3076  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3077  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3078  }
3079 
3080  // If the result type is XLenVT and CondV is the output of a SETCC node
3081  // which also operated on XLenVT inputs, then merge the SETCC node into the
3082  // lowered RISCVISD::SELECT_CC to take advantage of the integer
3083  // compare+branch instructions. i.e.:
3084  // (select (setcc lhs, rhs, cc), truev, falsev)
3085  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3086  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3087  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3088  SDValue LHS = CondV.getOperand(0);
3089  SDValue RHS = CondV.getOperand(1);
3090  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3091  ISD::CondCode CCVal = CC->get();
3092 
3093  // Special case for a select of 2 constants that have a difference of 1.
3094  // Normally this is done by DAGCombine, but if the select is introduced by
3095  // type legalization or op legalization, we miss it. Restricting to SETLT
3096  // case for now because that is what signed saturating add/sub need.
3097  // FIXME: We don't need the condition to be SETLT or even a SETCC,
3098  // but we would probably want to swap the true/false values if the condition
3099  // is SETGE/SETLE to avoid an XORI.
3100  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3101  CCVal == ISD::SETLT) {
3102  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3103  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3104  if (TrueVal - 1 == FalseVal)
3105  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3106  if (TrueVal + 1 == FalseVal)
3107  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3108  }
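 // E.g. (illustrative, not from the source): (select (setlt a, b), 6, 5)
 // becomes (add (setlt a, b), 5) and (select (setlt a, b), 5, 6) becomes
 // (sub 6, (setlt a, b)), avoiding a branch entirely.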
3109 
3110  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3111 
3112  SDValue TargetCC = DAG.getCondCode(CCVal);
3113  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3114  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3115  }
3116 
3117  // Otherwise:
3118  // (select condv, truev, falsev)
3119  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3120  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3121  SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3122 
3123  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3124 
3125  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3126 }
3127 
3128 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3129  SDValue CondV = Op.getOperand(1);
3130  SDLoc DL(Op);
3131  MVT XLenVT = Subtarget.getXLenVT();
3132 
3133  if (CondV.getOpcode() == ISD::SETCC &&
3134  CondV.getOperand(0).getValueType() == XLenVT) {
3135  SDValue LHS = CondV.getOperand(0);
3136  SDValue RHS = CondV.getOperand(1);
3137  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3138 
3139  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3140 
3141  SDValue TargetCC = DAG.getCondCode(CCVal);
3142  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3143  LHS, RHS, TargetCC, Op.getOperand(2));
3144  }
3145 
3146  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3147  CondV, DAG.getConstant(0, DL, XLenVT),
3148  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3149 }
3150 
3151 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3152  MachineFunction &MF = DAG.getMachineFunction();
3153  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3154 
3155  SDLoc DL(Op);
3156  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3157  getPointerTy(MF.getDataLayout()));
3158 
3159  // vastart just stores the address of the VarArgsFrameIndex slot into the
3160  // memory location argument.
3161  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3162  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3163  MachinePointerInfo(SV));
3164 }
3165 
3166 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3167  SelectionDAG &DAG) const {
3168  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3169  MachineFunction &MF = DAG.getMachineFunction();
3170  MachineFrameInfo &MFI = MF.getFrameInfo();
3171  MFI.setFrameAddressIsTaken(true);
3172  Register FrameReg = RI.getFrameRegister(MF);
3173  int XLenInBytes = Subtarget.getXLen() / 8;
3174 
3175  EVT VT = Op.getValueType();
3176  SDLoc DL(Op);
3177  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3178  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3179  while (Depth--) {
3180  int Offset = -(XLenInBytes * 2);
3181  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3182  DAG.getIntPtrConstant(Offset, DL));
3183  FrameAddr =
3184  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3185  }
3186  return FrameAddr;
3187 }
3188 
3189 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3190  SelectionDAG &DAG) const {
3191  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3192  MachineFunction &MF = DAG.getMachineFunction();
3193  MachineFrameInfo &MFI = MF.getFrameInfo();
3194  MFI.setReturnAddressIsTaken(true);
3195  MVT XLenVT = Subtarget.getXLenVT();
3196  int XLenInBytes = Subtarget.getXLen() / 8;
3197 
3198  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3199  return SDValue();
3200 
3201  EVT VT = Op.getValueType();
3202  SDLoc DL(Op);
3203  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3204  if (Depth) {
3205  int Off = -XLenInBytes;
3206  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3207  SDValue Offset = DAG.getConstant(Off, DL, VT);
3208  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3209  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3210  MachinePointerInfo());
3211  }
3212 
3213  // Return the value of the return address register, marking it an implicit
3214  // live-in.
3215  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3216  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3217 }
3218 
3219 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3220  SelectionDAG &DAG) const {
3221  SDLoc DL(Op);
3222  SDValue Lo = Op.getOperand(0);
3223  SDValue Hi = Op.getOperand(1);
3224  SDValue Shamt = Op.getOperand(2);
3225  EVT VT = Lo.getValueType();
3226 
3227  // if Shamt-XLEN < 0: // Shamt < XLEN
3228  // Lo = Lo << Shamt
3229  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
3230  // else:
3231  // Lo = 0
3232  // Hi = Lo << (Shamt-XLEN)
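 // For instance (illustrative): on RV32 with Shamt == 8 the first branch gives
 // Hi = (Hi << 8) | (Lo >> 24); with Shamt == 40, Lo becomes 0 and Hi becomes
 // Lo << 8.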
3233 
3234  SDValue Zero = DAG.getConstant(0, DL, VT);
3235  SDValue One = DAG.getConstant(1, DL, VT);
3236  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3237  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3238  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3239  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3240 
3241  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3242  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3243  SDValue ShiftRightLo =
3244  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3245  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3246  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3247  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3248 
3249  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3250 
3251  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3252  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3253 
3254  SDValue Parts[2] = {Lo, Hi};
3255  return DAG.getMergeValues(Parts, DL);
3256 }
3257 
3258 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
3259  bool IsSRA) const {
3260  SDLoc DL(Op);
3261  SDValue Lo = Op.getOperand(0);
3262  SDValue Hi = Op.getOperand(1);
3263  SDValue Shamt = Op.getOperand(2);
3264  EVT VT = Lo.getValueType();
3265 
3266  // SRA expansion:
3267  // if Shamt-XLEN < 0: // Shamt < XLEN
3268  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3269  // Hi = Hi >>s Shamt
3270  // else:
3271  // Lo = Hi >>s (Shamt-XLEN);
3272  // Hi = Hi >>s (XLEN-1)
3273  //
3274  // SRL expansion:
3275  // if Shamt-XLEN < 0: // Shamt < XLEN
3276  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
3277  // Hi = Hi >>u Shamt
3278  // else:
3279  // Lo = Hi >>u (Shamt-XLEN);
3280  // Hi = 0;
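 // For instance (illustrative): an SRL by 8 on RV32 takes the first branch,
 // giving Lo = (Lo >>u 8) | (Hi << 24) and Hi = Hi >>u 8.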
3281 
3282  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
3283 
3284  SDValue Zero = DAG.getConstant(0, DL, VT);
3285  SDValue One = DAG.getConstant(1, DL, VT);
3286  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3287  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3288  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3289  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
3290 
3291  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
3292  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
3293  SDValue ShiftLeftHi =
3294  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
3295  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
3296  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
3297  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
3298  SDValue HiFalse =
3299  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
3300 
3301  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3302 
3303  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
3304  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3305 
3306  SDValue Parts[2] = {Lo, Hi};
3307  return DAG.getMergeValues(Parts, DL);
3308 }
3309 
3310 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
3311 // legal equivalently-sized i8 type, so we can use that as a go-between.
3312 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
3313  SelectionDAG &DAG) const {
3314  SDLoc DL(Op);
3315  MVT VT = Op.getSimpleValueType();
3316  SDValue SplatVal = Op.getOperand(0);
3317  // All-zeros or all-ones splats are handled specially.
3318  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
3319  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3320  return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
3321  }
3322  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
3323  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
3324  return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
3325  }
3326  MVT XLenVT = Subtarget.getXLenVT();
3327  assert(SplatVal.getValueType() == XLenVT &&
3328  "Unexpected type for i1 splat value");
3329  MVT InterVT = VT.changeVectorElementType(MVT::i8);
3330  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
3331  DAG.getConstant(1, DL, XLenVT));
3332  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
3333  SDValue Zero = DAG.getConstant(0, DL, InterVT);
3334  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
3335 }
3336 
3337 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
3338 // illegal (currently only vXi64 RV32).
3339 // FIXME: We could also catch non-constant sign-extended i32 values and lower
3340 // them to SPLAT_VECTOR_I64
3341 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
3342  SelectionDAG &DAG) const {
3343  SDLoc DL(Op);
3344  MVT VecVT = Op.getSimpleValueType();
3345  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
3346  "Unexpected SPLAT_VECTOR_PARTS lowering");
3347 
3348  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
3349  SDValue Lo = Op.getOperand(0);
3350  SDValue Hi = Op.getOperand(1);
3351 
3352  if (VecVT.isFixedLengthVector()) {
3353  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3354  SDLoc DL(Op);
3355  SDValue Mask, VL;
3356  std::tie(Mask, VL) =
3357  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3358 
3359  SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
3360  return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
3361  }
3362 
3363  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3364  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3365  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3366  // If the Hi constant is just Lo's sign bit replicated (i.e. Lo sign-extended),
3367  // lower this as a custom node in order to try and match RVV vector/scalar instructions.
3368  if ((LoC >> 31) == HiC)
3369  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3370  }
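 // E.g. (illustrative, not from the source): splatting the 64-bit constant
 // 0xFFFFFFFFFFFFFFFE has LoC == -2 and HiC == -1 == (LoC >> 31), so it is
 // emitted as a single SPLAT_VECTOR_I64 of Lo.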
3371 
3372  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3373  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3374  isa<ConstantSDNode>(Hi.getOperand(1)) &&
3375  Hi.getConstantOperandVal(1) == 31)
3376  return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
3377 
3378  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
3379  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
3380  DAG.getRegister(RISCV::X0, MVT::i64));
3381 }
3382 
3383 // Custom-lower extensions from mask vectors by using a vselect either with 1
3384 // for zero/any-extension or -1 for sign-extension:
3385 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
3386 // Note that any-extension is lowered identically to zero-extension.
3387 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
3388  int64_t ExtTrueVal) const {
3389  SDLoc DL(Op);
3390  MVT VecVT = Op.getSimpleValueType();
3391  SDValue Src = Op.getOperand(0);
3392  // Only custom-lower extensions from mask types
3393  assert(Src.getValueType().isVector() &&
3394  Src.getValueType().getVectorElementType() == MVT::i1);
3395 
3396  MVT XLenVT = Subtarget.getXLenVT();
3397  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
3398  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
3399 
3400  if (VecVT.isScalableVector()) {
3401  // Be careful not to introduce illegal scalar types at this stage, and be
3402  // careful also about splatting constants, as on RV32 vXi64 SPLAT_VECTOR is
3403  // illegal and must be expanded. Since we know that the constants are
3404  // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
3405  bool IsRV32E64 =
3406  !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
3407 
3408  if (!IsRV32E64) {
3409  SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
3410  SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
3411  } else {
3412  SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
3413  SplatTrueVal =
3414  DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
3415  }
3416 
3417  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
3418  }
3419 
3420  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
3421  MVT I1ContainerVT =
3422  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3423 
3424  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
3425 
3426  SDValue Mask, VL;
3427  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3428 
3429  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
3430  SplatTrueVal =
3431  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
3432  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
3433  SplatTrueVal, SplatZero, VL);
3434 
3435  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
3436 }
3437 
3438 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
3439  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
3440  MVT ExtVT = Op.getSimpleValueType();
3441  // Only custom-lower extensions from fixed-length vector types.
3442  if (!ExtVT.isFixedLengthVector())
3443  return Op;
3444  MVT VT = Op.getOperand(0).getSimpleValueType();
3445  // Grab the canonical container type for the extended type. Infer the smaller
3446  // type from that to ensure the same number of vector elements, as we know
3447  // the LMUL will be sufficient to hold the smaller type.
3448  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
3449  // Get the extended container type manually to ensure the same number of
3450  // vector elements between source and dest.
3451  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
3452  ContainerExtVT.getVectorElementCount());
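  // For example (assuming a 128-bit minimum VLEN where v4i32 maps to nxv2i32),
  // extending v4i16 to v4i32 places the source in nxv2i16 so that both
  // containers carry the same element count per vscale, independent of what
  // the canonical container for v4i16 alone would be.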
3453 
3454  SDValue Op1 =
3455  convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
3456 
3457  SDLoc DL(Op);
3458  SDValue Mask, VL;
3459  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3460 
3461  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
3462 
3463  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
3464 }
3465 
3466 // Custom-lower truncations from vectors to mask vectors by using a mask and a
3467 // setcc operation:
3468 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
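// For example (registers illustrative), (nxv2i1 = trunc nxv2i64 %v) typically
// selects to:
//   vand.vi  v8, v8, 1
//   vmsne.vi v0, v8, 0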
3469 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
3470  SelectionDAG &DAG) const {
3471  SDLoc DL(Op);
3472  EVT MaskVT = Op.getValueType();
3473  // Only expect to custom-lower truncations to mask types
3474  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
3475  "Unexpected type for vector mask lowering");
3476  SDValue Src = Op.getOperand(0);
3477  MVT VecVT = Src.getSimpleValueType();
3478 
3479  // If this is a fixed vector, we need to convert it to a scalable vector.
3480  MVT ContainerVT = VecVT;
3481  if (VecVT.isFixedLengthVector()) {
3482  ContainerVT = getContainerForFixedLengthVector(VecVT);
3483  Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3484  }
3485 
3486  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
3487  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
3488 
3489  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
3490  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
3491 
3492  if (VecVT.isScalableVector()) {
3493  SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
3494  return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
3495  }
3496 
3497  SDValue Mask, VL;
3498  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3499 
3500  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
3501  SDValue Trunc =
3502  DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
3503  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
3504  DAG.getCondCode(ISD::SETNE), Mask, VL);
3505  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
3506 }
3507 
3508 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
3509 // first position of a vector, and that vector is slid up to the insert index.
3510 // By limiting the active vector length to index+1 and merging with the
3511 // original vector (with an undisturbed tail policy for elements >= VL), we
3512 // achieve the desired result of leaving all elements untouched except the one
3513 // at VL-1, which is replaced with the desired value.
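// Roughly, for an insert at index Idx (registers illustrative, XLen-sized
// element):
//   vsetvli     zero, Idx+1, e<SEW>, tu   ; VL = Idx+1, tail undisturbed
//   vmv.s.x     v_val, a0                 ; value lands in element 0
//   vslideup.vx v_dst, v_val, Idx         ; slides it up to element Idx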
3514 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3515  SelectionDAG &DAG) const {
3516  SDLoc DL(Op);
3517  MVT VecVT = Op.getSimpleValueType();
3518  SDValue Vec = Op.getOperand(0);
3519  SDValue Val = Op.getOperand(1);
3520  SDValue Idx = Op.getOperand(2);
3521 
3522  if (VecVT.getVectorElementType() == MVT::i1) {
3523  // FIXME: For now we just promote to an i8 vector and insert into that,
3524  // but this is probably not optimal.
3525  MVT WideVT = VecVT.changeVectorElementType(MVT::i8);
3526  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3527  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
3528  return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
3529  }
3530 
3531  MVT ContainerVT = VecVT;
3532  // If the operand is a fixed-length vector, convert to a scalable one.
3533  if (VecVT.isFixedLengthVector()) {
3534  ContainerVT = getContainerForFixedLengthVector(VecVT);
3535  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3536  }
3537 
3538  MVT XLenVT = Subtarget.getXLenVT();
3539 
3540  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3541  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
3542  // Even i64-element vectors on RV32 can be lowered without scalar
3543  // legalization if the value is simply the sign-extension of its lower 32
3544  // bits, so only the low half needs to be materialized.
3545  // TODO: We could also catch sign extensions of a 32-bit value.
3546  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
3547  const auto *CVal = cast<ConstantSDNode>(Val);
3548  if (isInt<32>(CVal->getSExtValue())) {
3549  IsLegalInsert = true;
3550  Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3551  }
3552  }
3553 
3554  SDValue Mask, VL;
3555  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3556 
3557  SDValue ValInVec;
3558 
3559  if (IsLegalInsert) {
3560  unsigned Opc =
3561  VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
3562  if (isNullConstant(Idx)) {
3563  Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
3564  if (!VecVT.isFixedLengthVector())
3565  return Vec;
3566  return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
3567  }
3568  ValInVec =
3569  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
3570  } else {
3571  // On RV32, i64-element vectors must be specially handled to place the
3572  // value at element 0, by using two vslide1up instructions in sequence on
3573  // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
3574  // this.
3575  SDValue One = DAG.getConstant(1, DL, XLenVT);
3576  SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
3577  SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
3578  MVT I32ContainerVT =
3579  MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
3580  SDValue I32Mask =
3581  getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
3582  // Limit the active VL to two.
3583  SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
3584  // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
3585  // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
3586  ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
3587  InsertI64VL);
3588  // First slide in the hi value, then the lo in underneath it.
3589  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3590  ValHi, I32Mask, InsertI64VL);
3591  ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
3592  ValLo, I32Mask, InsertI64VL);
3593  // Bitcast back to the right container type.
3594  ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
3595  }
3596 
3597  // Now that the value is in a vector, slide it into position.
3598  SDValue InsertVL =
3599  DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
3600  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
3601  ValInVec, Idx, Mask, InsertVL);
3602  if (!VecVT.isFixedLengthVector())
3603  return Slideup;
3604  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
3605 }
3606 
3607 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
3608 // extract the first element: (extractelt (slidedown vec, idx), 0). For integer
3609 // types this is done using VMV_X_S to allow us to glean information about the
3610 // sign bits of the result.
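// Roughly, for a non-zero index (registers illustrative, integer element):
//   vsetivli      zero, 1, e<SEW>         ; a VL of 1 is all we need
//   vslidedown.vx v_tmp, v_src, Idx
//   vmv.x.s       a0, v_tmp               ; FP elements use vfmv.f.s via isel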
3611 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3612  SelectionDAG &DAG) const {
3613  SDLoc DL(Op);
3614  SDValue Idx = Op.getOperand(1);
3615  SDValue Vec = Op.getOperand(0);
3616  EVT EltVT = Op.getValueType();
3617  MVT VecVT = Vec.getSimpleValueType();
3618  MVT XLenVT = Subtarget.getXLenVT();
3619 
3620  if (VecVT.getVectorElementType() == MVT::i1) {
3621  // FIXME: For now we just promote to an i8 vector and extract from that,
3622  // but this is probably not optimal.
3623  MVT WideVT = VecVT.changeVectorElementType(MVT::i8);
3624  Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
3625  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
3626  }
3627 
3628  // If this is a fixed vector, we need to convert it to a scalable vector.
3629  MVT ContainerVT = VecVT;
3630  if (VecVT.isFixedLengthVector()) {
3631  ContainerVT = getContainerForFixedLengthVector(VecVT);
3632  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3633  }
3634 
3635  // If the index is 0, the vector is already in the right position.
3636  if (!isNullConstant(Idx)) {
3637  // Use a VL of 1 to avoid processing more elements than we need.
3638  SDValue VL = DAG.getConstant(1, DL, XLenVT);
3639  MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3640  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3641  Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
3642  DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
3643  }
3644 
3645  if (!EltVT.isInteger()) {
3646  // Floating-point extracts are handled in TableGen.
3647  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
3648  DAG.getConstant(0, DL, XLenVT));
3649  }
3650 
3651  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
3652  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
3653 }
3654 
3655 // Some RVV intrinsics may claim that they want an integer operand to be
3656 // promoted or expanded.
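// For example, an i8 scalar operand of an @llvm.riscv.vadd.vx call is promoted
// to XLenVT here, while on RV32 an i64 scalar used with SEW=64 is either
// truncated (if it is a sign-extended 32-bit constant) or split and splatted
// into a vector below.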
3657 static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
3658  const RISCVSubtarget &Subtarget) {
3659  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3660  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
3661  "Unexpected opcode");
3662 
3663  if (!Subtarget.hasStdExtV())
3664  return SDValue();
3665 
3666  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
3667  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
3668  SDLoc DL(Op);
3669 
3670  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
3671  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
3672  if (!II || !II->SplatOperand)
3673  return SDValue();
3674 
3675  unsigned SplatOp = II->SplatOperand + HasChain;
3676  assert(SplatOp < Op.getNumOperands());
3677 
3678  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
3679  SDValue &ScalarOp = Operands[SplatOp];
3680  MVT OpVT = ScalarOp.getSimpleValueType();
3681  MVT XLenVT = Subtarget.getXLenVT();
3682 
3683  // If this isn't a scalar, or if its type is already XLenVT, we're done.
3684  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
3685  return SDValue();
3686 
3687  // Simplest case is that the operand needs to be promoted to XLenVT.
3688  if (OpVT.bitsLT(XLenVT)) {
3689  // If the operand is a constant, sign extend to increase our chances
3690  // of being able to use a .vi instruction. ANY_EXTEND would become a
3691  // zero extend and the simm5 check in isel would fail.
3692  // FIXME: Should we ignore the upper bits in isel instead?
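  // Worked example: the i8 constant -5 (0xFB) zero-extends to 251, which is
  // not a valid simm5, but sign-extends to -5, which the .vi forms can encode.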
3693  unsigned ExtOpc =
3694  isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3695  ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
3696  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3697  }
3698 
3699  // Use the previous operand to get the vXi64 VT. The result might be a mask
3700  // VT for compares. Using the previous operand assumes that the previous
3701  // operand will never have a smaller element size than a scalar operand and
3702  // that a widening operation never uses SEW=64.
3703  // NOTE: If this fails the below assert, we can probably just find the
3704  // element count from any operand or result and use it to construct the VT.
3705  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
3706  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
3707 
3708  // The more complex case is when the scalar is larger than XLenVT.
3709  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
3710  VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
3711 
3712  // If this is a sign-extended 32-bit constant, we can truncate it and rely
3713  // on the instruction to sign-extend since SEW>XLEN.
3714  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
3715  if (isInt<32>(CVal->getSExtValue())) {
3716  ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
3717  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3718  }
3719  }
3720 
3721  // We need to convert the scalar to a splat vector.
3722  // FIXME: Can we implicitly truncate the scalar if it is known to
3723  // be sign extended?
3724  // VL should be the last operand.
3725  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
3726  assert(VL.getValueType() == XLenVT);
3727  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
3728  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
3729 }
3730 
3731 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3732  SelectionDAG &DAG) const {
3733  unsigned IntNo = Op.getConstantOperandVal(0);
3734  SDLoc DL(Op);
3735  MVT XLenVT = Subtarget.getXLenVT();
3736 
3737  switch (IntNo) {
3738  default:
3739  break; // Don't custom lower most intrinsics.
3740  case Intrinsic::thread_pointer: {
3741  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3742  return DAG.getRegister(RISCV::X4, PtrVT);
3743  }
3744  case Intrinsic::riscv_orc_b:
3745  // Lower to the GORCI encoding for orc.b.
3746  return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
3747  DAG.getConstant(7, DL, XLenVT));
3748  case Intrinsic::riscv_grev:
3749  case Intrinsic::riscv_gorc: {
3750  unsigned Opc =
3751  IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
3752  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3753  }
3754  case Intrinsic::riscv_shfl:
3755  case Intrinsic::riscv_unshfl: {
3756  unsigned Opc =
3757  IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
3758  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3759  }
3760  case Intrinsic::riscv_bcompress:
3761  case Intrinsic::riscv_bdecompress: {
3762  unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
3763  : RISCVISD::BDECOMPRESS;
3764  return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
3765  }
3766  case Intrinsic::riscv_vmv_x_s:
3767  assert(Op.getValueType() == XLenVT && "Unexpected VT!");
3768  return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
3769  Op.getOperand(1));
3770  case Intrinsic::riscv_vmv_v_x:
3771  return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
3772  Op.getSimpleValueType(), DL, DAG, Subtarget);
3773  case Intrinsic::riscv_vfmv_v_f:
3774  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
3775  Op.getOperand(1), Op.getOperand(2));
3776  case Intrinsic::riscv_vmv_s_x: {
3777  SDValue Scalar = Op.getOperand(2);
3778 
3779  if (Scalar.getValueType().bitsLE(XLenVT)) {
3780  Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
3781  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
3782  Op.getOperand(1), Scalar, Op.getOperand(3));
3783  }
3784 
3785  assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
3786 
3787  // This is an i64 value that lives in two scalar registers. We have to
3788  // insert this in a convoluted way. First we build a vXi64 splat containing
3789  // the two values that we assemble using some bit math. Next we'll use
3790  // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
3791  // to merge element 0 from our splat into the source vector.
3792  // FIXME: This is probably not the best way to do this, but it is
3793  // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
3794  // point.
3795  // sw lo, (a0)
3796  // sw hi, 4(a0)
3797  // vlse vX, (a0)
3798  //
3799  // vid.v vVid
3800  // vmseq.vx mMask, vVid, 0
3801  // vmerge.vvm vDest, vSrc, vVal, mMask
3802  MVT VT = Op.getSimpleValueType();
3803  SDValue Vec = Op.getOperand(1);
3804  SDValue VL = Op.getOperand(3);
3805 
3806  SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
3807  SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
3808  DAG.getConstant(0, DL, MVT::i32), VL);
3809 
3810  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
3811  SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
3812  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
3813  SDValue SelectCond =
3814  DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
3815  DAG.getCondCode(ISD::SETEQ), Mask, VL);
3816  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
3817  Vec, VL);
3818  }
3819  case Intrinsic::riscv_vslide1up:
3820  case Intrinsic::riscv_vslide1down:
3821  case Intrinsic::riscv_vslide1up_mask:
3822  case Intrinsic::riscv_vslide1down_mask: {
3823  // We need to special case these when the scalar is larger than XLen.
3824  unsigned NumOps = Op.getNumOperands();
3825  bool IsMasked = NumOps == 6;
3826  unsigned OpOffset = IsMasked ? 1 : 0;
3827  SDValue Scalar = Op.getOperand(2 + OpOffset);
3828  if (Scalar.getValueType().bitsLE(XLenVT))
3829  break;
3830 
3831  // Splatting a sign extended constant is fine.
3832  if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
3833  if (isInt<32>(CVal->getSExtValue()))
3834  break;
3835 
3836  MVT VT = Op.getSimpleValueType();
3837  assert(VT.getVectorElementType() == MVT::i64 &&
3838  Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
3839 
3840  // Convert the vector source to the equivalent nxvXi32 vector.
3841  MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3842  SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
3843 
3844  SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3845  DAG.getConstant(0, DL, XLenVT));
3846  SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
3847  DAG.getConstant(1, DL, XLenVT));
3848 
3849  // Double the VL since we halved SEW.
3850  SDValue VL = Op.getOperand(NumOps - 1);
3851  SDValue I32VL =
3852  DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
3853 
3854  MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
3855  SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
3856 
3857  // Shift the two scalar parts in using SEW=32 slide1up/slide1down
3858  // instructions.
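  // For example (registers illustrative), slide1up at SEW=32 pushes hi first
  // and then lo, so the {lo, hi} pair forms the new 64-bit element 0; the
  // slide1down path mirrors this with lo first, then hi:
  //   vslide1up.vx v_tmp,   v_src32, a_hi
  //   vslide1up.vx v_dst32, v_tmp,   a_lo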
3859  if (IntNo == Intrinsic::riscv_vslide1up ||
3860  IntNo == Intrinsic::riscv_vslide1up_mask) {
3861  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
3862  I32Mask, I32VL);
3863  Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
3864  I32Mask, I32VL);
3865  } else {
3866  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
3867  I32Mask, I32VL);
3868  Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
3869  I32Mask, I32VL);
3870  }
3871 
3872  // Convert back to nxvXi64.
3873  Vec = DAG.getBitcast(VT, Vec);
3874 
3875  if (!IsMasked)
3876  return Vec;
3877 
3878  // Apply mask after the operation.
3879  SDValue Mask = Op.getOperand(NumOps - 2);
3880  SDValue MaskedOff = Op.getOperand(1);
3881  return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
3882  }
3883  }
3884 
3885  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3886 }
3887 
3888 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
3889  SelectionDAG &DAG) const {
3890  return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
3891 }
3892 
3893 static MVT getLMUL1VT(MVT VT) {
3894  assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3895  "Unexpected vector MVT");
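  // For example, getLMUL1VT(nxv8i32), an LMUL=4 type under the 64-bit
  // RVVBitsPerBlock convention, returns nxv2i32 (64 / 32 == 2 elements).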
3896  return MVT::getScalableVectorVT(
3897  VT.getVectorElementType(),
3898  RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3899 }
3900 
3901 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
3902  switch (ISDOpcode) {
3903  default:
3904  llvm_unreachable("Unhandled reduction");
3905  case ISD::VECREDUCE_ADD:
3906  return RISCVISD::VECREDUCE_ADD_VL;
3907  case ISD::VECREDUCE_UMAX:
3908  return RISCVISD::VECREDUCE_UMAX_VL;
3909  case ISD::VECREDUCE_SMAX:
3910  return RISCVISD::VECREDUCE_SMAX_VL;
3911  case ISD::VECREDUCE_UMIN:
3912  return RISCVISD::VECREDUCE_UMIN_VL;
3913  case ISD::VECREDUCE_SMIN:
3914  return RISCVISD::VECREDUCE_SMIN_VL;
3915  case ISD::VECREDUCE_AND:
3916  return RISCVISD::VECREDUCE_AND_VL;
3917  case ISD::VECREDUCE_OR:
3918  return RISCVISD::VECREDUCE_OR_VL;
3919  case ISD::VECREDUCE_XOR:
3920  return RISCVISD::VECREDUCE_XOR_VL;
3921  }
3922 }
3923 
3924 SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
3925  SelectionDAG &DAG) const {
3926  SDLoc DL(Op);
3927  SDValue Vec = Op.getOperand(0);
3928  MVT VecVT = Vec.getSimpleValueType();
3929  assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
3930  Op.getOpcode() == ISD::VECREDUCE_OR ||
3931  Op.getOpcode() == ISD::VECREDUCE_XOR) &&
3932  "Unexpected reduction lowering");
3933 
3934  MVT XLenVT = Subtarget.getXLenVT();
3935  assert(Op.getValueType() == XLenVT &&
3936  "Expected reduction output to be legalized to XLenVT");
3937 
3938  MVT ContainerVT = VecVT;
3939  if (VecVT.isFixedLengthVector()) {
3940  ContainerVT = getContainerForFixedLengthVector(VecVT);
3941  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3942  }
3943 
3944  SDValue Mask, VL;
3945  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
3946  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
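  // Roughly, the three cases below select to (registers illustrative):
  //   VECREDUCE_OR:  vpopc.m a0, v0                    ; snez a0, a0
  //   VECREDUCE_XOR: vpopc.m a0, v0                    ; andi a0, a0, 1
  //   VECREDUCE_AND: complement mask, then vpopc.m a0  ; seqz a0, a0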
3947 
3948  switch (Op.getOpcode()) {
3949  default:
3950  llvm_unreachable("Unhandled reduction");
3951  case ISD::VECREDUCE_AND:
3952  // vpopc ~x == 0
3953  Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
3954  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3955  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
3956  case ISD::VECREDUCE_OR:
3957  // vpopc x != 0
3958  Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
3959  return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
3960  case ISD::VECREDUCE_XOR: {
3961  // ((vpopc x) & 1) != 0
3962  SDValue One = DAG.getConstant(1, DL, XLenVT);
3963  Vec = DAG.getNode(RISCVISD::VPOPC