1 //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that RISCV uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RISCVISelLowering.h"
16 #include "RISCV.h"
18 #include "RISCVRegisterInfo.h"
19 #include "RISCVSubtarget.h"
20 #include "RISCVTargetMachine.h"
21 #include "llvm/ADT/SmallSet.h"
22 #include "llvm/ADT/Statistic.h"
31 #include "llvm/IR/DiagnosticInfo.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/IntrinsicsRISCV.h"
35 #include "llvm/IR/PatternMatch.h"
36 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/KnownBits.h"
41 
42 using namespace llvm;
43 
44 #define DEBUG_TYPE "riscv-lower"
45 
46 STATISTIC(NumTailCalls, "Number of tail calls");
47 
48 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
49                                          const RISCVSubtarget &STI)
50  : TargetLowering(TM), Subtarget(STI) {
51 
52  if (Subtarget.isRV32E())
53  report_fatal_error("Codegen not yet implemented for RV32E");
54 
55  RISCVABI::ABI ABI = Subtarget.getTargetABI();
56  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
57 
58  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
59      !Subtarget.hasStdExtF()) {
60  errs() << "Hard-float 'f' ABI can't be used for a target that "
61  "doesn't support the F instruction set extension (ignoring "
62  "target-abi)\n";
64  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
65  !Subtarget.hasStdExtD()) {
66  errs() << "Hard-float 'd' ABI can't be used for a target that "
67  "doesn't support the D instruction set extension (ignoring "
68  "target-abi)\n";
70  }
71 
72  switch (ABI) {
73  default:
74  report_fatal_error("Don't know how to lower this ABI");
78  case RISCVABI::ABI_LP64:
81  break;
82  }
83 
84  MVT XLenVT = Subtarget.getXLenVT();
85 
86  // Set up the register classes.
87  addRegisterClass(XLenVT, &RISCV::GPRRegClass);
88 
89  if (Subtarget.hasStdExtZfh())
90  addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
91  if (Subtarget.hasStdExtF())
92  addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
93  if (Subtarget.hasStdExtD())
94  addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
95 
96  static const MVT::SimpleValueType BoolVecVTs[] = {
99  static const MVT::SimpleValueType IntVecVTs[] = {
105  static const MVT::SimpleValueType F16VecVTs[] = {
108  static const MVT::SimpleValueType F32VecVTs[] = {
110  static const MVT::SimpleValueType F64VecVTs[] = {
112 
113  if (Subtarget.hasVInstructions()) {
114  auto addRegClassForRVV = [this](MVT VT) {
115  // Disable the smallest fractional LMUL types if ELEN is less than
116  // RVVBitsPerBlock.
117  unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
118  if (VT.getVectorMinNumElements() < MinElts)
119  return;
120 
121  unsigned Size = VT.getSizeInBits().getKnownMinValue();
122  const TargetRegisterClass *RC;
123  if (Size <= RISCV::RVVBitsPerBlock)
124  RC = &RISCV::VRRegClass;
125  else if (Size == 2 * RISCV::RVVBitsPerBlock)
126  RC = &RISCV::VRM2RegClass;
127  else if (Size == 4 * RISCV::RVVBitsPerBlock)
128  RC = &RISCV::VRM4RegClass;
129  else if (Size == 8 * RISCV::RVVBitsPerBlock)
130  RC = &RISCV::VRM8RegClass;
131  else
132  llvm_unreachable("Unexpected size");
133 
134  addRegisterClass(VT, RC);
135  };
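// For illustration, with RVVBitsPerBlock at its usual value of 64: nxv2i32
// has a known minimum size of 64 bits and is added to VRRegClass, nxv4i32
// (128 bits) goes to VRM2RegClass, and nxv16i32 (512 bits) to VRM8RegClass.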
136 
137  for (MVT VT : BoolVecVTs)
138  addRegClassForRVV(VT);
139  for (MVT VT : IntVecVTs) {
140  if (VT.getVectorElementType() == MVT::i64 &&
141  !Subtarget.hasVInstructionsI64())
142  continue;
143  addRegClassForRVV(VT);
144  }
145 
146  if (Subtarget.hasVInstructionsF16())
147  for (MVT VT : F16VecVTs)
148  addRegClassForRVV(VT);
149 
150  if (Subtarget.hasVInstructionsF32())
151  for (MVT VT : F32VecVTs)
152  addRegClassForRVV(VT);
153 
154  if (Subtarget.hasVInstructionsF64())
155  for (MVT VT : F64VecVTs)
156  addRegClassForRVV(VT);
157 
158  if (Subtarget.useRVVForFixedLengthVectors()) {
159  auto addRegClassForFixedVectors = [this](MVT VT) {
160  MVT ContainerVT = getContainerForFixedLengthVector(VT);
161  unsigned RCID = getRegClassIDForVecVT(ContainerVT);
162  const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
163  addRegisterClass(VT, TRI.getRegClass(RCID));
164  };
166  if (useRVVForFixedLengthVectorVT(VT))
167  addRegClassForFixedVectors(VT);
168 
170  if (useRVVForFixedLengthVectorVT(VT))
171  addRegClassForFixedVectors(VT);
172  }
173  }
174 
175  // Compute derived properties from the register classes.
177 
179 
181  MVT::i1, Promote);
182 
183  // TODO: add all necessary setOperationAction calls.
185 
190 
192 
195 
197 
199 
200  if (!Subtarget.hasStdExtZbb())
202 
203  if (Subtarget.is64Bit()) {
205 
207  MVT::i32, Custom);
208 
210  MVT::i32, Custom);
211  } else {
213  {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
214  nullptr);
215  setLibcallName(RTLIB::MULO_I64, nullptr);
216  }
217 
218  if (!Subtarget.hasStdExtM()) {
221  XLenVT, Expand);
222  } else {
223  if (Subtarget.is64Bit()) {
225 
228  } else {
230  }
231  }
232 
235  Expand);
236 
238  Custom);
239 
240  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
241  Subtarget.hasStdExtZbkb()) {
242  if (Subtarget.is64Bit())
244  } else {
246  }
247 
248  if (Subtarget.hasStdExtZbp()) {
249  // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
250  // more combining.
252 
253  // BSWAP i8 doesn't exist.
255 
257 
258  if (Subtarget.is64Bit())
260  } else {
261  // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
262  // pattern match it directly in isel.
264  (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
265  ? Legal
266  : Expand);
267  // Zbkb can use rev8+brev8 to implement bitreverse.
269  Subtarget.hasStdExtZbkb() ? Custom : Expand);
270  }
271 
272  if (Subtarget.hasStdExtZbb()) {
274  Legal);
275 
276  if (Subtarget.is64Bit())
279  MVT::i32, Custom);
280  } else {
282 
283  if (Subtarget.is64Bit())
285  }
286 
287  if (Subtarget.hasStdExtZbt()) {
290 
291  if (Subtarget.is64Bit())
293  } else {
295  }
296 
297  static constexpr ISD::NodeType FPLegalNodeTypes[] = {
304 
305  static const ISD::CondCode FPCCToExpand[] = {
309 
310  static const ISD::NodeType FPOpToExpand[] = {
313 
314  if (Subtarget.hasStdExtZfh())
316 
317  if (Subtarget.hasStdExtZfh()) {
318  for (auto NT : FPLegalNodeTypes)
322  setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
326 
332  MVT::f16, Promote);
333 
334  // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
335  // complete support for all operations in LegalizeDAG.
336 
337  // We need to custom promote this.
338  if (Subtarget.is64Bit())
340  }
341 
342  if (Subtarget.hasStdExtF()) {
343  for (auto NT : FPLegalNodeTypes)
345  setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
349  for (auto Op : FPOpToExpand)
353  }
354 
355  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
357 
358  if (Subtarget.hasStdExtD()) {
359  for (auto NT : FPLegalNodeTypes)
363  setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
369  for (auto Op : FPOpToExpand)
373  }
374 
375  if (Subtarget.is64Bit())
378  MVT::i32, Custom);
379 
380  if (Subtarget.hasStdExtF()) {
382  Custom);
383 
386  XLenVT, Legal);
387 
390  }
391 
394  XLenVT, Custom);
395 
397 
398  if (Subtarget.is64Bit())
400 
401  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
402  // Unfortunately this can't be determined just from the ISA naming string.
404  Subtarget.is64Bit() ? Legal : Custom);
405 
408  if (Subtarget.is64Bit())
410 
411  if (Subtarget.hasStdExtA()) {
414  } else {
416  }
417 
419 
420  if (Subtarget.hasVInstructions()) {
422 
424 
425  // RVV intrinsics may have illegal operands.
426  // We also need to custom legalize vmv.x.s.
428  {MVT::i8, MVT::i16}, Custom);
429  if (Subtarget.is64Bit())
431  else
433  MVT::i64, Custom);
434 
436  MVT::Other, Custom);
437 
438  static const unsigned IntegerVPOps[] = {
439  ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
440  ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
441  ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
442  ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
443  ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
444  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
445  ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
446  ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI,
447  ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
448  ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};
449 
450  static const unsigned FloatingPointVPOps[] = {
451  ISD::VP_FADD, ISD::VP_FSUB,
452  ISD::VP_FMUL, ISD::VP_FDIV,
453  ISD::VP_FNEG, ISD::VP_FMA,
454  ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
455  ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
456  ISD::VP_MERGE, ISD::VP_SELECT,
457  ISD::VP_SITOFP, ISD::VP_UITOFP,
458  ISD::VP_SETCC, ISD::VP_FP_ROUND,
459  ISD::VP_FP_EXTEND};
460 
461  if (!Subtarget.is64Bit()) {
462  // We must custom-lower certain vXi64 operations on RV32 due to the vector
463  // element type being illegal.
465  MVT::i64, Custom);
466 
471  MVT::i64, Custom);
472 
473  setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
474  ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
475  ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
476  ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
477  MVT::i64, Custom);
478  }
479 
480  for (MVT VT : BoolVecVTs) {
481  if (!isTypeLegal(VT))
482  continue;
483 
485 
486  // Mask VTs are custom-expanded into a series of standard nodes
489  VT, Custom);
490 
492  Custom);
493 
496  {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
497  Expand);
498 
499  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
500 
503  Custom);
504 
506  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
507  Custom);
508 
509  // RVV has native int->float & float->int conversions where the
510  // element type sizes are within one power-of-two of each other. Any
511  // wider distances between type sizes have to be lowered as sequences
512  // which progressively narrow the gap in stages.
515  VT, Custom);
516 
517  // Expand all extending loads to types larger than this, and truncating
518  // stores from types larger than this.
519  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
520  setTruncStoreAction(OtherVT, VT, Expand);
522  VT, Expand);
523  }
524 
526  {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
527  Custom);
529  }
530 
531  for (MVT VT : IntVecVTs) {
532  if (!isTypeLegal(VT))
533  continue;
534 
537 
538  // Vectors implement MULHS/MULHU.
540 
541  // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
542  if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
544 
546  Legal);
547 
549 
551  Expand);
552 
554 
555  // Custom-lower extensions and truncations from/to mask types.
557  VT, Custom);
558 
559  // RVV has native int->float & float->int conversions where the
560  // element type sizes are within one power-of-two of each other. Any
561  // wider distances between type sizes have to be lowered as sequences
562  // which progressively narrow the gap in stages.
565  VT, Custom);
566 
569 
570  // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
571  // nodes which truncate by one power of two at a time.
573 
574  // Custom-lower insert/extract operations to simplify patterns.
576  Custom);
577 
578  // Custom-lower reduction operations to set up the corresponding custom
579  // nodes' operands.
584  VT, Custom);
585 
586  setOperationAction(IntegerVPOps, VT, Custom);
587 
589 
591  VT, Custom);
592 
594  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
595  Custom);
596 
599  VT, Custom);
600 
603 
605 
606  for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
607  setTruncStoreAction(VT, OtherVT, Expand);
609  VT, Expand);
610  }
611 
612  // Splice
614 
615  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
616  // type that can represent the value exactly.
617  if (VT.getVectorElementType() != MVT::i64) {
618  MVT FloatEltVT =
620  EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
621  if (isTypeLegal(FloatVT)) {
623  Custom);
624  }
625  }
626  }
627 
628  // Expand various CCs to best match the RVV ISA, which natively supports UNE
629  // but no other unordered comparisons, and supports all ordered comparisons
630  // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
631  // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
632  // and we pattern-match those back to the "original", swapping operands once
633  // more. This way we catch both operations and both "vf" and "fv" forms with
634  // fewer patterns.
635  static const ISD::CondCode VFPCCToExpand[] = {
636      ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
637      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
638      ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
639  };
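// For example, a vector SETOGT is expanded here to SETOLT with its operands
// swapped; instruction selection then matches the swapped compare with
// vmflt.vv, and the splatted-scalar ("vf"/"fv") variants with vmflt.vf or
// vmfgt.vf, so one set of patterns covers both operand orders.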
640 
641  // Sets common operation actions on RVV floating-point vector types.
642  const auto SetCommonVFPActions = [&](MVT VT) {
644  // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
645  // sizes are within one power-of-two of each other. Therefore conversions
646  // between vXf16 and vXf64 must be lowered as sequences which convert via
647  // vXf32.
649  // Custom-lower insert/extract operations to simplify patterns.
651  Custom);
652  // Expand various condition codes (explained above).
653  setCondCodeAction(VFPCCToExpand, VT, Expand);
654 
656 
658  VT, Custom);
659 
662  VT, Custom);
663 
664  // Expand FP operations that need libcalls.
677 
682 
684 
686 
688  VT, Custom);
689 
691  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
692  Custom);
693 
696 
699  VT, Custom);
700 
702 
703  setOperationAction(FloatingPointVPOps, VT, Custom);
704  };
705 
706  // Sets common extload/truncstore actions on RVV floating-point vector
707  // types.
708  const auto SetCommonVFPExtLoadTruncStoreActions =
709  [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
710  for (auto SmallVT : SmallerVTs) {
711  setTruncStoreAction(VT, SmallVT, Expand);
712  setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
713  }
714  };
715 
716  if (Subtarget.hasVInstructionsF16()) {
717  for (MVT VT : F16VecVTs) {
718  if (!isTypeLegal(VT))
719  continue;
720  SetCommonVFPActions(VT);
721  }
722  }
723 
724  if (Subtarget.hasVInstructionsF32()) {
725  for (MVT VT : F32VecVTs) {
726  if (!isTypeLegal(VT))
727  continue;
728  SetCommonVFPActions(VT);
729  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
730  }
731  }
732 
733  if (Subtarget.hasVInstructionsF64()) {
734  for (MVT VT : F64VecVTs) {
735  if (!isTypeLegal(VT))
736  continue;
737  SetCommonVFPActions(VT);
738  SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
739  SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
740  }
741  }
742 
743  if (Subtarget.useRVVForFixedLengthVectors()) {
745  if (!useRVVForFixedLengthVectorVT(VT))
746  continue;
747 
748  // By default everything must be expanded.
749  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
751  for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
752  setTruncStoreAction(VT, OtherVT, Expand);
754  OtherVT, VT, Expand);
755  }
756 
757  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
759  Custom);
760 
762  Custom);
763 
765  VT, Custom);
766 
768 
770 
772 
774 
776 
779  Custom);
780 
782  {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
783  Custom);
784 
787  VT, Custom);
788 
789  // Operations below are different between masks and other vectors.
790  if (VT.getVectorElementType() == MVT::i1) {
791  setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
792  ISD::OR, ISD::XOR},
793  VT, Custom);
794 
796  {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
797  VT, Custom);
798  continue;
799  }
800 
801  // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
802  // it before type legalization for i64 vectors on RV32. It will then be
803  // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
804  // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
805  // improvements first.
806  if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
809  }
810 
813 
816 
818  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
819  Custom);
820 
824  VT, Custom);
825 
828 
829  // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
830  if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
832 
835  Custom);
836 
839 
842 
843  // Custom-lower reduction operations to set up the corresponding custom
844  // nodes' operands.
848  VT, Custom);
849 
850  setOperationAction(IntegerVPOps, VT, Custom);
851 
852  // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
853  // type that can represent the value exactly.
854  if (VT.getVectorElementType() != MVT::i64) {
855  MVT FloatEltVT =
857  EVT FloatVT =
858  MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
859  if (isTypeLegal(FloatVT))
861  Custom);
862  }
863  }
864 
866  if (!useRVVForFixedLengthVectorVT(VT))
867  continue;
868 
869  // By default everything must be expanded.
870  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
872  for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
873  setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
874  setTruncStoreAction(VT, OtherVT, Expand);
875  }
876 
877  // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
879  Custom);
880 
884  VT, Custom);
885 
888  VT, Custom);
889 
891  {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
892  Custom);
893 
897  VT, Custom);
898 
900 
902  VT, Custom);
903 
904  for (auto CC : VFPCCToExpand)
905  setCondCodeAction(CC, VT, Expand);
906 
909 
911 
914  VT, Custom);
915 
916  setOperationAction(FloatingPointVPOps, VT, Custom);
917  }
918 
919  // Custom-legalize bitcasts from fixed-length vectors to scalar types.
921  Custom);
922  if (Subtarget.hasStdExtZfh())
924  if (Subtarget.hasStdExtF())
926  if (Subtarget.hasStdExtD())
928  }
929  }
930 
931  // Function alignments.
932  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
933  setMinFunctionAlignment(FunctionAlignment);
934  setPrefFunctionAlignment(FunctionAlignment);
935 
937 
938  // Jumps are expensive, compared to logic
940 
942  ISD::OR, ISD::XOR});
943  if (Subtarget.is64Bit())
945 
946  if (Subtarget.hasStdExtF())
948 
949  if (Subtarget.hasStdExtZbp())
951 
952  if (Subtarget.hasStdExtZbb())
954 
955  if (Subtarget.hasStdExtZbkb())
957  if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
959  if (Subtarget.hasStdExtF())
962  if (Subtarget.hasVInstructions())
964  ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
966  if (Subtarget.useRVVForFixedLengthVectors())
968 
969  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
970  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
971 }
972 
975  EVT VT) const {
976  if (!VT.isVector())
977  return getPointerTy(DL);
978  if (Subtarget.hasVInstructions() &&
979  (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
982 }
983 
984 MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
985  return Subtarget.getXLenVT();
986 }
987 
989  const CallInst &I,
990  MachineFunction &MF,
991  unsigned Intrinsic) const {
992  auto &DL = I.getModule()->getDataLayout();
993  switch (Intrinsic) {
994  default:
995  return false;
996  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
997  case Intrinsic::riscv_masked_atomicrmw_add_i32:
998  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
999  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1000  case Intrinsic::riscv_masked_atomicrmw_max_i32:
1001  case Intrinsic::riscv_masked_atomicrmw_min_i32:
1002  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1003  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1004  case Intrinsic::riscv_masked_cmpxchg_i32:
1006  Info.memVT = MVT::i32;
1007  Info.ptrVal = I.getArgOperand(0);
1008  Info.offset = 0;
1009  Info.align = Align(4);
1012  return true;
1013  case Intrinsic::riscv_masked_strided_load:
1015  Info.ptrVal = I.getArgOperand(1);
1016  Info.memVT = getValueType(DL, I.getType()->getScalarType());
1017  Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
1020  return true;
1021  case Intrinsic::riscv_masked_strided_store:
1022  Info.opc = ISD::INTRINSIC_VOID;
1023  Info.ptrVal = I.getArgOperand(1);
1024  Info.memVT =
1025  getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
1026  Info.align = Align(
1027  DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
1028  8);
1031  return true;
1032  case Intrinsic::riscv_seg2_load:
1033  case Intrinsic::riscv_seg3_load:
1034  case Intrinsic::riscv_seg4_load:
1035  case Intrinsic::riscv_seg5_load:
1036  case Intrinsic::riscv_seg6_load:
1037  case Intrinsic::riscv_seg7_load:
1038  case Intrinsic::riscv_seg8_load:
1040  Info.ptrVal = I.getArgOperand(0);
1041  Info.memVT =
1042  getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
1043  Info.align =
1044  Align(DL.getTypeSizeInBits(
1045  I.getType()->getStructElementType(0)->getScalarType()) /
1046  8);
1049  return true;
1050  }
1051 }
1052 
1054  const AddrMode &AM, Type *Ty,
1055  unsigned AS,
1056  Instruction *I) const {
1057  // No global is ever allowed as a base.
1058  if (AM.BaseGV)
1059  return false;
1060 
1061  // RVV instructions only support register addressing.
1062  if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1063  return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1064 
1065  // Require a 12-bit signed offset.
1066  if (!isInt<12>(AM.BaseOffs))
1067  return false;
1068 
1069  switch (AM.Scale) {
1070  case 0: // "r+i" or just "i", depending on HasBaseReg.
1071  break;
1072  case 1:
1073  if (!AM.HasBaseReg) // allow "r+i".
1074  break;
1075  return false; // disallow "r+r" or "r+r+i".
1076  default:
1077  return false;
1078  }
1079 
1080  return true;
1081 }
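// For illustration: "reg + 2044" is a legal addressing mode (the offset fits
// a signed 12-bit immediate), "reg + 4096" and "reg + reg" are not, and RVV
// memory operations accept only a plain base register with no offset.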
1082 
1084  return isInt<12>(Imm);
1085 }
1086 
1088  return isInt<12>(Imm);
1089 }
1090 
1091 // On RV32, 64-bit integers are split into their high and low parts and held
1092 // in two different registers, so the trunc is free since the low register can
1093 // just be used.
1094 bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
1095  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1096  return false;
1097  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1098  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1099  return (SrcBits == 64 && DestBits == 32);
1100 }
1101 
1102 bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
1103  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
1104  !SrcVT.isInteger() || !DstVT.isInteger())
1105  return false;
1106  unsigned SrcBits = SrcVT.getSizeInBits();
1107  unsigned DestBits = DstVT.getSizeInBits();
1108  return (SrcBits == 64 && DestBits == 32);
1109 }
1110 
1112  // Zexts are free if they can be combined with a load.
1113  // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1114  // poorly with type legalization of compares preferring sext.
1115  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1116  EVT MemVT = LD->getMemoryVT();
1117  if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1118  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1119  LD->getExtensionType() == ISD::ZEXTLOAD))
1120  return true;
1121  }
1122 
1123  return TargetLowering::isZExtFree(Val, VT2);
1124 }
1125 
1127  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1128 }
1129 
1131  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1132 }
1133 
1135  return Subtarget.hasStdExtZbb();
1136 }
1137 
1139  return Subtarget.hasStdExtZbb();
1140 }
1141 
1143  EVT VT = Y.getValueType();
1144 
1145  // FIXME: Support vectors once we have tests.
1146  if (VT.isVector())
1147  return false;
1148 
1149  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
1150  Subtarget.hasStdExtZbkb()) &&
1151  !isa<ConstantSDNode>(Y);
1152 }
1153 
1155  // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1156  auto *C = dyn_cast<ConstantSDNode>(Y);
1157  return C && C->getAPIntValue().ule(10);
1158 }
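// For illustration of the ANDI-based bit test mentioned above: testing bit 10
// of a0 can be emitted as "andi a1, a0, 1024" followed by "snez a1, a1",
// since 1 << 10 = 1024 still fits the signed 12-bit ANDI immediate, whereas
// bit 11 would need a mask of 2048, which does not.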
1159 
1163  unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1164  SelectionDAG &DAG) const {
1165  // One interesting pattern that we'd want to form is 'bit extract':
1166  // ((1 >> Y) & 1) ==/!= 0
1167  // But we also need to be careful not to try to reverse that fold.
1168 
1169  // Is this '((1 >> Y) & 1)'?
1170  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1171  return false; // Keep the 'bit extract' pattern.
1172 
1173  // Will this be '((1 >> Y) & 1)' after the transform?
1174  if (NewShiftOpcode == ISD::SRL && CC->isOne())
1175  return true; // Do form the 'bit extract' pattern.
1176 
1177  // If 'X' is a constant, and we transform, then we will immediately
1178  // try to undo the fold, thus causing endless combine loop.
1179  // So only do the transform if X is not a constant. This matches the default
1180  // implementation of this function.
1181  return !XC;
1182 }
1183 
1184 /// Check if sinking \p I's operands to I's basic block is profitable, because
1185 /// the operands can be folded into a target instruction, e.g.
1186 /// splats of scalars can fold into vector instructions.
1188  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1189  using namespace llvm::PatternMatch;
1190 
1191  if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1192  return false;
1193 
1194  auto IsSinker = [&](Instruction *I, int Operand) {
1195  switch (I->getOpcode()) {
1196  case Instruction::Add:
1197  case Instruction::Sub:
1198  case Instruction::Mul:
1199  case Instruction::And:
1200  case Instruction::Or:
1201  case Instruction::Xor:
1202  case Instruction::FAdd:
1203  case Instruction::FSub:
1204  case Instruction::FMul:
1205  case Instruction::FDiv:
1206  case Instruction::ICmp:
1207  case Instruction::FCmp:
1208  return true;
1209  case Instruction::Shl:
1210  case Instruction::LShr:
1211  case Instruction::AShr:
1212  case Instruction::UDiv:
1213  case Instruction::SDiv:
1214  case Instruction::URem:
1215  case Instruction::SRem:
1216  return Operand == 1;
1217  case Instruction::Call:
1218  if (auto *II = dyn_cast<IntrinsicInst>(I)) {
1219  switch (II->getIntrinsicID()) {
1220  case Intrinsic::fma:
1221  case Intrinsic::vp_fma:
1222  return Operand == 0 || Operand == 1;
1223  // FIXME: Our patterns can only match vx/vf instructions when the splat
1224  // is on the RHS, because TableGen doesn't recognize our VP operations
1225  // as commutative.
1226  case Intrinsic::vp_add:
1227  case Intrinsic::vp_mul:
1228  case Intrinsic::vp_and:
1229  case Intrinsic::vp_or:
1230  case Intrinsic::vp_xor:
1231  case Intrinsic::vp_fadd:
1232  case Intrinsic::vp_fmul:
1233  case Intrinsic::vp_shl:
1234  case Intrinsic::vp_lshr:
1235  case Intrinsic::vp_ashr:
1236  case Intrinsic::vp_udiv:
1237  case Intrinsic::vp_sdiv:
1238  case Intrinsic::vp_urem:
1239  case Intrinsic::vp_srem:
1240  return Operand == 1;
1241  // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
1242  // explicit patterns for both LHS and RHS (as 'vr' versions).
1243  case Intrinsic::vp_sub:
1244  case Intrinsic::vp_fsub:
1245  case Intrinsic::vp_fdiv:
1246  return Operand == 0 || Operand == 1;
1247  default:
1248  return false;
1249  }
1250  }
1251  return false;
1252  default:
1253  return false;
1254  }
1255  };
1256 
1257  for (auto OpIdx : enumerate(I->operands())) {
1258  if (!IsSinker(I, OpIdx.index()))
1259  continue;
1260 
1261  Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1262  // Make sure we are not already sinking this operand
1263  if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1264  continue;
1265 
1266  // We are looking for a splat that can be sunk.
1268  m_Undef(), m_ZeroMask())))
1269  continue;
1270 
1271  // All uses of the shuffle should be sunk to avoid duplicating it across GPR
1272  // and vector registers.
1273  for (Use &U : Op->uses()) {
1274  Instruction *Insn = cast<Instruction>(U.getUser());
1275  if (!IsSinker(Insn, U.getOperandNo()))
1276  return false;
1277  }
1278 
1279  Ops.push_back(&Op->getOperandUse(0));
1280  Ops.push_back(&OpIdx.value());
1281  }
1282  return true;
1283 }
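// For illustration: if a loop-invariant splat (insertelement + shufflevector
// with a zero mask) feeds an add inside a loop, sinking the splat next to its
// user lets instruction selection keep the scalar in a GPR and emit vadd.vx
// instead of materialising a full splat vector and using vadd.vv.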
1284 
1286  const GlobalAddressSDNode *GA) const {
1287  // In order to maximise the opportunity for common subexpression elimination,
1288  // keep a separate ADD node for the global address offset instead of folding
1289  // it in the global address node. Later peephole optimisations may choose to
1290  // fold it back in when profitable.
1291  return false;
1292 }
1293 
1295  bool ForCodeSize) const {
1296  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1297  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
1298  return false;
1299  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
1300  return false;
1301  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
1302  return false;
1303  return Imm.isZero();
1304 }
1305 
1307  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
1308  (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
1309  (VT == MVT::f64 && Subtarget.hasStdExtD());
1310 }
1311 
1313  CallingConv::ID CC,
1314  EVT VT) const {
1315  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1316  // We might still end up using a GPR but that will be decided based on ABI.
1317  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1318  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1319  return MVT::f32;
1320 
1322 }
1323 
1325  CallingConv::ID CC,
1326  EVT VT) const {
1327  // Use f32 to pass f16 if it is legal and Zfh is not enabled.
1328  // We might still end up using a GPR but that will be decided based on ABI.
1329  // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
1330  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
1331  return 1;
1332 
1334 }
1335 
1336 // Changes the condition code and swaps operands if necessary, so the SetCC
1337 // operation matches one of the comparisons supported directly by branches
1338 // in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
1339 // with 1/-1.
1341  ISD::CondCode &CC, SelectionDAG &DAG) {
1342  // Convert X > -1 to X >= 0.
1343  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
1344  RHS = DAG.getConstant(0, DL, RHS.getValueType());
1345  CC = ISD::SETGE;
1346  return;
1347  }
1348  // Convert X < 1 to 0 >= X.
1349  if (CC == ISD::SETLT && isOneConstant(RHS)) {
1350  RHS = LHS;
1351  LHS = DAG.getConstant(0, DL, RHS.getValueType());
1352  CC = ISD::SETGE;
1353  return;
1354  }
1355 
1356  switch (CC) {
1357  default:
1358  break;
1359  case ISD::SETGT:
1360  case ISD::SETLE:
1361  case ISD::SETUGT:
1362  case ISD::SETULE:
1363  CC = ISD::getSetCCSwappedOperands(CC);
1364  std::swap(LHS, RHS);
1365  break;
1366  }
1367 }
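// For illustration: "x > -1" becomes "x >= 0" so the branch can use bgez,
// "x < 1" becomes "0 >= x" (i.e. x <= 0), and a SETUGT compare has its
// operands swapped and becomes SETULT, which branches support directly.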
1368 
1370  assert(VT.isScalableVector() && "Expecting a scalable vector type");
1371  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
1372  if (VT.getVectorElementType() == MVT::i1)
1373  KnownSize *= 8;
1374 
1375  switch (KnownSize) {
1376  default:
1377  llvm_unreachable("Invalid LMUL.");
1378  case 8:
1379  return RISCVII::VLMUL::LMUL_F8;
1380  case 16:
1381  return RISCVII::VLMUL::LMUL_F4;
1382  case 32:
1383  return RISCVII::VLMUL::LMUL_F2;
1384  case 64:
1385  return RISCVII::VLMUL::LMUL_1;
1386  case 128:
1387  return RISCVII::VLMUL::LMUL_2;
1388  case 256:
1389  return RISCVII::VLMUL::LMUL_4;
1390  case 512:
1391  return RISCVII::VLMUL::LMUL_8;
1392  }
1393 }
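// Worked examples: nxv1i8 has a known minimum size of 8 bits and maps to
// LMUL_F8, nxv4i16 (64 bits) to LMUL_1, and nxv16i32 (512 bits) to LMUL_8.
// Mask types are sized here as if their elements were bytes, so nxv8i1 also
// maps to LMUL_1.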
1394 
1396  switch (LMul) {
1397  default:
1398  llvm_unreachable("Invalid LMUL.");
1403  return RISCV::VRRegClassID;
1405  return RISCV::VRM2RegClassID;
1407  return RISCV::VRM4RegClassID;
1409  return RISCV::VRM8RegClassID;
1410  }
1411 }
1412 
1414  RISCVII::VLMUL LMUL = getLMUL(VT);
1415  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
1419  static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
1420  "Unexpected subreg numbering");
1421  return RISCV::sub_vrm1_0 + Index;
1422  }
1423  if (LMUL == RISCVII::VLMUL::LMUL_2) {
1424  static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
1425  "Unexpected subreg numbering");
1426  return RISCV::sub_vrm2_0 + Index;
1427  }
1428  if (LMUL == RISCVII::VLMUL::LMUL_4) {
1429  static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
1430  "Unexpected subreg numbering");
1431  return RISCV::sub_vrm4_0 + Index;
1432  }
1433  llvm_unreachable("Invalid vector type.");
1434 }
1435 
1437  if (VT.getVectorElementType() == MVT::i1)
1438  return RISCV::VRRegClassID;
1439  return getRegClassIDForLMUL(getLMUL(VT));
1440 }
1441 
1442 // Attempt to decompose a subvector insert/extract between VecVT and
1443 // SubVecVT via subregister indices. Returns the subregister index that
1444 // can perform the subvector insert/extract with the given element index, as
1445 // well as the index corresponding to any leftover subvectors that must be
1446 // further inserted/extracted within the register class for SubVecVT.
1447 std::pair<unsigned, unsigned>
1449  MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
1450  const RISCVRegisterInfo *TRI) {
1451  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
1452  RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
1453  RISCV::VRM2RegClassID > RISCV::VRRegClassID),
1454  "Register classes not ordered");
1455  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
1456  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
1457  // Try to compose a subregister index that takes us from the incoming
1458  // LMUL>1 register class down to the outgoing one. At each step we halve
1459  // the LMUL:
1460  // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
1461  // Note that this is not guaranteed to find a subregister index, such as
1462  // when we are extracting from one VR type to another.
1463  unsigned SubRegIdx = RISCV::NoSubRegister;
1464  for (const unsigned RCID :
1465  {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
1466  if (VecRegClassID > RCID && SubRegClassID <= RCID) {
1467  VecVT = VecVT.getHalfNumVectorElementsVT();
1468  bool IsHi =
1469  InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
1470  SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
1471  getSubregIndexByMVT(VecVT, IsHi));
1472  if (IsHi)
1473  InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
1474  }
1475  return {SubRegIdx, InsertExtractIdx};
1476 }
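// Worked example for the nxv16i32 @ 12 case above, extracting an nxv2i32:
// nxv16i32 lives in VRM8, so the loop first halves to nxv8i32; index 12 is in
// the high half, giving sub_vrm4_1 and a remaining index of 4. Halving again
// to nxv4i32, index 4 is again the high half, composing in sub_vrm2_1 and
// leaving index 0. The final step picks sub_vrm1_0, yielding
// sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 with a leftover index of 0.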
1477 
1478 // Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
1479 // stores for those types.
1480 bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
1481  return !Subtarget.useRVVForFixedLengthVectors() ||
1483 }
1484 
1486  if (ScalarTy->isPointerTy())
1487  return true;
1488 
1489  if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
1490  ScalarTy->isIntegerTy(32))
1491  return true;
1492 
1493  if (ScalarTy->isIntegerTy(64))
1494  return Subtarget.hasVInstructionsI64();
1495 
1496  if (ScalarTy->isHalfTy())
1497  return Subtarget.hasVInstructionsF16();
1498  if (ScalarTy->isFloatTy())
1499  return Subtarget.hasVInstructionsF32();
1500  if (ScalarTy->isDoubleTy())
1501  return Subtarget.hasVInstructionsF64();
1502 
1503  return false;
1504 }
1505 
1507  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1508  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
1509  "Unexpected opcode");
1510  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
1511  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
1513  RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
1514  if (!II)
1515  return SDValue();
1516  return Op.getOperand(II->VLOperand + 1 + HasChain);
1517 }
1518 
1519 static bool useRVVForFixedLengthVectorVT(MVT VT,
1520  const RISCVSubtarget &Subtarget) {
1521  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
1522  if (!Subtarget.useRVVForFixedLengthVectors())
1523  return false;
1524 
1525  // We only support a set of vector types with a consistent maximum fixed size
1526  // across all supported vector element types to avoid legalization issues.
1527  // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
1528  // fixed-length vector type we support is 1024 bytes.
1529  if (VT.getFixedSizeInBits() > 1024 * 8)
1530  return false;
1531 
1532  unsigned MinVLen = Subtarget.getRealMinVLen();
1533 
1534  MVT EltVT = VT.getVectorElementType();
1535 
1536  // Don't use RVV for vectors we cannot scalarize if required.
1537  switch (EltVT.SimpleTy) {
1538  // i1 is supported but has different rules.
1539  default:
1540  return false;
1541  case MVT::i1:
1542  // Masks can only use a single register.
1543  if (VT.getVectorNumElements() > MinVLen)
1544  return false;
1545  MinVLen /= 8;
1546  break;
1547  case MVT::i8:
1548  case MVT::i16:
1549  case MVT::i32:
1550  break;
1551  case MVT::i64:
1552  if (!Subtarget.hasVInstructionsI64())
1553  return false;
1554  break;
1555  case MVT::f16:
1556  if (!Subtarget.hasVInstructionsF16())
1557  return false;
1558  break;
1559  case MVT::f32:
1560  if (!Subtarget.hasVInstructionsF32())
1561  return false;
1562  break;
1563  case MVT::f64:
1564  if (!Subtarget.hasVInstructionsF64())
1565  return false;
1566  break;
1567  }
1568 
1569  // Reject elements larger than ELEN.
1570  if (EltVT.getSizeInBits() > Subtarget.getELEN())
1571  return false;
1572 
1573  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1574  // Don't use RVV for types that don't fit.
1575  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
1576  return false;
1577 
1578  // TODO: Perhaps an artificial restriction, but worth having whilst getting
1579  // the base fixed length RVV support in place.
1580  if (!VT.isPow2VectorType())
1581  return false;
1582 
1583  return true;
1584 }
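// For illustration, with a guaranteed VLEN of at least 128 and ELEN of 64:
// v8i32 occupies 256 bits, so LMul = ceil(256 / 128) = 2 and the type is
// accepted (assuming the default LMUL limit); v3i32 is rejected for not being
// a power of two, and v4i64 additionally requires i64 vector support.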
1585 
1586 bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
1588 }
1589 
1590 // Return the largest legal scalable vector type that matches VT's element type.
1592  const RISCVSubtarget &Subtarget) {
1593  // This may be called before legal types are setup.
1594  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
1595  useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
1596  "Expected legal fixed length vector!");
1597 
1598  unsigned MinVLen = Subtarget.getRealMinVLen();
1599  unsigned MaxELen = Subtarget.getELEN();
1600 
1601  MVT EltVT = VT.getVectorElementType();
1602  switch (EltVT.SimpleTy) {
1603  default:
1604  llvm_unreachable("unexpected element type for RVV container");
1605  case MVT::i1:
1606  case MVT::i8:
1607  case MVT::i16:
1608  case MVT::i32:
1609  case MVT::i64:
1610  case MVT::f16:
1611  case MVT::f32:
1612  case MVT::f64: {
1613  // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1614  // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
1615  // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
1616  unsigned NumElts =
1617  (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
1618  NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
1619  assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
1620  return MVT::getScalableVectorVT(EltVT, NumElts);
1621  }
1622  }
1623 }
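// Worked example, assuming a minimum VLEN of 128 and RVVBitsPerBlock of 64:
// v8i32 gives NumElts = (8 * 64) / 128 = 4, so its container is nxv4i32
// (LMUL=2), while v4i16 gives NumElts = max((4 * 64) / 128, 64 / 64) = 2 and
// uses nxv2i16 (a fractional LMUL of 1/2).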
1624 
1626  const RISCVSubtarget &Subtarget) {
1628  Subtarget);
1629 }
1630 
1633 }
1634 
1635 // Grow V to consume an entire RVV register.
1637  const RISCVSubtarget &Subtarget) {
1638  assert(VT.isScalableVector() &&
1639  "Expected to convert into a scalable vector!");
1641  "Expected a fixed length vector operand!");
1642  SDLoc DL(V);
1643  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1644  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
1645 }
1646 
1647 // Shrink V so it's just big enough to maintain a VT's worth of data.
1649  const RISCVSubtarget &Subtarget) {
1650  assert(VT.isFixedLengthVector() &&
1651  "Expected to convert into a fixed length vector!");
1653  "Expected a scalable vector operand!");
1654  SDLoc DL(V);
1655  SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1656  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
1657 }
1658 
1659 /// Return the mask type suitable for masking the provided
1660 /// vector type. This is simply an i1 element type vector of the same
1661 /// (possibly scalable) length.
1662 static MVT getMaskTypeFor(EVT VecVT) {
1663  assert(VecVT.isVector());
1664  ElementCount EC = VecVT.getVectorElementCount();
1665  return MVT::getVectorVT(MVT::i1, EC);
1666 }
1667 
1668 /// Creates an all ones mask suitable for masking a vector of type VecTy with
1669 /// vector length VL.
1671  SelectionDAG &DAG) {
1672  MVT MaskVT = getMaskTypeFor(VecVT);
1673  return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
1674 }
1675 
1676 // Gets the two common "VL" operands: an all-ones mask and the vector length.
1677 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
1678 // the vector type that it is contained in.
1679 static std::pair<SDValue, SDValue>
1680 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
1681  const RISCVSubtarget &Subtarget) {
1682  assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
1683  MVT XLenVT = Subtarget.getXLenVT();
1684  SDValue VL = VecVT.isFixedLengthVector()
1685  ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
1686  : DAG.getRegister(RISCV::X0, XLenVT);
1687  SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
1688  return {Mask, VL};
1689 }
1690 
1691 // As above but assuming the given type is a scalable vector type.
1692 static std::pair<SDValue, SDValue>
1694  const RISCVSubtarget &Subtarget) {
1695  assert(VecVT.isScalableVector() && "Expecting a scalable vector");
1696  return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
1697 }
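// For illustration: lowering a fixed-length v4i32 in an nxv2i32 container
// produces VL = constant 4 in XLenVT, whereas a scalable type passes X0 as
// the AVL, which the vsetvli encoding treats as "use VLMAX".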
1698 
1699 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very
1700 // little of either is (currently) supported. This can get us into an infinite loop
1701 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
1702 // as a ..., etc.
1703 // Until either (or both) of these can reliably lower any node, reporting that
1704 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
1705 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
1706 // which is not desirable.
1708  EVT VT, unsigned DefinedValues) const {
1709  return false;
1710 }
1711 
1713  const RISCVSubtarget &Subtarget) {
1714  // RISCV FP-to-int conversions saturate to the destination register size, but
1715  // don't produce 0 for nan. We can use a conversion instruction and fix the
1716  // nan case with a compare and a select.
1717  SDValue Src = Op.getOperand(0);
1718 
1719  EVT DstVT = Op.getValueType();
1720  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1721 
1722  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
1723  unsigned Opc;
1724  if (SatVT == DstVT)
1725  Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
1726  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
1727  Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
1728  else
1729  return SDValue();
1730  // FIXME: Support other SatVTs by clamping before or after the conversion.
1731 
1732  SDLoc DL(Op);
1733  SDValue FpToInt = DAG.getNode(
1734  Opc, DL, DstVT, Src,
1735  DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
1736 
1737  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
1738  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
1739 }
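// For illustration: an i64 fptosi.sat of an f64 source on RV64 becomes a
// FCVT_X node with the RTZ rounding mode (the hardware conversion already
// saturates out-of-range inputs), followed by a select keyed on comparing
// Src with itself so that a NaN input yields 0 rather than the saturated
// hardware result.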
1740 
1741 // Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1742 // and back, taking care to avoid converting values that are NaN or already
1743 // correct.
1744 // TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
1745 // have FRM dependencies modeled yet.
1747  MVT VT = Op.getSimpleValueType();
1748  assert(VT.isVector() && "Unexpected type");
1749 
1750  SDLoc DL(Op);
1751 
1752  // Freeze the source since we are increasing the number of uses.
1753  SDValue Src = DAG.getFreeze(Op.getOperand(0));
1754 
1755  // Truncate to integer and convert back to FP.
1757  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
1758  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1759 
1761 
1762  if (Op.getOpcode() == ISD::FCEIL) {
1763  // If the truncated value is the greater than or equal to the original
1764  // value, we've computed the ceil. Otherwise, we went the wrong way and
1765  // need to increase by 1.
1766  // FIXME: This should use a masked operation. Handle here or in isel?
1767  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
1768  DAG.getConstantFP(1.0, DL, VT));
1769  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
1770  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1771  } else if (Op.getOpcode() == ISD::FFLOOR) {
1772  // If the truncated value is the less than or equal to the original value,
1773  // we've computed the floor. Otherwise, we went the wrong way and need to
1774  // decrease by 1.
1775  // FIXME: This should use a masked operation. Handle here or in isel?
1776  SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
1777  DAG.getConstantFP(1.0, DL, VT));
1778  SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
1779  Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
1780  }
1781 
1782  // Restore the original sign so that -0.0 is preserved.
1783  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1784 
1785  // Determine the largest integer that can be represented exactly. This and
1786  // values larger than it don't have any fractional bits so don't need to
1787  // be converted.
1788  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1789  unsigned Precision = APFloat::semanticsPrecision(FltSem);
1790  APFloat MaxVal = APFloat(FltSem);
1791  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1792  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1793  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1794 
1795  // If abs(Src) was larger than MaxVal or nan, keep it.
1796  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1797  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1798  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1799 }
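// Worked example for FCEIL on an f32 element of 2.3: the round trip through
// the integer domain gives 2.0, which is less than 2.3, so 1.0 is added to
// produce 3.0. Values whose magnitude is at least 2^23 (for f32) have no
// fractional bits, fail the |Src| < MaxVal check, and are passed through
// unchanged, as are NaNs.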
1800 
1801 // ISD::FROUND is defined to round to nearest with ties rounding away from 0.
1802 // This mode isn't supported in vector hardware on RISCV. But as long as we
1803 // aren't compiling with trapping math, we can emulate this with
1804 // floor(X + copysign(nextafter(0.5, 0.0), X)).
1805 // FIXME: Could be shorter by changing rounding mode, but we don't have FRM
1806 // dependencies modeled yet.
1807 // FIXME: Use masked operations to avoid final merge.
1809  MVT VT = Op.getSimpleValueType();
1810  assert(VT.isVector() && "Unexpected type");
1811 
1812  SDLoc DL(Op);
1813 
1814  // Freeze the source since we are increasing the number of uses.
1815  SDValue Src = DAG.getFreeze(Op.getOperand(0));
1816 
1817  // We do the conversion on the absolute value and fix the sign at the end.
1818  SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
1819 
1820  const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
1821  bool Ignored;
1822  APFloat Point5Pred = APFloat(0.5f);
1823  Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
1824  Point5Pred.next(/*nextDown*/ true);
1825 
1826  // Add the adjustment.
1827  SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
1828  DAG.getConstantFP(Point5Pred, DL, VT));
1829 
1830  // Truncate to integer and convert back to fp.
1832  SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
1833  Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
1834 
1835  // Restore the original sign.
1836  Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
1837 
1838  // Determine the largest integer that can be represented exactly. This and
1839  // values larger than it don't have any fractional bits so don't need to
1840  // be converted.
1841  unsigned Precision = APFloat::semanticsPrecision(FltSem);
1842  APFloat MaxVal = APFloat(FltSem);
1843  MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1844  /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1845  SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
1846 
1847  // If abs(Src) was larger than MaxVal or nan, keep it.
1849  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
1850  return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
1851 }
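// Worked example on f32: for 2.3 the adjusted value is roughly 2.8 and
// truncates to 2.0; for -2.7, |Src| + 0.49999997 is roughly 3.2, truncating
// to 3.0, and the copysign restores -3.0. Using the value just below 0.5
// (rather than 0.5 itself) keeps inputs slightly below a .5 boundary, such
// as 0.49999997, from being rounded up incorrectly.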
1852 
1853 struct VIDSequence {
1854  int64_t StepNumerator;
1855  unsigned StepDenominator;
1856  int64_t Addend;
1857 };
1858 
1859 // Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
1860 // to the (non-zero) step S and start value X. This can then be lowered as the
1861 // RVV sequence (VID * S) + X, for example.
1862 // The step S is represented as an integer numerator divided by a positive
1863 // denominator. Note that the implementation currently only identifies
1864 // sequences in which either the numerator is +/- 1 or the denominator is 1. It
1865 // cannot detect 2/3, for example.
1866 // Note that this method will also match potentially unappealing index
1867 // sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
1868 // determine whether this is worth generating code for.
1870  unsigned NumElts = Op.getNumOperands();
1871  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
1872  if (!Op.getValueType().isInteger())
1873  return None;
1874 
1875  Optional<unsigned> SeqStepDenom;
1876  Optional<int64_t> SeqStepNum, SeqAddend;
1878  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
1879  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1880  // Assume undef elements match the sequence; we just have to be careful
1881  // when interpolating across them.
1882  if (Op.getOperand(Idx).isUndef())
1883  continue;
1884  // The BUILD_VECTOR must be all constants.
1885  if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
1886  return None;
1887 
1888  uint64_t Val = Op.getConstantOperandVal(Idx) &
1889  maskTrailingOnes<uint64_t>(EltSizeInBits);
1890 
1891  if (PrevElt) {
1892  // Calculate the step since the last non-undef element, and ensure
1893  // it's consistent across the entire sequence.
1894  unsigned IdxDiff = Idx - PrevElt->second;
1895  int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
1896 
1897  // A zero value difference means that we're somewhere in the middle
1898  // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
1899  // step change before evaluating the sequence.
1900  if (ValDiff == 0)
1901  continue;
1902 
1903  int64_t Remainder = ValDiff % IdxDiff;
1904  // Normalize the step if it's greater than 1.
1905  if (Remainder != ValDiff) {
1906  // The difference must cleanly divide the element span.
1907  if (Remainder != 0)
1908  return None;
1909  ValDiff /= IdxDiff;
1910  IdxDiff = 1;
1911  }
1912 
1913  if (!SeqStepNum)
1914  SeqStepNum = ValDiff;
1915  else if (ValDiff != SeqStepNum)
1916  return None;
1917 
1918  if (!SeqStepDenom)
1919  SeqStepDenom = IdxDiff;
1920  else if (IdxDiff != *SeqStepDenom)
1921  return None;
1922  }
1923 
1924  // Record this non-undef element for later.
1925  if (!PrevElt || PrevElt->first != Val)
1926  PrevElt = std::make_pair(Val, Idx);
1927  }
1928 
1929  // We need to have logged a step for this to count as a legal index sequence.
1930  if (!SeqStepNum || !SeqStepDenom)
1931  return None;
1932 
1933  // Loop back through the sequence and validate elements we might have skipped
1934  // while waiting for a valid step. While doing this, log any sequence addend.
1935  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
1936  if (Op.getOperand(Idx).isUndef())
1937  continue;
1938  uint64_t Val = Op.getConstantOperandVal(Idx) &
1939  maskTrailingOnes<uint64_t>(EltSizeInBits);
1940  uint64_t ExpectedVal =
1941  (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
1942  int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
1943  if (!SeqAddend)
1944  SeqAddend = Addend;
1945  else if (Addend != SeqAddend)
1946  return None;
1947  }
1948 
1949  assert(SeqAddend && "Must have an addend if we have a step");
1950 
1951  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
1952 }
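// Worked examples: <i32 0, i32 2, i32 4, i32 6> matches with StepNumerator=2,
// StepDenominator=1, Addend=0, while <i32 1, i32 1, i32 2, i32 2> matches a
// fractional step with StepNumerator=1, StepDenominator=2, Addend=1
// (element i is (i * 1) / 2 + 1). A vector such as <i32 0, i32 3, i32 5>
// returns None because the step is not consistent.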
1953 
1954 // Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
1955 // and lower it as a VRGATHER_VX_VL from the source vector.
1956 static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
1957  SelectionDAG &DAG,
1958  const RISCVSubtarget &Subtarget) {
1959  if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1960  return SDValue();
1961  SDValue Vec = SplatVal.getOperand(0);
1962  // Only perform this optimization on vectors of the same size for simplicity.
1963  // Don't perform this optimization for i1 vectors.
1964  // FIXME: Support i1 vectors, maybe by promoting to i8?
1965  if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
1966  return SDValue();
1967  SDValue Idx = SplatVal.getOperand(1);
1968  // The index must be a legal type.
1969  if (Idx.getValueType() != Subtarget.getXLenVT())
1970  return SDValue();
1971 
1972  MVT ContainerVT = VT;
1973  if (VT.isFixedLengthVector()) {
1974  ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1975  Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
1976  }
1977 
1978  SDValue Mask, VL;
1979  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1980 
1981  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
1982  Idx, Mask, DAG.getUNDEF(ContainerVT), VL);
1983 
1984  if (!VT.isFixedLengthVector())
1985  return Gather;
1986 
1987  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
1988 }
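// For illustration: a shuffle that splats an element extracted from %v across
// a vector of the same type as %v is lowered to a single vrgather.vx (the
// VRGATHER_VX_VL node above) with the element index kept in a scalar
// register, instead of an extract followed by a separate splat.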
1989 
1991  const RISCVSubtarget &Subtarget) {
1992  MVT VT = Op.getSimpleValueType();
1993  assert(VT.isFixedLengthVector() && "Unexpected vector!");
1994 
1995  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1996 
1997  SDLoc DL(Op);
1998  SDValue Mask, VL;
1999  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2000 
2001  MVT XLenVT = Subtarget.getXLenVT();
2002  unsigned NumElts = Op.getNumOperands();
2003 
2004  if (VT.getVectorElementType() == MVT::i1) {
2005  if (ISD::isBuildVectorAllZeros(Op.getNode())) {
2006  SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
2007  return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
2008  }
2009 
2010  if (ISD::isBuildVectorAllOnes(Op.getNode())) {
2011  SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
2012  return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
2013  }
2014 
2015  // Lower constant mask BUILD_VECTORs via an integer vector type, in
2016  // scalar integer chunks whose bit-width depends on the number of mask
2017  // bits and XLEN.
2018  // First, determine the most appropriate scalar integer type to use. This
2019  // is at most XLenVT, but may be shrunk to a smaller vector element type
2020  // according to the size of the final vector - use i8 chunks rather than
2021  // XLenVT if we're producing a v8i1. This results in more consistent
2022  // codegen across RV32 and RV64.
2023  unsigned NumViaIntegerBits =
2024  std::min(std::max(NumElts, 8u), Subtarget.getXLen());
2025  NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
2026  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
2027  // If we have to use more than one INSERT_VECTOR_ELT then this
2028  // optimization is likely to increase code size; avoid performing it in
2029  // such a case and use a load from a constant pool instead.
2030  if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
2031  return SDValue();
2032  // Now we can create our integer vector type. Note that it may be larger
2033  // than the resulting mask type: v4i1 would use v1i8 as its integer type.
2034  MVT IntegerViaVecVT =
2035  MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
2036  divideCeil(NumElts, NumViaIntegerBits));
2037 
2038  uint64_t Bits = 0;
2039  unsigned BitPos = 0, IntegerEltIdx = 0;
2040  SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
2041 
2042  for (unsigned I = 0; I < NumElts; I++, BitPos++) {
2043  // Once we accumulate enough bits to fill our scalar type, insert into
2044  // our vector and clear our accumulated data.
2045  if (I != 0 && I % NumViaIntegerBits == 0) {
2046  if (NumViaIntegerBits <= 32)
2047  Bits = SignExtend64<32>(Bits);
2048  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2049  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
2050  Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2051  Bits = 0;
2052  BitPos = 0;
2053  IntegerEltIdx++;
2054  }
2055  SDValue V = Op.getOperand(I);
2056  bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
2057  Bits |= ((uint64_t)BitValue << BitPos);
2058  }
2059 
2060  // Insert the (remaining) scalar value into position in our integer
2061  // vector type.
2062  if (NumViaIntegerBits <= 32)
2063  Bits = SignExtend64<32>(Bits);
2064  SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
2065  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
2066  DAG.getConstant(IntegerEltIdx, DL, XLenVT));
2067 
2068  if (NumElts < NumViaIntegerBits) {
2069  // If we're producing a smaller vector than our minimum legal integer
2070  // type, bitcast to the equivalent (known-legal) mask type, and extract
2071  // our final mask.
2072  assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
2073  Vec = DAG.getBitcast(MVT::v8i1, Vec);
2074  Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
2075  DAG.getConstant(0, DL, XLenVT));
2076  } else {
2077  // Else we must have produced an integer type with the same size as the
2078  // mask type; bitcast for the final result.
2079  assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
2080  Vec = DAG.getBitcast(VT, Vec);
2081  }
2082 
2083  return Vec;
2084  }
2085 
2086  // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
2087  // vector type, we have a legal equivalently-sized i8 type, so we can use
2088  // that.
2089  MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
2090  SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
2091 
2092  SDValue WideVec;
2093  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2094  // For a splat, perform a scalar truncate before creating the wider
2095  // vector.
2096  assert(Splat.getValueType() == XLenVT &&
2097  "Unexpected type for i1 splat value");
2098  Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
2099  DAG.getConstant(1, DL, XLenVT));
2100  WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
2101  } else {
2102  SmallVector<SDValue, 8> Ops(Op->op_values());
2103  WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
2104  SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
2105  WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
2106  }
2107 
2108  return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
2109  }
2110 
2111  if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
2112  if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
2113  return Gather;
2114  unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
2115  : RISCVISD::VMV_V_X_VL;
2116  Splat =
2117  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
2118  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2119  }
2120 
2121  // Try and match index sequences, which we can lower to the vid instruction
2122  // with optional modifications. An all-undef vector is matched by
2123  // getSplatValue, above.
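  // For instance, the sequence <1, 3, 5, 7> is matched with StepNumerator = 2,
  // StepDenominator = 1 and Addend = 1, and, after converting vid.v back to
  // the fixed-length type, lowers roughly to
  //   (add (shl (vid.v), splat 1), splat 1).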
2124  if (auto SimpleVID = isSimpleVIDSequence(Op)) {
2125  int64_t StepNumerator = SimpleVID->StepNumerator;
2126  unsigned StepDenominator = SimpleVID->StepDenominator;
2127  int64_t Addend = SimpleVID->Addend;
2128 
2129  assert(StepNumerator != 0 && "Invalid step");
2130  bool Negate = false;
2131  int64_t SplatStepVal = StepNumerator;
2132  unsigned StepOpcode = ISD::MUL;
2133  if (StepNumerator != 1) {
2134  if (isPowerOf2_64(std::abs(StepNumerator))) {
2135  Negate = StepNumerator < 0;
2136  StepOpcode = ISD::SHL;
2137  SplatStepVal = Log2_64(std::abs(StepNumerator));
2138  }
2139  }
2140 
2141  // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
2142  // threshold since it's the immediate value many RVV instructions accept.
2143  // There is no vmul.vi instruction so ensure multiply constant can fit in
2144  // a single addi instruction.
2145  if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
2146  (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
2147  isPowerOf2_32(StepDenominator) &&
2148  (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
2149  SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
2150  // Convert right out of the scalable type so we can use standard ISD
2151  // nodes for the rest of the computation. If we used scalable types with
2152  // these, we'd lose the fixed-length vector info and generate worse
2153  // vsetvli code.
2154  VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
2155  if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
2156  (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
2157  SDValue SplatStep = DAG.getSplatBuildVector(
2158  VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
2159  VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
2160  }
2161  if (StepDenominator != 1) {
2162  SDValue SplatStep = DAG.getSplatBuildVector(
2163  VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
2164  VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
2165  }
2166  if (Addend != 0 || Negate) {
2167  SDValue SplatAddend = DAG.getSplatBuildVector(
2168  VT, DL, DAG.getConstant(Addend, DL, XLenVT));
2169  VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
2170  }
2171  return VID;
2172  }
2173  }
2174 
2175  // Attempt to detect "hidden" splats, which only reveal themselves as splats
2176  // when re-interpreted as a vector with a larger element type. For example,
2177  // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
2178  // could be instead splat as
2179  // v2i32 = build_vector i32 0x00010000, i32 0x00010000
2180  // TODO: This optimization could also work on non-constant splats, but it
2181  // would require bit-manipulation instructions to construct the splat value.
2182  SmallVector<SDValue> Sequence;
2183  unsigned EltBitSize = VT.getScalarSizeInBits();
2184  const auto *BV = cast<BuildVectorSDNode>(Op);
2185  if (VT.isInteger() && EltBitSize < 64 &&
2186  ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
2187  BV->getRepeatedSequence(Sequence) &&
2188  (Sequence.size() * EltBitSize) <= 64) {
2189  unsigned SeqLen = Sequence.size();
2190  MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
2191  MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
2192  assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
2193  ViaIntVT == MVT::i64) &&
2194  "Unexpected sequence type");
2195 
2196  unsigned EltIdx = 0;
2197  uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
2198  uint64_t SplatValue = 0;
2199  // Construct the amalgamated value which can be splatted as this larger
2200  // vector type.
2201  for (const auto &SeqV : Sequence) {
2202  if (!SeqV.isUndef())
2203  SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
2204  << (EltIdx * EltBitSize));
2205  EltIdx++;
2206  }
2207 
2208  // On RV64, sign-extend from 32 to 64 bits where possible in order to
2209  // achieve better constant materialization.
2210  if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
2211  SplatValue = SignExtend64<32>(SplatValue);
2212 
2213  // Since we can't introduce illegal i64 types at this stage, we can only
2214  // perform an i64 splat on RV32 if the constant is the sign-extension of
2215  // its low 32 bits. That way we can use RVV instructions to splat.
2216  assert((ViaIntVT.bitsLE(XLenVT) ||
2217  (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
2218  "Unexpected bitcast sequence");
2219  if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
2220  SDValue ViaVL =
2221  DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
2222  MVT ViaContainerVT =
2223  getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
2224  SDValue Splat =
2225  DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
2226  DAG.getUNDEF(ViaContainerVT),
2227  DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
2228  Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
2229  return DAG.getBitcast(VT, Splat);
2230  }
2231  }
2232 
2233  // Try and optimize BUILD_VECTORs with "dominant values" - these are values
2234  // which constitute a large proportion of the elements. In such cases we can
2235  // splat a vector with the dominant element and make up the shortfall with
2236  // INSERT_VECTOR_ELTs.
2237  // Note that this includes vectors of 2 elements by association. The
2238  // upper-most element is the "dominant" one, allowing us to use a splat to
2239  // "insert" the upper element, and an insert of the lower element at position
2240  // 0, which improves codegen.
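  // As a small illustration (assuming we are not optimizing for size), a
  // v4i32 BUILD_VECTOR <7, 7, 3, 7> has dominant value 7 (3 occurrences,
  // above the threshold of NumDefElts - 2 = 2), so it becomes a splat of 7
  // followed by a single INSERT_VECTOR_ELT of 3 at index 2.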
2241  SDValue DominantValue;
2242  unsigned MostCommonCount = 0;
2243  DenseMap<SDValue, unsigned> ValueCounts;
2244  unsigned NumUndefElts =
2245  count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
2246 
2247  // Track the number of scalar loads we know we'd be inserting, estimated as
2248  // any non-zero floating-point constant. Other kinds of element are either
2249  // already in registers or are materialized on demand. The threshold at which
2250  // a vector load is more desirable than several scalar materialization and
2251  // vector-insertion instructions is not known.
2252  unsigned NumScalarLoads = 0;
2253 
2254  for (SDValue V : Op->op_values()) {
2255  if (V.isUndef())
2256  continue;
2257 
2258  ValueCounts.insert(std::make_pair(V, 0));
2259  unsigned &Count = ValueCounts[V];
2260 
2261  if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
2262  NumScalarLoads += !CFP->isExactlyValue(+0.0);
2263 
2264  // Is this value dominant? In case of a tie, prefer the highest element as
2265  // it's cheaper to insert near the beginning of a vector than it is at the
2266  // end.
2267  if (++Count >= MostCommonCount) {
2268  DominantValue = V;
2269  MostCommonCount = Count;
2270  }
2271  }
2272 
2273  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
2274  unsigned NumDefElts = NumElts - NumUndefElts;
2275  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
2276 
2277  // Don't perform this optimization when optimizing for size, since
2278  // materializing elements and inserting them tends to cause code bloat.
2279  if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
2280  ((MostCommonCount > DominantValueCountThreshold) ||
2281  (ValueCounts.size() <= Log2_32(NumDefElts)))) {
2282  // Start by splatting the most common element.
2283  SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
2284 
2285  DenseSet<SDValue> Processed{DominantValue};
2286  MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
2287  for (const auto &OpIdx : enumerate(Op->ops())) {
2288  const SDValue &V = OpIdx.value();
2289  if (V.isUndef() || !Processed.insert(V).second)
2290  continue;
2291  if (ValueCounts[V] == 1) {
2292  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
2293  DAG.getConstant(OpIdx.index(), DL, XLenVT));
2294  } else {
2295  // Blend in all instances of this value using a VSELECT, using a
2296  // mask where each bit signals whether that element is the one
2297  // we're after.
2298  SmallVector<SDValue> Ops;
2299  transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
2300  return DAG.getConstant(V == V1, DL, XLenVT);
2301  });
2302  Vec = DAG.getNode(ISD::VSELECT, DL, VT,
2303  DAG.getBuildVector(SelMaskTy, DL, Ops),
2304  DAG.getSplatBuildVector(VT, DL, V), Vec);
2305  }
2306  }
2307 
2308  return Vec;
2309  }
2310 
2311  return SDValue();
2312 }
2313 
2314 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2315  SDValue Lo, SDValue Hi, SDValue VL,
2316  SelectionDAG &DAG) {
2317  if (!Passthru)
2318  Passthru = DAG.getUNDEF(VT);
2319  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
2320  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
2321  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
2322  // If Hi is just the sign-extension of Lo (all of Hi equals Lo's sign bit),
2323  // lower this as a custom node to try and match RVV vector/scalar instructions.
2324  if ((LoC >> 31) == HiC)
2325  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
2326 
2327  // If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
2328  // vmv.v.x whose EEW = 32 to lower it.
2329  auto *Const = dyn_cast<ConstantSDNode>(VL);
2330  if (LoC == HiC && Const && Const->isAllOnesValue()) {
2331  MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
2332  // TODO: if vl <= min(VLMAX), we can also do this. But we cannot access
2333  // the subtarget from here at the moment.
2334  auto InterVec = DAG.getNode(
2335  RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
2336  DAG.getRegister(RISCV::X0, MVT::i32));
2337  return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
2338  }
2339  }
2340 
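  // For example, splatting the i64 constant 0x0000000100000002 on RV32 gives
  // Lo = 2 and Hi = 1; Hi is neither the sign-extension of Lo nor equal to
  // Lo, so neither shortcut above applies and we take the split path below.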
2341  // Fall back to a stack store and stride x0 vector load.
2342  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
2343  Hi, VL);
2344 }
2345 
2346 // Called by type legalization to handle splat of i64 on RV32.
2347 // FIXME: We can optimize this when the type has sign or zero bits in one
2348 // of the halves.
2349 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
2350  SDValue Scalar, SDValue VL,
2351  SelectionDAG &DAG) {
2352  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
2353  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2354  DAG.getConstant(0, DL, MVT::i32));
2355  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
2356  DAG.getConstant(1, DL, MVT::i32));
2357  return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
2358 }
2359 
2360 // This function lowers a splat of a scalar operand Scalar with the vector
2361 // length VL. It ensures the final sequence is type legal, which is useful when
2362 // lowering a splat after type legalization.
2363 static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
2364  MVT VT, SDLoc DL, SelectionDAG &DAG,
2365  const RISCVSubtarget &Subtarget) {
2366  bool HasPassthru = Passthru && !Passthru.isUndef();
2367  if (!HasPassthru && !Passthru)
2368  Passthru = DAG.getUNDEF(VT);
2369  if (VT.isFloatingPoint()) {
2370  // If VL is 1, we could use vfmv.s.f.
2371  if (isOneConstant(VL))
2372  return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
2373  return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
2374  }
2375 
2376  MVT XLenVT = Subtarget.getXLenVT();
2377 
2378  // Simplest case is that the operand needs to be promoted to XLenVT.
2379  if (Scalar.getValueType().bitsLE(XLenVT)) {
2380  // If the operand is a constant, sign extend to increase our chances
2381  // of being able to use a .vi instruction. ANY_EXTEND would become a
2382  // zero extend and the simm5 check in isel would fail.
2383  // FIXME: Should we ignore the upper bits in isel instead?
2384  unsigned ExtOpc =
2385  isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
2386  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
2387  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
2388  // If VL is 1 and the scalar value won't benefit from immediate, we could
2389  // use vmv.s.x.
2390  if (isOneConstant(VL) &&
2391  (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
2392  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
2393  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
2394  }
2395 
2396  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
2397  "Unexpected scalar for splat lowering!");
2398 
2399  if (isOneConstant(VL) && isNullConstant(Scalar))
2400  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
2401  DAG.getConstant(0, DL, XLenVT), VL);
2402 
2403  // Otherwise use the more complicated splatting algorithm.
2404  return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
2405 }
2406 
2407 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
2408  const RISCVSubtarget &Subtarget) {
2409  // We need to be able to widen elements to the next larger integer type.
2410  if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
2411  return false;
2412 
2413  int Size = Mask.size();
2414  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
2415 
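  // For example, with two v4iN sources the mask <0, 4, 1, 5> is an
  // interleave: even result elements come from the low half of source 0 and
  // odd result elements from the low half of source 1, so SwapSources stays
  // false.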
2416  int Srcs[] = {-1, -1};
2417  for (int i = 0; i != Size; ++i) {
2418  // Ignore undef elements.
2419  if (Mask[i] < 0)
2420  continue;
2421 
2422  // Is this an even or odd element.
2423  int Pol = i % 2;
2424 
2425  // Ensure we consistently use the same source for this element polarity.
2426  int Src = Mask[i] / Size;
2427  if (Srcs[Pol] < 0)
2428  Srcs[Pol] = Src;
2429  if (Srcs[Pol] != Src)
2430  return false;
2431 
2432  // Make sure the element within the source is appropriate for this element
2433  // in the destination.
2434  int Elt = Mask[i] % Size;
2435  if (Elt != i / 2)
2436  return false;
2437  }
2438 
2439  // We need to find a source for each polarity and they can't be the same.
2440  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
2441  return false;
2442 
2443  // Swap the sources if the second source was in the even polarity.
2444  SwapSources = Srcs[0] > Srcs[1];
2445 
2446  return true;
2447 }
2448 
2449 /// Match shuffles that concatenate two vectors, rotate the concatenation,
2450 /// and then extract the original number of elements from the rotated result.
2451 /// This is equivalent to vector.splice or X86's PALIGNR instruction. The
2452 /// returned rotation amount is for a rotate right, where elements move from
2453 /// higher elements to lower elements. \p LoSrc indicates the first source
2454 /// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
2455 /// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
2456 /// 0 or 1 if a rotation is found.
2457 ///
2458 /// NOTE: We talk about rotate to the right which matches how bit shift and
2459 /// rotate instructions are described where LSBs are on the right, but LLVM IR
2460 /// and the table below write vectors with the lowest elements on the left.
2461 static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
2462  int Size = Mask.size();
2463 
2464  // We need to detect various ways of spelling a rotation:
2465  // [11, 12, 13, 14, 15, 0, 1, 2]
2466  // [-1, 12, 13, 14, -1, -1, 1, -1]
2467  // [-1, -1, -1, -1, -1, -1, 1, 2]
2468  // [ 3, 4, 5, 6, 7, 8, 9, 10]
2469  // [-1, 4, 5, 6, -1, -1, 9, -1]
2470  // [-1, 4, 5, 6, -1, -1, -1, -1]
2471  int Rotation = 0;
2472  LoSrc = -1;
2473  HiSrc = -1;
2474  for (int i = 0; i != Size; ++i) {
2475  int M = Mask[i];
2476  if (M < 0)
2477  continue;
2478 
2479  // Determine where a rotate vector would have started.
2480  int StartIdx = i - (M % Size);
2481  // The identity rotation isn't interesting, stop.
2482  if (StartIdx == 0)
2483  return -1;
2484 
2485  // If we found the tail of a vector the rotation must be the missing
2486  // front. If we found the head of a vector, it must be how much of the
2487  // head.
2488  int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
2489 
2490  if (Rotation == 0)
2491  Rotation = CandidateRotation;
2492  else if (Rotation != CandidateRotation)
2493  // The rotations don't match, so we can't match this mask.
2494  return -1;
2495 
2496  // Compute which value this mask is pointing at.
2497  int MaskSrc = M < Size ? 0 : 1;
2498 
2499  // Compute which of the two target values this index should be assigned to.
2500  // This reflects whether the high elements are remaining or the low elements
2501  // are remaining.
2502  int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
2503 
2504  // Either set up this value if we've not encountered it before, or check
2505  // that it remains consistent.
2506  if (TargetSrc < 0)
2507  TargetSrc = MaskSrc;
2508  else if (TargetSrc != MaskSrc)
2509  // This may be a rotation, but it pulls from the inputs in some
2510  // unsupported interleaving.
2511  return -1;
2512  }
2513 
2514  // Check that we successfully analyzed the mask, and normalize the results.
2515  assert(Rotation != 0 && "Failed to locate a viable rotation!");
2516  assert((LoSrc >= 0 || HiSrc >= 0) &&
2517  "Failed to find a rotated input vector!");
2518 
2519  return Rotation;
2520 }
2521 
2522 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
2523  const RISCVSubtarget &Subtarget) {
2524  SDValue V1 = Op.getOperand(0);
2525  SDValue V2 = Op.getOperand(1);
2526  SDLoc DL(Op);
2527  MVT XLenVT = Subtarget.getXLenVT();
2528  MVT VT = Op.getSimpleValueType();
2529  unsigned NumElts = VT.getVectorNumElements();
2530  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
2531 
2532  MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2533 
2534  SDValue TrueMask, VL;
2535  std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2536 
2537  if (SVN->isSplat()) {
2538  const int Lane = SVN->getSplatIndex();
2539  if (Lane >= 0) {
2540  MVT SVT = VT.getVectorElementType();
2541 
2542  // Turn splatted vector load into a strided load with an X0 stride.
2543  SDValue V = V1;
2544  // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
2545  // with undef.
2546  // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
2547  int Offset = Lane;
2548  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
2549  int OpElements =
2550  V.getOperand(0).getSimpleValueType().getVectorNumElements();
2551  V = V.getOperand(Offset / OpElements);
2552  Offset %= OpElements;
2553  }
2554 
2555  // We need to ensure the load isn't atomic or volatile.
2556  if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
2557  auto *Ld = cast<LoadSDNode>(V);
2558  Offset *= SVT.getStoreSize();
2559  SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
2560  TypeSize::Fixed(Offset), DL);
2561 
2562  // If this is SEW=64 on RV32, use a strided load with a stride of x0.
2563  if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
2564  SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
2565  SDValue IntID =
2566  DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
2567  SDValue Ops[] = {Ld->getChain(),
2568  IntID,
2569  DAG.getUNDEF(ContainerVT),
2570  NewAddr,
2571  DAG.getRegister(RISCV::X0, XLenVT),
2572  VL};
2573  SDValue NewLoad = DAG.getMemIntrinsicNode(
2574  ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
2575  DAG.getMachineFunction().getMachineMemOperand(
2576  Ld->getMemOperand(), Offset, SVT.getStoreSize()));
2577  DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
2578  return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
2579  }
2580 
2581  // Otherwise use a scalar load and splat. This will give the best
2582  // opportunity to fold a splat into the operation. ISel can turn it into
2583  // the x0 strided load if we aren't able to fold away the select.
2584  if (SVT.isFloatingPoint())
2585  V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
2586  Ld->getPointerInfo().getWithOffset(Offset),
2587  Ld->getOriginalAlign(),
2588  Ld->getMemOperand()->getFlags());
2589  else
2590  V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
2591  Ld->getPointerInfo().getWithOffset(Offset), SVT,
2592  Ld->getOriginalAlign(),
2593  Ld->getMemOperand()->getFlags());
2594  DAG.makeEquivalentMemoryOrdering(Ld, V);
2595 
2596  unsigned Opc =
2597  SVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
2598  SDValue Splat =
2599  DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
2600  return convertFromScalableVector(VT, Splat, DAG, Subtarget);
2601  }
2602 
2603  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2604  assert(Lane < (int)NumElts && "Unexpected lane!");
2605  SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
2606  V1, DAG.getConstant(Lane, DL, XLenVT),
2607  TrueMask, DAG.getUNDEF(ContainerVT), VL);
2608  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2609  }
2610  }
2611 
2612  ArrayRef<int> Mask = SVN->getMask();
2613 
2614  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
2615  // be undef which can be handled with a single SLIDEDOWN/UP.
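  // For example, the v8i32 mask <2,3,4,5,6,7,8,9> is a rotate right by 2 of
  // the concatenation V1:V2 and lowers (roughly) to a vslidedown of V1 by 2
  // followed by a vslideup of V2 by 6 into the same destination.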
2616  int LoSrc, HiSrc;
2617  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
2618  if (Rotation > 0) {
2619  SDValue LoV, HiV;
2620  if (LoSrc >= 0) {
2621  LoV = LoSrc == 0 ? V1 : V2;
2622  LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
2623  }
2624  if (HiSrc >= 0) {
2625  HiV = HiSrc == 0 ? V1 : V2;
2626  HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
2627  }
2628 
2629  // We found a rotation. We need to slide HiV down by Rotation. Then we need
2630  // to slide LoV up by (NumElts - Rotation).
2631  unsigned InvRotate = NumElts - Rotation;
2632 
2633  SDValue Res = DAG.getUNDEF(ContainerVT);
2634  if (HiV) {
2635  // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
2636  // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
2637  // causes multiple vsetvlis in some test cases such as lowering
2638  // reduce.mul
2639  SDValue DownVL = VL;
2640  if (LoV)
2641  DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
2642  Res =
2643  DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
2644  DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
2645  }
2646  if (LoV)
2647  Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
2648  DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
2649 
2650  return convertFromScalableVector(VT, Res, DAG, Subtarget);
2651  }
2652 
2653  // Detect an interleave shuffle and lower to
2654  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
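  // The arithmetic behind this: zext(V1) + zext(V2) + zext(V2) * (2^eltbits - 1)
  // equals zext(V1) + zext(V2) * 2^eltbits, which places each V1 element in
  // the low half and each V2 element in the high half of a double-width
  // element, i.e. an interleave once viewed at the original element width.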
2655  bool SwapSources;
2656  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
2657  // Swap sources if needed.
2658  if (SwapSources)
2659  std::swap(V1, V2);
2660 
2661  // Extract the lower half of the vectors.
2662  MVT HalfVT = VT.getHalfNumVectorElementsVT();
2663  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
2664  DAG.getConstant(0, DL, XLenVT));
2665  V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
2666  DAG.getConstant(0, DL, XLenVT));
2667 
2668  // Double the element width and halve the number of elements in an int type.
2669  unsigned EltBits = VT.getScalarSizeInBits();
2670  MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
2671  MVT WideIntVT =
2672  MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
2673  // Convert this to a scalable vector. We need to base this on the
2674  // destination size to ensure there's always a type with a smaller LMUL.
2675  MVT WideIntContainerVT =
2676  getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
2677 
2678  // Convert sources to scalable vectors with the same element count as the
2679  // larger type.
2680  MVT HalfContainerVT = MVT::getVectorVT(
2681  VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
2682  V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
2683  V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
2684 
2685  // Cast sources to integer.
2686  MVT IntEltVT = MVT::getIntegerVT(EltBits);
2687  MVT IntHalfVT =
2688  MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
2689  V1 = DAG.getBitcast(IntHalfVT, V1);
2690  V2 = DAG.getBitcast(IntHalfVT, V2);
2691 
2692  // Freeze V2 since we use it twice and we need to be sure that the add and
2693  // multiply see the same value.
2694  V2 = DAG.getFreeze(V2);
2695 
2696  // Recreate TrueMask using the widened type's element count.
2697  TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
2698 
2699  // Widen V1 and V2 with 0s and add one copy of V2 to V1.
2700  SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
2701  V2, TrueMask, VL);
2702  // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
2703  SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
2704  DAG.getUNDEF(IntHalfVT),
2705  DAG.getAllOnesConstant(DL, XLenVT));
2706  SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
2707  V2, Multiplier, TrueMask, VL);
2708  // Add the new copies to our previous addition giving us 2^eltbits copies of
2709  // V2. This is equivalent to shifting V2 left by eltbits. This should
2710  // combine with the vwmulu.vv above to form vwmaccu.vv.
2711  Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
2712  TrueMask, VL);
2713  // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
2714  // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
2715  // vector VT.
2716  ContainerVT =
2717  MVT::getVectorVT(VT.getVectorElementType(),
2718  WideIntContainerVT.getVectorElementCount() * 2);
2719  Add = DAG.getBitcast(ContainerVT, Add);
2720  return convertFromScalableVector(VT, Add, DAG, Subtarget);
2721  }
2722 
2723  // Detect shuffles which can be re-expressed as vector selects; these are
2724  // shuffles in which each element in the destination is taken from an element
2725  // at the corresponding index in either source vector.
2726  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
2727  int MaskIndex = MaskIdx.value();
2728  return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
2729  });
2730 
2731  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
2732 
2733  SmallVector<SDValue> MaskVals;
2734  // As a backup, shuffles can be lowered via a vrgather instruction, possibly
2735  // merged with a second vrgather.
2736  SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
2737 
2738  // By default we preserve the original operand order, and use a mask to
2739  // select LHS as true and RHS as false. However, since RVV vector selects may
2740  // feature splats but only on the LHS, we may choose to invert our mask and
2741  // instead select between RHS and LHS.
2742  bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
2743  bool InvertMask = IsSelect == SwapOps;
2744 
2745  // Keep track of which non-undef indices are used by each LHS/RHS shuffle
2746  // half.
2747  DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
2748 
2749  // Now construct the mask that will be used by the vselect or blended
2750  // vrgather operation. For vrgathers, construct the appropriate indices into
2751  // each vector.
2752  for (int MaskIndex : Mask) {
2753  bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
2754  MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
2755  if (!IsSelect) {
2756  bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
2757  GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
2758  ? DAG.getConstant(MaskIndex, DL, XLenVT)
2759  : DAG.getUNDEF(XLenVT));
2760  GatherIndicesRHS.push_back(
2761  IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
2762  : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
2763  if (IsLHSOrUndefIndex && MaskIndex >= 0)
2764  ++LHSIndexCounts[MaskIndex];
2765  if (!IsLHSOrUndefIndex)
2766  ++RHSIndexCounts[MaskIndex - NumElts];
2767  }
2768  }
2769 
2770  if (SwapOps) {
2771  std::swap(V1, V2);
2772  std::swap(GatherIndicesLHS, GatherIndicesRHS);
2773  }
2774 
2775  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
2776  MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
2777  SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
2778 
2779  if (IsSelect)
2780  return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
2781 
2782  if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
2783  // On such a large vector we're unable to use i8 as the index type.
2784  // FIXME: We could promote the index to i16 and use vrgatherei16, but that
2785  // may involve vector splitting if we're already at LMUL=8, or our
2786  // user-supplied maximum fixed-length LMUL.
2787  return SDValue();
2788  }
2789 
2790  unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
2791  unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
2792  MVT IndexVT = VT.changeTypeToInteger();
2793  // Since we can't introduce illegal index types at this stage, use i16 and
2794  // vrgatherei16 if the corresponding index type for plain vrgather is greater
2795  // than XLenVT.
2796  if (IndexVT.getScalarType().bitsGT(XLenVT)) {
2797  GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
2798  IndexVT = IndexVT.changeVectorElementType(MVT::i16);
2799  }
2800 
2801  MVT IndexContainerVT =
2802  ContainerVT.changeVectorElementType(IndexVT.getScalarType());
2803 
2804  SDValue Gather;
2805  // TODO: This doesn't trigger for i64 vectors on RV32, since there we
2806  // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
2807  if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
2808  Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
2809  Subtarget);
2810  } else {
2811  V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
2812  // If only one index is used, we can use a "splat" vrgather.
2813  // TODO: We can splat the most-common index and fix-up any stragglers, if
2814  // that's beneficial.
2815  if (LHSIndexCounts.size() == 1) {
2816  int SplatIndex = LHSIndexCounts.begin()->getFirst();
2817  Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
2818  DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask,
2819  DAG.getUNDEF(ContainerVT), VL);
2820  } else {
2821  SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
2822  LHSIndices =
2823  convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
2824 
2825  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
2826  TrueMask, DAG.getUNDEF(ContainerVT), VL);
2827  }
2828  }
2829 
2830  // If a second vector operand is used by this shuffle, blend it in with an
2831  // additional vrgather.
2832  if (!V2.isUndef()) {
2833  V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
2834 
2835  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
2836  SelectMask =
2837  convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
2838 
2839  // If only one index is used, we can use a "splat" vrgather.
2840  // TODO: We can splat the most-common index and fix-up any stragglers, if
2841  // that's beneficial.
2842  if (RHSIndexCounts.size() == 1) {
2843  int SplatIndex = RHSIndexCounts.begin()->getFirst();
2844  Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
2845  DAG.getConstant(SplatIndex, DL, XLenVT), SelectMask,
2846  Gather, VL);
2847  } else {
2848  SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
2849  RHSIndices =
2850  convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
2851  Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices,
2852  SelectMask, Gather, VL);
2853  }
2854  }
2855 
2856  return convertFromScalableVector(VT, Gather, DAG, Subtarget);
2857 }
2858 
2859 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
2860  // Support splats for any type. These should type legalize well.
2861  if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
2862  return true;
2863 
2864  // Only support legal VTs for other shuffles for now.
2865  if (!isTypeLegal(VT))
2866  return false;
2867 
2868  MVT SVT = VT.getSimpleVT();
2869 
2870  bool SwapSources;
2871  int LoSrc, HiSrc;
2872  return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
2873  isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
2874 }
2875 
2876 // Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
2877 // the exponent.
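// Sketch of the idea for a 32-bit element: if x has a single bit set, say
// x = 0x1000, then uint_to_fp gives 4096.0f, whose biased exponent field is
// 12 + 127 = 139; shifting the bit pattern right by 23 and subtracting the
// bias 127 recovers 12, the trailing-zero count. For leading zeros the
// biased exponent is instead subtracted from bias + (EltSize - 1).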
2878 static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
2879  MVT VT = Op.getSimpleValueType();
2880  unsigned EltSize = VT.getScalarSizeInBits();
2881  SDValue Src = Op.getOperand(0);
2882  SDLoc DL(Op);
2883 
2884  // We need a FP type that can represent the value.
2885  // TODO: Use f16 for i8 when possible?
2886  MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
2887  MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
2888 
2889  // Legal types should have been checked in the RISCVTargetLowering
2890  // constructor.
2891  // TODO: Splitting may make sense in some cases.
2892  assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
2893  "Expected legal float type!");
2894 
2895  // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
2896  // The trailing zero count is equal to log2 of this single bit value.
2897  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
2898  SDValue Neg =
2899  DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
2900  Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
2901  }
2902 
2903  // We have a legal FP type, convert to it.
2904  SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
2905  // Bitcast to integer and shift the exponent to the LSB.
2906  EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
2907  SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
2908  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
2909  SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
2910  DAG.getConstant(ShiftAmt, DL, IntVT));
2911  // Truncate back to original type to allow vnsrl.
2912  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
2913  // The exponent contains log2 of the value in biased form.
2914  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
2915 
2916  // For trailing zeros, we just need to subtract the bias.
2917  if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
2918  return DAG.getNode(ISD::SUB, DL, VT, Trunc,
2919  DAG.getConstant(ExponentBias, DL, VT));
2920 
2921  // For leading zeros, we need to remove the bias and convert from log2 to
2922  // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
2923  unsigned Adjust = ExponentBias + (EltSize - 1);
2924  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
2925 }
2926 
2927 // While RVV has alignment restrictions, we should always be able to load as a
2928 // legal equivalently-sized byte-typed vector instead. This method is
2929 // responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
2930 // the load is already correctly-aligned, it returns SDValue().
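// For example, an underaligned load of <vscale x 2 x i32> is re-expressed
// below as a load of <vscale x 8 x i8> followed by a bitcast back to the
// original type (the byte-element load only needs alignment of 1).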
2931 SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
2932  SelectionDAG &DAG) const {
2933  auto *Load = cast<LoadSDNode>(Op);
2934  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
2935 
2936  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2937  Load->getMemoryVT(),
2938  *Load->getMemOperand()))
2939  return SDValue();
2940 
2941  SDLoc DL(Op);
2942  MVT VT = Op.getSimpleValueType();
2943  unsigned EltSizeBits = VT.getScalarSizeInBits();
2944  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2945  "Unexpected unaligned RVV load type");
2946  MVT NewVT =
2947  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2948  assert(NewVT.isValid() &&
2949  "Expecting equally-sized RVV vector types to be legal");
2950  SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
2951  Load->getPointerInfo(), Load->getOriginalAlign(),
2952  Load->getMemOperand()->getFlags());
2953  return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
2954 }
2955 
2956 // While RVV has alignment restrictions, we should always be able to store as a
2957 // legal equivalently-sized byte-typed vector instead. This method is
2958 // responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
2959 // returns SDValue() if the store is already correctly aligned.
2960 SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
2961  SelectionDAG &DAG) const {
2962  auto *Store = cast<StoreSDNode>(Op);
2963  assert(Store && Store->getValue().getValueType().isVector() &&
2964  "Expected vector store");
2965 
2966  if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
2967  Store->getMemoryVT(),
2968  *Store->getMemOperand()))
2969  return SDValue();
2970 
2971  SDLoc DL(Op);
2972  SDValue StoredVal = Store->getValue();
2973  MVT VT = StoredVal.getSimpleValueType();
2974  unsigned EltSizeBits = VT.getScalarSizeInBits();
2975  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
2976  "Unexpected unaligned RVV store type");
2977  MVT NewVT =
2978  MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
2979  assert(NewVT.isValid() &&
2980  "Expecting equally-sized RVV vector types to be legal");
2981  StoredVal = DAG.getBitcast(NewVT, StoredVal);
2982  return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
2983  Store->getPointerInfo(), Store->getOriginalAlign(),
2984  Store->getMemOperand()->getFlags());
2985 }
2986 
2987 static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
2988  const RISCVSubtarget &Subtarget) {
2989  assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
2990 
2991  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
2992 
2993  // All simm32 constants should be handled by isel.
2994  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
2995  // this check redundant, but small immediates are common so this check
2996  // should have better compile time.
2997  if (isInt<32>(Imm))
2998  return Op;
2999 
3000  // We only need to cost the immediate, if constant pool lowering is enabled.
3001  if (!Subtarget.useConstantPoolForLargeInts())
3002  return Op;
3003 
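  // Roughly: if materializing the immediate needs more instructions than
  // getMaxBuildIntsCost() allows (e.g. a 64-bit constant with no exploitable
  // structure can take a lui/addi/slli chain of five or more instructions),
  // return SDValue() so the default expansion emits a constant-pool load.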
3004  RISCVMatInt::InstSeq Seq =
3005  RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
3006  if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
3007  return Op;
3008 
3009  // Expand to a constant pool using the default expansion code.
3010  return SDValue();
3011 }
3012 
3013 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
3014  SelectionDAG &DAG) const {
3015  switch (Op.getOpcode()) {
3016  default:
3017  report_fatal_error("unimplemented operand");
3018  case ISD::GlobalAddress:
3019  return lowerGlobalAddress(Op, DAG);
3020  case ISD::BlockAddress:
3021  return lowerBlockAddress(Op, DAG);
3022  case ISD::ConstantPool:
3023  return lowerConstantPool(Op, DAG);
3024  case ISD::JumpTable:
3025  return lowerJumpTable(Op, DAG);
3026  case ISD::GlobalTLSAddress:
3027  return lowerGlobalTLSAddress(Op, DAG);
3028  case ISD::Constant:
3029  return lowerConstant(Op, DAG, Subtarget);
3030  case ISD::SELECT:
3031  return lowerSELECT(Op, DAG);
3032  case ISD::BRCOND:
3033  return lowerBRCOND(Op, DAG);
3034  case ISD::VASTART:
3035  return lowerVASTART(Op, DAG);
3036  case ISD::FRAMEADDR:
3037  return lowerFRAMEADDR(Op, DAG);
3038  case ISD::RETURNADDR:
3039  return lowerRETURNADDR(Op, DAG);
3040  case ISD::SHL_PARTS:
3041  return lowerShiftLeftParts(Op, DAG);
3042  case ISD::SRA_PARTS:
3043  return lowerShiftRightParts(Op, DAG, true);
3044  case ISD::SRL_PARTS:
3045  return lowerShiftRightParts(Op, DAG, false);
3046  case ISD::BITCAST: {
3047  SDLoc DL(Op);
3048  EVT VT = Op.getValueType();
3049  SDValue Op0 = Op.getOperand(0);
3050  EVT Op0VT = Op0.getValueType();
3051  MVT XLenVT = Subtarget.getXLenVT();
3052  if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
3053  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
3054  SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
3055  return FPConv;
3056  }
3057  if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
3058  Subtarget.hasStdExtF()) {
3059  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
3060  SDValue FPConv =
3061  DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
3062  return FPConv;
3063  }
3064 
3065  // Consider other scalar<->scalar casts as legal if the types are legal.
3066  // Otherwise expand them.
3067  if (!VT.isVector() && !Op0VT.isVector()) {
3068  if (isTypeLegal(VT) && isTypeLegal(Op0VT))
3069  return Op;
3070  return SDValue();
3071  }
3072 
3073  assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
3074  "Unexpected types");
3075 
3076  if (VT.isFixedLengthVector()) {
3077  // We can handle fixed length vector bitcasts with a simple replacement
3078  // in isel.
3079  if (Op0VT.isFixedLengthVector())
3080  return Op;
3081  // When bitcasting from scalar to fixed-length vector, insert the scalar
3082  // into a one-element vector of the result type, and perform a vector
3083  // bitcast.
3084  if (!Op0VT.isVector()) {
3085  EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
3086  if (!isTypeLegal(BVT))
3087  return SDValue();
3088  return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
3089  DAG.getUNDEF(BVT), Op0,
3090  DAG.getConstant(0, DL, XLenVT)));
3091  }
3092  return SDValue();
3093  }
3094  // Custom-legalize bitcasts from fixed-length vector types to scalar types
3095  // thus: bitcast the vector to a one-element vector type whose element type
3096  // is the same as the result type, and extract the first element.
3097  if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
3098  EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
3099  if (!isTypeLegal(BVT))
3100  return SDValue();
3101  SDValue BVec = DAG.getBitcast(BVT, Op0);
3102  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
3103  DAG.getConstant(0, DL, XLenVT));
3104  }
3105  return SDValue();
3106  }
3107  case ISD::INTRINSIC_WO_CHAIN:
3108  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3109  case ISD::INTRINSIC_W_CHAIN:
3110  return LowerINTRINSIC_W_CHAIN(Op, DAG);
3111  case ISD::INTRINSIC_VOID:
3112  return LowerINTRINSIC_VOID(Op, DAG);
3113  case ISD::BSWAP:
3114  case ISD::BITREVERSE: {
3115  MVT VT = Op.getSimpleValueType();
3116  SDLoc DL(Op);
3117  if (Subtarget.hasStdExtZbp()) {
3118  // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
3119  // Start with the maximum immediate value which is the bitwidth - 1.
3120  unsigned Imm = VT.getSizeInBits() - 1;
3121  // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
3122  if (Op.getOpcode() == ISD::BSWAP)
3123  Imm &= ~0x7U;
3124  return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
3125  DAG.getConstant(Imm, DL, VT));
3126  }
3127  assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
3128  assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
3129  // Expand bitreverse to a bswap(rev8) followed by brev8.
3130  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
3131  // We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
3132  // as brev8 by an isel pattern.
3133  return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
3134  DAG.getConstant(7, DL, VT));
3135  }
3136  case ISD::FSHL:
3137  case ISD::FSHR: {
3138  MVT VT = Op.getSimpleValueType();
3139  assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
3140  SDLoc DL(Op);
3141  // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
3142  // use log2(XLen) bits. Mask the shift amount accordingly to prevent
3143  // accidentally setting the extra bit.
3144  unsigned ShAmtWidth = Subtarget.getXLen() - 1;
3145  SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
3146  DAG.getConstant(ShAmtWidth, DL, VT));
3147  // fshl and fshr concatenate their operands in the same order. fsr and fsl
3148  // instructions use different orders. fshl will return its first operand for
3149  // shift of zero, fshr will return its second operand. fsl and fsr both
3150  // return rs1 so the ISD nodes need to have different operand orders.
3151  // Shift amount is in rs2.
3152  SDValue Op0 = Op.getOperand(0);
3153  SDValue Op1 = Op.getOperand(1);
3154  unsigned Opc = RISCVISD::FSL;
3155  if (Op.getOpcode() == ISD::FSHR) {
3156  std::swap(Op0, Op1);
3157  Opc = RISCVISD::FSR;
3158  }
3159  return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
3160  }
3161  case ISD::TRUNCATE:
3162  // Only custom-lower vector truncates
3163  if (!Op.getSimpleValueType().isVector())
3164  return Op;
3165  return lowerVectorTruncLike(Op, DAG);
3166  case ISD::ANY_EXTEND:
3167  case ISD::ZERO_EXTEND:
3168  if (Op.getOperand(0).getValueType().isVector() &&
3169  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3170  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
3171  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
3172  case ISD::SIGN_EXTEND:
3173  if (Op.getOperand(0).getValueType().isVector() &&
3174  Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3175  return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
3176  return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
3177  case ISD::SPLAT_VECTOR_PARTS:
3178  return lowerSPLAT_VECTOR_PARTS(Op, DAG);
3179  case ISD::INSERT_VECTOR_ELT:
3180  return lowerINSERT_VECTOR_ELT(Op, DAG);
3181  case ISD::EXTRACT_VECTOR_ELT:
3182  return lowerEXTRACT_VECTOR_ELT(Op, DAG);
3183  case ISD::VSCALE: {
3184  MVT VT = Op.getSimpleValueType();
3185  SDLoc DL(Op);
3186  SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
3187  // We define our scalable vector types for lmul=1 to use a 64 bit known
3188  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
3189  // vscale as VLENB / 8.
3190  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
3191  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
3192  report_fatal_error("Support for VLEN==32 is incomplete.");
3193  // We assume VLENB is a multiple of 8. We manually choose the best shift
3194  // here because SimplifyDemandedBits isn't always able to simplify it.
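  // For example, (vscale x 4) becomes (srl VLENB, 1), (vscale x 16) becomes
  // (shl VLENB, 1), and (vscale x 24) takes the multiple-of-8 path below and
  // becomes (mul VLENB, 3).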
3195  uint64_t Val = Op.getConstantOperandVal(0);
3196  if (isPowerOf2_64(Val)) {
3197  uint64_t Log2 = Log2_64(Val);
3198  if (Log2 < 3)
3199  return DAG.getNode(ISD::SRL, DL, VT, VLENB,
3200  DAG.getConstant(3 - Log2, DL, VT));
3201  if (Log2 > 3)
3202  return DAG.getNode(ISD::SHL, DL, VT, VLENB,
3203  DAG.getConstant(Log2 - 3, DL, VT));
3204  return VLENB;
3205  }
3206  // If the multiplier is a multiple of 8, scale it down to avoid needing
3207  // to shift the VLENB value.
3208  if ((Val % 8) == 0)
3209  return DAG.getNode(ISD::MUL, DL, VT, VLENB,
3210  DAG.getConstant(Val / 8, DL, VT));
3211 
3212  SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
3213  DAG.getConstant(3, DL, VT));
3214  return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
3215  }
3216  case ISD::FPOWI: {
3217  // Custom promote f16 powi with illegal i32 integer type on RV64. Once
3218  // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
3219  if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
3220  Op.getOperand(1).getValueType() == MVT::i32) {
3221  SDLoc DL(Op);
3222  SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
3223  SDValue Powi =
3224  DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
3225  return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
3226  DAG.getIntPtrConstant(0, DL));
3227  }
3228  return SDValue();
3229  }
3230  case ISD::FP_EXTEND:
3231  case ISD::FP_ROUND:
3232  if (!Op.getValueType().isVector())
3233  return Op;
3234  return lowerVectorFPExtendOrRoundLike(Op, DAG);
3235  case ISD::FP_TO_SINT:
3236  case ISD::FP_TO_UINT:
3237  case ISD::SINT_TO_FP:
3238  case ISD::UINT_TO_FP: {
3239  // RVV can only do fp<->int conversions to types half/double the size of
3240  // the source. We custom-lower any conversions that do two hops into
3241  // sequences.
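  // For example, sint_to_fp from v4i8 to v4f32 is widened by first
  // sign-extending to v4i32 and then doing a single-hop sint_to_fp, while
  // fp_to_sint from v4f64 to v4i8 does one narrowing fp_to_sint to v4i32 and
  // then truncates the integer result.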
3242  MVT VT = Op.getSimpleValueType();
3243  if (!VT.isVector())
3244  return Op;
3245  SDLoc DL(Op);
3246  SDValue Src = Op.getOperand(0);
3247  MVT EltVT = VT.getVectorElementType();
3248  MVT SrcVT = Src.getSimpleValueType();
3249  MVT SrcEltVT = SrcVT.getVectorElementType();
3250  unsigned EltSize = EltVT.getSizeInBits();
3251  unsigned SrcEltSize = SrcEltVT.getSizeInBits();
3252  assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
3253  "Unexpected vector element types");
3254 
3255  bool IsInt2FP = SrcEltVT.isInteger();
3256  // Widening conversions
3257  if (EltSize > (2 * SrcEltSize)) {
3258  if (IsInt2FP) {
3259  // Do a regular integer sign/zero extension then convert to float.
3260  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
3261  VT.getVectorElementCount());
3262  unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
3263  ? ISD::ZERO_EXTEND
3264  : ISD::SIGN_EXTEND;
3265  SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
3266  return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
3267  }
3268  // FP2Int
3269  assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
3270  // Do one doubling fp_extend then complete the operation by converting
3271  // to int.
3272  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3273  SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
3274  return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
3275  }
3276 
3277  // Narrowing conversions
3278  if (SrcEltSize > (2 * EltSize)) {
3279  if (IsInt2FP) {
3280  // One narrowing int_to_fp, then an fp_round.
3281  assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
3282  MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
3283  SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
3284  return DAG.getFPExtendOrRound(Int2FP, DL, VT);
3285  }
3286  // FP2Int
3287  // One narrowing fp_to_int, then truncate the integer. If the float isn't
3288  // representable by the integer, the result is poison.
3289  MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
3290  VT.getVectorElementCount());
3291  SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
3292  return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
3293  }
3294 
3295  // Scalable vectors can exit here. Patterns will handle equally-sized
3296  // conversions and halving/doubling ones.
3297  if (!VT.isFixedLengthVector())
3298  return Op;
3299 
3300  // For fixed-length vectors we lower to a custom "VL" node.
3301  unsigned RVVOpc = 0;
3302  switch (Op.getOpcode()) {
3303  default:
3304  llvm_unreachable("Impossible opcode");
3305  case ISD::FP_TO_SINT:
3306  RVVOpc = RISCVISD::FP_TO_SINT_VL;
3307  break;
3308  case ISD::FP_TO_UINT:
3309  RVVOpc = RISCVISD::FP_TO_UINT_VL;
3310  break;
3311  case ISD::SINT_TO_FP:
3312  RVVOpc = RISCVISD::SINT_TO_FP_VL;
3313  break;
3314  case ISD::UINT_TO_FP:
3315  RVVOpc = RISCVISD::UINT_TO_FP_VL;
3316  break;
3317  }
3318 
3319  MVT ContainerVT, SrcContainerVT;
3320  // Derive the reference container type from the larger vector type.
3321  if (SrcEltSize > EltSize) {
3322  SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
3323  ContainerVT =
3324  SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
3325  } else {
3326  ContainerVT = getContainerForFixedLengthVector(VT);
3327  SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
3328  }
3329 
3330  SDValue Mask, VL;
3331  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3332 
3333  Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3334  Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
3335  return convertFromScalableVector(VT, Src, DAG, Subtarget);
3336  }
3337  case ISD::FP_TO_SINT_SAT:
3338  case ISD::FP_TO_UINT_SAT:
3339  return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
3340  case ISD::FTRUNC:
3341  case ISD::FCEIL:
3342  case ISD::FFLOOR:
3343  return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
3344  case ISD::FROUND:
3345  return lowerFROUND(Op, DAG);
3346  case ISD::VECREDUCE_ADD:
3347  case ISD::VECREDUCE_UMAX:
3348  case ISD::VECREDUCE_SMAX:
3349  case ISD::VECREDUCE_UMIN:
3350  case ISD::VECREDUCE_SMIN:
3351  return lowerVECREDUCE(Op, DAG);
3352  case ISD::VECREDUCE_AND:
3353  case ISD::VECREDUCE_OR:
3354  case ISD::VECREDUCE_XOR:
3355  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
3356  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
3357  return lowerVECREDUCE(Op, DAG);
3358  case ISD::VECREDUCE_FADD:
3359  case ISD::VECREDUCE_SEQ_FADD:
3360  case ISD::VECREDUCE_FMIN:
3361  case ISD::VECREDUCE_FMAX:
3362  return lowerFPVECREDUCE(Op, DAG);
3363  case ISD::VP_REDUCE_ADD:
3364  case ISD::VP_REDUCE_UMAX:
3365  case ISD::VP_REDUCE_SMAX:
3366  case ISD::VP_REDUCE_UMIN:
3367  case ISD::VP_REDUCE_SMIN:
3368  case ISD::VP_REDUCE_FADD:
3369  case ISD::VP_REDUCE_SEQ_FADD:
3370  case ISD::VP_REDUCE_FMIN:
3371  case ISD::VP_REDUCE_FMAX:
3372  return lowerVPREDUCE(Op, DAG);
3373  case ISD::VP_REDUCE_AND:
3374  case ISD::VP_REDUCE_OR:
3375  case ISD::VP_REDUCE_XOR:
3376  if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
3377  return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
3378  return lowerVPREDUCE(Op, DAG);
3379  case ISD::INSERT_SUBVECTOR:
3380  return lowerINSERT_SUBVECTOR(Op, DAG);
3381  case ISD::EXTRACT_SUBVECTOR:
3382  return lowerEXTRACT_SUBVECTOR(Op, DAG);
3383  case ISD::STEP_VECTOR:
3384  return lowerSTEP_VECTOR(Op, DAG);
3385  case ISD::VECTOR_REVERSE:
3386  return lowerVECTOR_REVERSE(Op, DAG);
3387  case ISD::VECTOR_SPLICE:
3388  return lowerVECTOR_SPLICE(Op, DAG);
3389  case ISD::BUILD_VECTOR:
3390  return lowerBUILD_VECTOR(Op, DAG, Subtarget);
3391  case ISD::SPLAT_VECTOR:
3392  if (Op.getValueType().getVectorElementType() == MVT::i1)
3393  return lowerVectorMaskSplat(Op, DAG);
3394  return SDValue();
3395  case ISD::VECTOR_SHUFFLE:
3396  return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
3397  case ISD::CONCAT_VECTORS: {
3398  // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
3399  // better than going through the stack, as the default expansion does.
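 // For example (illustrative): (concat_vectors v2i32:A, v2i32:B) producing a
 // v4i32 becomes (insert_subvector (insert_subvector undef, A, 0), B, 2),
 // i.e. one INSERT_SUBVECTOR per non-undef operand at index i * NumOpElts.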
3400  SDLoc DL(Op);
3401  MVT VT = Op.getSimpleValueType();
3402  unsigned NumOpElts =
3403  Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
3404  SDValue Vec = DAG.getUNDEF(VT);
3405  for (const auto &OpIdx : enumerate(Op->ops())) {
3406  SDValue SubVec = OpIdx.value();
3407  // Don't insert undef subvectors.
3408  if (SubVec.isUndef())
3409  continue;
3410  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
3411  DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
3412  }
3413  return Vec;
3414  }
3415  case ISD::LOAD:
3416  if (auto V = expandUnalignedRVVLoad(Op, DAG))
3417  return V;
3418  if (Op.getValueType().isFixedLengthVector())
3419  return lowerFixedLengthVectorLoadToRVV(Op, DAG);
3420  return Op;
3421  case ISD::STORE:
3422  if (auto V = expandUnalignedRVVStore(Op, DAG))
3423  return V;
3424  if (Op.getOperand(1).getValueType().isFixedLengthVector())
3425  return lowerFixedLengthVectorStoreToRVV(Op, DAG);
3426  return Op;
3427  case ISD::MLOAD:
3428  case ISD::VP_LOAD:
3429  return lowerMaskedLoad(Op, DAG);
3430  case ISD::MSTORE:
3431  case ISD::VP_STORE:
3432  return lowerMaskedStore(Op, DAG);
3433  case ISD::SETCC:
3434  return lowerFixedLengthVectorSetccToRVV(Op, DAG);
3435  case ISD::ADD:
3436  return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
3437  case ISD::SUB:
3438  return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
3439  case ISD::MUL:
3440  return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
3441  case ISD::MULHS:
3442  return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
3443  case ISD::MULHU:
3444  return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
3445  case ISD::AND:
3446  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
3447  RISCVISD::AND_VL);
3448  case ISD::OR:
3449  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
3450  RISCVISD::OR_VL);
3451  case ISD::XOR:
3452  return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
3453  RISCVISD::XOR_VL);
3454  case ISD::SDIV:
3455  return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
3456  case ISD::SREM:
3457  return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
3458  case ISD::UDIV:
3459  return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
3460  case ISD::UREM:
3461  return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
3462  case ISD::SHL:
3463  case ISD::SRA:
3464  case ISD::SRL:
3465  if (Op.getSimpleValueType().isFixedLengthVector())
3466  return lowerFixedLengthVectorShiftToRVV(Op, DAG);
3467  // This can be called for an i32 shift amount that needs to be promoted.
3468  assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
3469  "Unexpected custom legalisation");
3470  return SDValue();
3471  case ISD::SADDSAT:
3472  return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
3473  case ISD::UADDSAT:
3474  return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
3475  case ISD::SSUBSAT:
3476  return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
3477  case ISD::USUBSAT:
3478  return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
3479  case ISD::FADD:
3480  return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
3481  case ISD::FSUB:
3482  return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
3483  case ISD::FMUL:
3484  return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
3485  case ISD::FDIV:
3486  return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
3487  case ISD::FNEG:
3488  return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
3489  case ISD::FABS:
3490  return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
3491  case ISD::FSQRT:
3492  return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
3493  case ISD::FMA:
3494  return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
3495  case ISD::SMIN:
3496  return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
3497  case ISD::SMAX:
3498  return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
3499  case ISD::UMIN:
3500  return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
3501  case ISD::UMAX:
3502  return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
3503  case ISD::FMINNUM:
3504  return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
3505  case ISD::FMAXNUM:
3506  return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
3507  case ISD::ABS:
3508  return lowerABS(Op, DAG);
3509  case ISD::CTLZ_ZERO_UNDEF:
3510  case ISD::CTTZ_ZERO_UNDEF:
3511  return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
3512  case ISD::VSELECT:
3513  return lowerFixedLengthVectorSelectToRVV(Op, DAG);
3514  case ISD::FCOPYSIGN:
3515  return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
3516  case ISD::MGATHER:
3517  case ISD::VP_GATHER:
3518  return lowerMaskedGather(Op, DAG);
3519  case ISD::MSCATTER:
3520  case ISD::VP_SCATTER:
3521  return lowerMaskedScatter(Op, DAG);
3522  case ISD::FLT_ROUNDS_:
3523  return lowerGET_ROUNDING(Op, DAG);
3524  case ISD::SET_ROUNDING:
3525  return lowerSET_ROUNDING(Op, DAG);
3526  case ISD::EH_DWARF_CFA:
3527  return lowerEH_DWARF_CFA(Op, DAG);
3528  case ISD::VP_SELECT:
3529  return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
3530  case ISD::VP_MERGE:
3531  return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
3532  case ISD::VP_ADD:
3533  return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
3534  case ISD::VP_SUB:
3535  return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
3536  case ISD::VP_MUL:
3537  return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
3538  case ISD::VP_SDIV:
3539  return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
3540  case ISD::VP_UDIV:
3541  return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
3542  case ISD::VP_SREM:
3543  return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
3544  case ISD::VP_UREM:
3545  return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
3546  case ISD::VP_AND:
3547  return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
3548  case ISD::VP_OR:
3549  return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
3550  case ISD::VP_XOR:
3551  return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
3552  case ISD::VP_ASHR:
3553  return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
3554  case ISD::VP_LSHR:
3555  return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
3556  case ISD::VP_SHL:
3557  return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
3558  case ISD::VP_FADD:
3559  return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
3560  case ISD::VP_FSUB:
3561  return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
3562  case ISD::VP_FMUL:
3563  return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
3564  case ISD::VP_FDIV:
3565  return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
3566  case ISD::VP_FNEG:
3567  return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
3568  case ISD::VP_FMA:
3569  return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
3570  case ISD::VP_SIGN_EXTEND:
3571  case ISD::VP_ZERO_EXTEND:
3572  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3573  return lowerVPExtMaskOp(Op, DAG);
3574  return lowerVPOp(Op, DAG,
3575  Op.getOpcode() == ISD::VP_SIGN_EXTEND
3576  ? RISCVISD::VSEXT_VL
3577  : RISCVISD::VZEXT_VL);
3578  case ISD::VP_TRUNCATE:
3579  return lowerVectorTruncLike(Op, DAG);
3580  case ISD::VP_FP_EXTEND:
3581  case ISD::VP_FP_ROUND:
3582  return lowerVectorFPExtendOrRoundLike(Op, DAG);
3583  case ISD::VP_FPTOSI:
3584  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
3585  case ISD::VP_FPTOUI:
3586  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
3587  case ISD::VP_SITOFP:
3588  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
3589  case ISD::VP_UITOFP:
3590  return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
3591  case ISD::VP_SETCC:
3592  if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
3593  return lowerVPSetCCMaskOp(Op, DAG);
3594  return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
3595  }
3596 }
3597 
3598 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
3599  SelectionDAG &DAG, unsigned Flags) {
3600  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
3601 }
3602 
3603 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
3604  SelectionDAG &DAG, unsigned Flags) {
3605  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
3606  Flags);
3607 }
3608 
3609 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
3610  SelectionDAG &DAG, unsigned Flags) {
3611  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
3612  N->getOffset(), Flags);
3613 }
3614 
3615 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
3616  SelectionDAG &DAG, unsigned Flags) {
3617  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
3618 }
3619 
3620 template <class NodeTy>
3621 SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3622  bool IsLocal) const {
3623  SDLoc DL(N);
3624  EVT Ty = getPointerTy(DAG.getDataLayout());
3625 
3626  if (isPositionIndependent()) {
3627  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3628  if (IsLocal)
3629  // Use PC-relative addressing to access the symbol. This generates the
3630  // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
3631  // %pcrel_lo(auipc)).
3632  return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
3633 
3634  // Use PC-relative addressing to access the GOT for this symbol, then load
3635  // the address from the GOT. This generates the pattern (PseudoLA sym),
3636  // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
3637  MachineFunction &MF = DAG.getMachineFunction();
3638  MachineMemOperand *MemOp = MF.getMachineMemOperand(
3639  MachinePointerInfo::getGOT(MF),
3640  MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3641  MachineMemOperand::MOInvariant,
3642  LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3643  SDValue Load =
3644  DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
3645  {DAG.getEntryNode(), Addr}, Ty, MemOp);
3646  return Load;
3647  }
3648 
3649  switch (getTargetMachine().getCodeModel()) {
3650  default:
3651  report_fatal_error("Unsupported code model for lowering");
3652  case CodeModel::Small: {
3653  // Generate a sequence for accessing addresses within the first 2 GiB of
3654  // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
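 // The emitted assembly is roughly (illustrative):
 //   lui  a0, %hi(sym)
 //   addi a0, a0, %lo(sym)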
3655  SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
3656  SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
3657  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
3658  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
3659  }
3660  case CodeModel::Medium: {
3661  // Generate a sequence for accessing addresses within any 2GiB range within
3662  // the address space. This generates the pattern (PseudoLLA sym), which
3663  // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
3664  SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
3665  return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
3666  }
3667  }
3668 }
3669 
3670 SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
3671  SelectionDAG &DAG) const {
3672  SDLoc DL(Op);
3673  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3674  assert(N->getOffset() == 0 && "unexpected offset in global node");
3675 
3676  const GlobalValue *GV = N->getGlobal();
3677  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
3678  return getAddr(N, DAG, IsLocal);
3679 }
3680 
3681 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
3682  SelectionDAG &DAG) const {
3683  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
3684 
3685  return getAddr(N, DAG);
3686 }
3687 
3688 SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
3689  SelectionDAG &DAG) const {
3690  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
3691 
3692  return getAddr(N, DAG);
3693 }
3694 
3695 SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
3696  SelectionDAG &DAG) const {
3697  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
3698 
3699  return getAddr(N, DAG);
3700 }
3701 
3702 SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
3703  SelectionDAG &DAG,
3704  bool UseGOT) const {
3705  SDLoc DL(N);
3706  EVT Ty = getPointerTy(DAG.getDataLayout());
3707  const GlobalValue *GV = N->getGlobal();
3708  MVT XLenVT = Subtarget.getXLenVT();
3709 
3710  if (UseGOT) {
3711  // Use PC-relative addressing to access the GOT for this TLS symbol, then
3712  // load the address from the GOT and add the thread pointer. This generates
3713  // the pattern (PseudoLA_TLS_IE sym), which expands to
3714  // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
3715  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3716  MachineFunction &MF = DAG.getMachineFunction();
3717  MachineMemOperand *MemOp = MF.getMachineMemOperand(
3718  MachinePointerInfo::getGOT(MF),
3719  MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
3720  MachineMemOperand::MOInvariant,
3721  LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
3722  SDValue Load = DAG.getMemIntrinsicNode(
3723  RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
3724  {DAG.getEntryNode(), Addr}, Ty, MemOp);
3725 
3726  // Add the thread pointer.
3727  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3728  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
3729  }
3730 
3731  // Generate a sequence for accessing the address relative to the thread
3732  // pointer, with the appropriate adjustment for the thread pointer offset.
3733  // This generates the pattern
3734  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
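 // In assembly this is roughly (illustrative):
 //   lui  a0, %tprel_hi(sym)
 //   add  a0, a0, tp, %tprel_add(sym)
 //   addi a0, a0, %tprel_lo(sym)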
3735  SDValue AddrHi =
3736  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
3737  SDValue AddrAdd =
3738  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
3739  SDValue AddrLo =
3740  DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
3741 
3742  SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
3743  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
3744  SDValue MNAdd =
3745  DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
3746  return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
3747 }
3748 
3749 SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
3750  SelectionDAG &DAG) const {
3751  SDLoc DL(N);
3752  EVT Ty = getPointerTy(DAG.getDataLayout());
3753  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
3754  const GlobalValue *GV = N->getGlobal();
3755 
3756  // Use a PC-relative addressing mode to access the global dynamic GOT address.
3757  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
3758  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
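 // Combined with the __tls_get_addr call built below, the generated code is
 // roughly (illustrative):
 //   la.tls.gd a0, sym
 //   call      __tls_get_addr@plt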
3759  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
3760  SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
3761 
3762  // Prepare argument list to generate call.
3763  ArgListTy Args;
3764  ArgListEntry Entry;
3765  Entry.Node = Load;
3766  Entry.Ty = CallTy;
3767  Args.push_back(Entry);
3768 
3769  // Setup call to __tls_get_addr.
3770  TargetLowering::CallLoweringInfo CLI(DAG);
3771  CLI.setDebugLoc(DL)
3772  .setChain(DAG.getEntryNode())
3773  .setLibCallee(CallingConv::C, CallTy,
3774  DAG.getExternalSymbol("__tls_get_addr", Ty),
3775  std::move(Args));
3776 
3777  return LowerCallTo(CLI).first;
3778 }
3779 
3780 SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
3781  SelectionDAG &DAG) const {
3782  SDLoc DL(Op);
3783  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
3784  assert(N->getOffset() == 0 && "unexpected offset in global node");
3785 
3786  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
3787 
3788  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3789  CallingConv::GHC)
3790  report_fatal_error("In GHC calling convention TLS is not supported");
3791 
3792  SDValue Addr;
3793  switch (Model) {
3794  case TLSModel::LocalExec:
3795  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
3796  break;
3797  case TLSModel::InitialExec:
3798  Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
3799  break;
3800  case TLSModel::LocalDynamic:
3801  case TLSModel::GeneralDynamic:
3802  Addr = getDynamicTLSAddr(N, DAG);
3803  break;
3804  }
3805 
3806  return Addr;
3807 }
3808 
3809 SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3810  SDValue CondV = Op.getOperand(0);
3811  SDValue TrueV = Op.getOperand(1);
3812  SDValue FalseV = Op.getOperand(2);
3813  SDLoc DL(Op);
3814  MVT VT = Op.getSimpleValueType();
3815  MVT XLenVT = Subtarget.getXLenVT();
3816 
3817  // Lower vector SELECTs to VSELECTs by splatting the condition.
3818  if (VT.isVector()) {
3819  MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
3820  SDValue CondSplat = VT.isScalableVector()
3821  ? DAG.getSplatVector(SplatCondVT, DL, CondV)
3822  : DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
3823  return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
3824  }
3825 
3826  // If the result type is XLenVT and CondV is the output of a SETCC node
3827  // which also operated on XLenVT inputs, then merge the SETCC node into the
3828  // lowered RISCVISD::SELECT_CC to take advantage of the integer
3829  // compare+branch instructions. i.e.:
3830  // (select (setcc lhs, rhs, cc), truev, falsev)
3831  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
3832  if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
3833  CondV.getOperand(0).getSimpleValueType() == XLenVT) {
3834  SDValue LHS = CondV.getOperand(0);
3835  SDValue RHS = CondV.getOperand(1);
3836  const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
3837  ISD::CondCode CCVal = CC->get();
3838 
3839  // Special case for a select of 2 constants that have a difference of 1.
3840  // Normally this is done by DAGCombine, but if the select is introduced by
3841  // type legalization or op legalization, we miss it. Restricting to SETLT
3842  // case for now because that is what signed saturating add/sub need.
3843  // FIXME: We don't need the condition to be SETLT or even a SETCC,
3844  // but we would probably want to swap the true/false values if the condition
3845  // is SETGE/SETLE to avoid an XORI.
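 // Illustrative examples: (select (setlt x, y), 4, 3) folds to
 // (add (setlt x, y), 3), and (select (setlt x, y), 3, 4) folds to
 // (sub 4, (setlt x, y)).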
3846  if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
3847  CCVal == ISD::SETLT) {
3848  const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
3849  const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
3850  if (TrueVal - 1 == FalseVal)
3851  return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
3852  if (TrueVal + 1 == FalseVal)
3853  return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
3854  }
3855 
3856  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3857 
3858  SDValue TargetCC = DAG.getCondCode(CCVal);
3859  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
3860  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3861  }
3862 
3863  // Otherwise:
3864  // (select condv, truev, falsev)
3865  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
3866  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
3867  SDValue SetNE = DAG.getCondCode(ISD::SETNE);
3868 
3869  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
3870 
3871  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
3872 }
3873 
3874 SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
3875  SDValue CondV = Op.getOperand(1);
3876  SDLoc DL(Op);
3877  MVT XLenVT = Subtarget.getXLenVT();
3878 
3879  if (CondV.getOpcode() == ISD::SETCC &&
3880  CondV.getOperand(0).getValueType() == XLenVT) {
3881  SDValue LHS = CondV.getOperand(0);
3882  SDValue RHS = CondV.getOperand(1);
3883  ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
3884 
3885  translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
3886 
3887  SDValue TargetCC = DAG.getCondCode(CCVal);
3888  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3889  LHS, RHS, TargetCC, Op.getOperand(2));
3890  }
3891 
3892  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
3893  CondV, DAG.getConstant(0, DL, XLenVT),
3894  DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
3895 }
3896 
3897 SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3898  MachineFunction &MF = DAG.getMachineFunction();
3899  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
3900 
3901  SDLoc DL(Op);
3902  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3903  getPointerTy(MF.getDataLayout()));
3904 
3905  // vastart just stores the address of the VarArgsFrameIndex slot into the
3906  // memory location argument.
3907  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3908  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
3909  MachinePointerInfo(SV));
3910 }
3911 
3912 SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
3913  SelectionDAG &DAG) const {
3914  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3915  MachineFunction &MF = DAG.getMachineFunction();
3916  MachineFrameInfo &MFI = MF.getFrameInfo();
3917  MFI.setFrameAddressIsTaken(true);
3918  Register FrameReg = RI.getFrameRegister(MF);
3919  int XLenInBytes = Subtarget.getXLen() / 8;
3920 
3921  EVT VT = Op.getValueType();
3922  SDLoc DL(Op);
3923  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
3924  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3925  while (Depth--) {
3926  int Offset = -(XLenInBytes * 2);
3927  SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
3928  DAG.getIntPtrConstant(Offset, DL));
3929  FrameAddr =
3930  DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
3931  }
3932  return FrameAddr;
3933 }
3934 
3935 SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
3936  SelectionDAG &DAG) const {
3937  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
3938  MachineFunction &MF = DAG.getMachineFunction();
3939  MachineFrameInfo &MFI = MF.getFrameInfo();
3940  MFI.setReturnAddressIsTaken(true);
3941  MVT XLenVT = Subtarget.getXLenVT();
3942  int XLenInBytes = Subtarget.getXLen() / 8;
3943 
3944  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3945  return SDValue();
3946 
3947  EVT VT = Op.getValueType();
3948  SDLoc DL(Op);
3949  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3950  if (Depth) {
3951  int Off = -XLenInBytes;
3952  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3953  SDValue Offset = DAG.getConstant(Off, DL, VT);
3954  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
3955  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
3956  MachinePointerInfo());
3957  }
3958 
3959  // Return the value of the return address register, marking it an implicit
3960  // live-in.
3961  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
3962  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
3963 }
3964 
3965 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
3966  SelectionDAG &DAG) const {
3967  SDLoc DL(Op);
3968  SDValue Lo = Op.getOperand(0);
3969  SDValue Hi = Op.getOperand(1);
3970  SDValue Shamt = Op.getOperand(2);
3971  EVT VT = Lo.getValueType();
3972 
3973  // if Shamt-XLEN < 0: // Shamt < XLEN
3974  // Lo = Lo << Shamt
3975  // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
3976  // else:
3977  // Lo = 0
3978  // Hi = Lo << (Shamt-XLEN)
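 // Note that the else branch uses the original (pre-zeroing) Lo. For example
 // (illustrative, XLEN=32): Shamt=8 gives Lo = Lo << 8 and
 // Hi = (Hi << 8) | (Lo >>u 24), while Shamt=40 gives Lo = 0 and Hi = Lo << 8.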
3979 
3980  SDValue Zero = DAG.getConstant(0, DL, VT);
3981  SDValue One = DAG.getConstant(1, DL, VT);
3982  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
3983  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
3984  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
3985  SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
3986 
3987  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
3988  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
3989  SDValue ShiftRightLo =
3990  DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
3991  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
3992  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
3993  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
3994 
3995  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
3996 
3997  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
3998  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
3999 
4000  SDValue Parts[2] = {Lo, Hi};
4001  return DAG.getMergeValues(Parts, DL);
4002 }
4003 
4004 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
4005  bool IsSRA) const {
4006  SDLoc DL(Op);
4007  SDValue Lo = Op.getOperand(0);
4008  SDValue Hi = Op.getOperand(1);
4009  SDValue Shamt = Op.getOperand(2);
4010  EVT VT = Lo.getValueType();
4011 
4012  // SRA expansion:
4013  // if Shamt-XLEN < 0: // Shamt < XLEN
4014  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
4015  // Hi = Hi >>s Shamt
4016  // else:
4017  // Lo = Hi >>s (Shamt-XLEN);
4018  // Hi = Hi >>s (XLEN-1)
4019  //
4020  // SRL expansion:
4021  // if Shamt-XLEN < 0: // Shamt < XLEN
4022  // Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ XLEN-1))
4023  // Hi = Hi >>u Shamt
4024  // else:
4025  // Lo = Hi >>u (Shamt-XLEN);
4026  // Hi = 0;
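 // For example (illustrative, XLEN=32, SRA): Shamt=40 gives Lo = Hi >>s 8 and
 // Hi = Hi >>s 31 (all sign bits), while Shamt=8 gives
 // Lo = (Lo >>u 8) | (Hi << 24) and Hi = Hi >>s 8.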
4027 
4028  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
4029 
4030  SDValue Zero = DAG.getConstant(0, DL, VT);
4031  SDValue One = DAG.getConstant(1, DL, VT);
4032  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
4033  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
4034  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
4035  SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
4036 
4037  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
4038  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
4039  SDValue ShiftLeftHi =
4040  DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
4041  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
4042  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
4043  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
4044  SDValue HiFalse =
4045  IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
4046 
4047  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
4048 
4049  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
4050  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
4051 
4052  SDValue Parts[2] = {Lo, Hi};
4053  return DAG.getMergeValues(Parts, DL);
4054 }
4055 
4056 // Lower splats of i1 types to SETCC. For each mask vector type, we have a
4057 // legal equivalently-sized i8 type, so we can use that as a go-between.
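 // For a variable scalar x this typically lowers to something like
 // (illustrative): andi a0, a0, 1; vmv.v.x v8, a0; vmsne.vi v0, v8, 0.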
4058 SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
4059  SelectionDAG &DAG) const {
4060  SDLoc DL(Op);
4061  MVT VT = Op.getSimpleValueType();
4062  SDValue SplatVal = Op.getOperand(0);
4063  // All-zeros or all-ones splats are handled specially.
4064  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
4065  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4066  return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
4067  }
4068  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
4069  SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
4070  return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
4071  }
4072  MVT XLenVT = Subtarget.getXLenVT();
4073  assert(SplatVal.getValueType() == XLenVT &&
4074  "Unexpected type for i1 splat value");
4075  MVT InterVT = VT.changeVectorElementType(MVT::i8);
4076  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
4077  DAG.getConstant(1, DL, XLenVT));
4078  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
4079  SDValue Zero = DAG.getConstant(0, DL, InterVT);
4080  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
4081 }
4082 
4083 // Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
4084 // illegal (currently only vXi64 RV32).
4085 // FIXME: We could also catch non-constant sign-extended i32 values and lower
4086 // them to VMV_V_X_VL.
4087 SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
4088  SelectionDAG &DAG) const {
4089  SDLoc DL(Op);
4090  MVT VecVT = Op.getSimpleValueType();
4091  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
4092  "Unexpected SPLAT_VECTOR_PARTS lowering");
4093 
4094  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
4095  SDValue Lo = Op.getOperand(0);
4096  SDValue Hi = Op.getOperand(1);
4097 
4098  if (VecVT.isFixedLengthVector()) {
4099  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4100  SDLoc DL(Op);
4101  SDValue Mask, VL;
4102  std::tie(Mask, VL) =
4103  getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4104 
4105  SDValue Res =
4106  splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
4107  return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
4108  }
4109 
4110  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4111  int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4112  int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4113  // If Hi constant is all the same sign bit as Lo, lower this as a custom
4114  // node in order to try and match RVV vector/scalar instructions.
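 // For example (illustrative): splatting the i64 constant -1 on RV32 arrives
 // here as Lo = Hi = 0xffffffff; (LoC >> 31) == -1 == HiC, so a single
 // VMV_V_X_VL of Lo suffices.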
4115  if ((LoC >> 31) == HiC)
4116  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
4117  Lo, DAG.getRegister(RISCV::X0, MVT::i32));
4118  }
4119 
4120  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4121  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4122  isa<ConstantSDNode>(Hi.getOperand(1)) &&
4123  Hi.getConstantOperandVal(1) == 31)
4124  return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
4125  DAG.getRegister(RISCV::X0, MVT::i32));
4126 
4127  // Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
4128  return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
4129  DAG.getUNDEF(VecVT), Lo, Hi,
4130  DAG.getRegister(RISCV::X0, MVT::i32));
4131 }
4132 
4133 // Custom-lower extensions from mask vectors by using a vselect either with 1
4134 // for zero/any-extension or -1 for sign-extension:
4135 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
4136 // Note that any-extension is lowered identically to zero-extension.
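 // For example (illustrative): (v4i32 (sext v4i1:vmask)) becomes
 // (vselect vmask, (splat -1), (splat 0)); zero/any-extension uses a splat
 // of 1 instead of -1.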
4137 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
4138  int64_t ExtTrueVal) const {
4139  SDLoc DL(Op);
4140  MVT VecVT = Op.getSimpleValueType();
4141  SDValue Src = Op.getOperand(0);
4142  // Only custom-lower extensions from mask types
4143  assert(Src.getValueType().isVector() &&
4144  Src.getValueType().getVectorElementType() == MVT::i1);
4145 
4146  if (VecVT.isScalableVector()) {
4147  SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
4148  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
4149  return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
4150  }
4151 
4152  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
4153  MVT I1ContainerVT =
4154  MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
4155 
4156  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
4157 
4158  SDValue Mask, VL;
4159  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
4160 
4161  MVT XLenVT = Subtarget.getXLenVT();
4162  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
4163  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
4164 
4165  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4166  DAG.getUNDEF(ContainerVT), SplatZero, VL);
4167  SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
4168  DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
4169  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
4170  SplatTrueVal, SplatZero, VL);
4171 
4172  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
4173 }
4174 
4175 SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
4176  SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
4177  MVT ExtVT = Op.getSimpleValueType();
4178  // Only custom-lower extensions from fixed-length vector types.
4179  if (!ExtVT.isFixedLengthVector())
4180  return Op;
4181  MVT VT = Op.getOperand(0).getSimpleValueType();
4182  // Grab the canonical container type for the extended type. Infer the smaller
4183  // type from that to ensure the same number of vector elements, as we know
4184  // the LMUL will be sufficient to hold the smaller type.
4185  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
4186  // Get the extended container type manually to ensure the same number of
4187  // vector elements between source and dest.