1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
41#include "llvm/Support/Debug.h"
47#include <optional>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "riscv-lower"
52
53STATISTIC(NumTailCalls, "Number of tail calls");
54
55static cl::opt<unsigned> ExtensionMaxWebSize(
56 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
57 cl::desc("Give the maximum size (in number of nodes) of the web of "
58 "instructions that we will consider for VW expansion"),
59 cl::init(18));
60
61static cl::opt<bool>
62 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
63 cl::desc("Allow the formation of VW_W operations (e.g., "
64 "VWADD_W) with splat constants"),
65 cl::init(false));
66
68 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
69 cl::desc("Set the minimum number of repetitions of a divisor to allow "
70 "transformation to multiplications by the reciprocal"),
71 cl::init(2));
72
73static cl::opt<int>
75 cl::desc("Give the maximum number of instructions that we will "
76 "use for creating a floating-point immediate value"),
77 cl::init(2));
78
79static cl::opt<bool>
80 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
81 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
82
83RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
84 const RISCVSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86
87 RISCVABI::ABI ABI = Subtarget.getTargetABI();
88 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
89
90 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
91 !Subtarget.hasStdExtF()) {
92 errs() << "Hard-float 'f' ABI can't be used for a target that "
93 "doesn't support the F instruction set extension (ignoring "
94 "target-abi)\n";
96 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
97 !Subtarget.hasStdExtD()) {
98 errs() << "Hard-float 'd' ABI can't be used for a target that "
99 "doesn't support the D instruction set extension (ignoring "
100 "target-abi)\n";
101 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
102 }
103
104 switch (ABI) {
105 default:
106 report_fatal_error("Don't know how to lower this ABI");
115 break;
116 }
117
118 MVT XLenVT = Subtarget.getXLenVT();
119
120 // Set up the register classes.
121 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
122 if (Subtarget.is64Bit() && RV64LegalI32)
123 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
124
125 if (Subtarget.hasStdExtZfhmin())
126 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
127 if (Subtarget.hasStdExtZfbfmin())
128 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
129 if (Subtarget.hasStdExtF())
130 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
131 if (Subtarget.hasStdExtD())
132 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
133 if (Subtarget.hasStdExtZhinxmin())
134 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
135 if (Subtarget.hasStdExtZfinx())
136 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
137 if (Subtarget.hasStdExtZdinx()) {
138 if (Subtarget.is64Bit())
139 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
140 else
141 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
142 }
143
144 static const MVT::SimpleValueType BoolVecVTs[] = {
145 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
146 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
147 static const MVT::SimpleValueType IntVecVTs[] = {
148 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
149 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
150 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
151 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
152 MVT::nxv4i64, MVT::nxv8i64};
153 static const MVT::SimpleValueType F16VecVTs[] = {
154 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
155 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
156 static const MVT::SimpleValueType BF16VecVTs[] = {
157 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
158 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
159 static const MVT::SimpleValueType F32VecVTs[] = {
160 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
161 static const MVT::SimpleValueType F64VecVTs[] = {
162 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
163
164 if (Subtarget.hasVInstructions()) {
165 auto addRegClassForRVV = [this](MVT VT) {
166 // Disable the smallest fractional LMUL types if ELEN is less than
167 // RVVBitsPerBlock.
168 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
169 if (VT.getVectorMinNumElements() < MinElts)
170 return;
171
172 unsigned Size = VT.getSizeInBits().getKnownMinValue();
173 const TargetRegisterClass *RC;
175 RC = &RISCV::VRRegClass;
176 else if (Size == 2 * RISCV::RVVBitsPerBlock)
177 RC = &RISCV::VRM2RegClass;
178 else if (Size == 4 * RISCV::RVVBitsPerBlock)
179 RC = &RISCV::VRM4RegClass;
180 else if (Size == 8 * RISCV::RVVBitsPerBlock)
181 RC = &RISCV::VRM8RegClass;
182 else
183 llvm_unreachable("Unexpected size");
184
185 addRegisterClass(VT, RC);
186 };
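// As a rough illustration of the size-to-class mapping above (with
// RISCV::RVVBitsPerBlock = 64): nxv1i64 and nxv2i32 are 64 bits -> VR
// (LMUL=1), nxv4i32 is 128 bits -> VRM2, nxv8i32 is 256 bits -> VRM4, and
// nxv8i64 is 512 bits -> VRM8. Fractional-LMUL types such as nxv1i8 also
// land in VR, unless the ELEN check above filtered them out.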
187
188 for (MVT VT : BoolVecVTs)
189 addRegClassForRVV(VT);
190 for (MVT VT : IntVecVTs) {
191 if (VT.getVectorElementType() == MVT::i64 &&
192 !Subtarget.hasVInstructionsI64())
193 continue;
194 addRegClassForRVV(VT);
195 }
196
197 if (Subtarget.hasVInstructionsF16Minimal())
198 for (MVT VT : F16VecVTs)
199 addRegClassForRVV(VT);
200
201 if (Subtarget.hasVInstructionsBF16())
202 for (MVT VT : BF16VecVTs)
203 addRegClassForRVV(VT);
204
205 if (Subtarget.hasVInstructionsF32())
206 for (MVT VT : F32VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsF64())
210 for (MVT VT : F64VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.useRVVForFixedLengthVectors()) {
214 auto addRegClassForFixedVectors = [this](MVT VT) {
215 MVT ContainerVT = getContainerForFixedLengthVector(VT);
216 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
217 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
218 addRegisterClass(VT, TRI.getRegClass(RCID));
219 };
221 if (useRVVForFixedLengthVectorVT(VT))
222 addRegClassForFixedVectors(VT);
223
225 if (useRVVForFixedLengthVectorVT(VT))
226 addRegClassForFixedVectors(VT);
227 }
228 }
229
230 // Compute derived properties from the register classes.
232
234
236 MVT::i1, Promote);
237 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
239 MVT::i1, Promote);
240
241 // TODO: add all necessary setOperationAction calls.
243
246 if (RV64LegalI32 && Subtarget.is64Bit())
250 if (RV64LegalI32 && Subtarget.is64Bit())
252
259
260 if (RV64LegalI32 && Subtarget.is64Bit())
262
264
267 if (RV64LegalI32 && Subtarget.is64Bit())
269
271
273
274 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
275 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
276
277 if (Subtarget.is64Bit()) {
279
280 if (!RV64LegalI32) {
283 MVT::i32, Custom);
285 MVT::i32, Custom);
286 if (!Subtarget.hasStdExtZbb())
288 } else {
290 if (Subtarget.hasStdExtZbb()) {
293 }
294 }
296 } else {
298 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
299 nullptr);
300 setLibcallName(RTLIB::MULO_I64, nullptr);
301 }
302
303 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
305 if (RV64LegalI32 && Subtarget.is64Bit())
307 } else if (Subtarget.is64Bit()) {
309 if (!RV64LegalI32)
311 else
313 } else {
315 }
316
317 if (!Subtarget.hasStdExtM()) {
319 XLenVT, Expand);
320 if (RV64LegalI32 && Subtarget.is64Bit())
322 Promote);
323 } else if (Subtarget.is64Bit()) {
324 if (!RV64LegalI32)
326 {MVT::i8, MVT::i16, MVT::i32}, Custom);
327 }
328
329 if (RV64LegalI32 && Subtarget.is64Bit()) {
333 Expand);
334 }
335
338 Expand);
339
341 Custom);
342
343 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
344 if (!RV64LegalI32 && Subtarget.is64Bit())
346 } else if (Subtarget.hasVendorXTHeadBb()) {
347 if (Subtarget.is64Bit())
350 } else if (Subtarget.hasVendorXCVbitmanip()) {
352 } else {
354 if (RV64LegalI32 && Subtarget.is64Bit())
356 }
357
358 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
359 // pattern match it directly in isel.
361 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
362 Subtarget.hasVendorXTHeadBb())
363 ? Legal
364 : Expand);
365 if (RV64LegalI32 && Subtarget.is64Bit())
367 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
368 Subtarget.hasVendorXTHeadBb())
369 ? Promote
370 : Expand);
371
372
373 if (Subtarget.hasVendorXCVbitmanip()) {
375 } else {
376 // Zbkb can use rev8+brev8 to implement bitreverse.
378 Subtarget.hasStdExtZbkb() ? Custom : Expand);
379 }
380
381 if (Subtarget.hasStdExtZbb()) {
383 Legal);
384 if (RV64LegalI32 && Subtarget.is64Bit())
386 Promote);
387
388 if (Subtarget.is64Bit()) {
389 if (RV64LegalI32)
391 else
393 }
394 } else if (!Subtarget.hasVendorXCVbitmanip()) {
396 if (RV64LegalI32 && Subtarget.is64Bit())
398 }
399
400 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
401 Subtarget.hasVendorXCVbitmanip()) {
402 // We need the custom lowering to make sure that the resulting sequence
403 // for the 32-bit case is efficient on 64-bit targets.
404 if (Subtarget.is64Bit()) {
405 if (RV64LegalI32) {
407 Subtarget.hasStdExtZbb() ? Legal : Promote);
408 if (!Subtarget.hasStdExtZbb())
410 } else
412 }
413 } else {
415 if (RV64LegalI32 && Subtarget.is64Bit())
417 }
418
419 if (!RV64LegalI32 && Subtarget.is64Bit() &&
420 !Subtarget.hasShortForwardBranchOpt())
422
423 // We can use PseudoCCSUB to implement ABS.
424 if (Subtarget.hasShortForwardBranchOpt())
426
427 if (!Subtarget.hasVendorXTHeadCondMov()) {
429 if (RV64LegalI32 && Subtarget.is64Bit())
431 }
432
433 static const unsigned FPLegalNodeTypes[] = {
440
441 static const ISD::CondCode FPCCToExpand[] = {
445
446 static const unsigned FPOpToExpand[] = {
448 ISD::FREM};
449
450 static const unsigned FPRndMode[] = {
453
454 if (Subtarget.hasStdExtZfhminOrZhinxmin())
456
457 static const unsigned ZfhminZfbfminPromoteOps[] = {
467
468 if (Subtarget.hasStdExtZfbfmin()) {
477 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
479 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
480 // DAGCombiner::visitFP_ROUND probably needs improvements first.
482 }
483
484 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
485 if (Subtarget.hasStdExtZfhOrZhinx()) {
486 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
487 setOperationAction(FPRndMode, MVT::f16,
488 Subtarget.hasStdExtZfa() ? Legal : Custom);
491 } else {
492 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
495 MVT::f16, Legal);
496 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
497 // DAGCombiner::visitFP_ROUND probably needs improvements first.
499 }
500
503 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
506
508 Subtarget.hasStdExtZfa() ? Legal : Promote);
513 MVT::f16, Promote);
514
515 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
516 // complete support for all operations in LegalizeDAG.
521 MVT::f16, Promote);
522
523 // We need to custom promote this.
524 if (Subtarget.is64Bit())
526
528 Subtarget.hasStdExtZfa() ? Legal : Custom);
529 }
530
531 if (Subtarget.hasStdExtFOrZfinx()) {
532 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
533 setOperationAction(FPRndMode, MVT::f32,
534 Subtarget.hasStdExtZfa() ? Legal : Custom);
535 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
539 setOperationAction(FPOpToExpand, MVT::f32, Expand);
540 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
541 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
542 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
543 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
547 Subtarget.isSoftFPABI() ? LibCall : Custom);
550
551 if (Subtarget.hasStdExtZfa()) {
554 } else {
556 }
557 }
558
559 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
561
562 if (Subtarget.hasStdExtDOrZdinx()) {
563 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
564
565 if (!Subtarget.is64Bit())
567
568 if (Subtarget.hasStdExtZfa()) {
569 setOperationAction(FPRndMode, MVT::f64, Legal);
572 } else {
573 if (Subtarget.is64Bit())
574 setOperationAction(FPRndMode, MVT::f64, Custom);
575
577 }
578
581 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
586 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
587 setOperationAction(FPOpToExpand, MVT::f64, Expand);
588 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
589 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
590 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
591 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
595 Subtarget.isSoftFPABI() ? LibCall : Custom);
598 }
599
600 if (Subtarget.is64Bit()) {
603 MVT::i32, Custom);
605 }
606
607 if (Subtarget.hasStdExtFOrZfinx()) {
609 Custom);
610
613 XLenVT, Legal);
614
615 if (RV64LegalI32 && Subtarget.is64Bit())
618 MVT::i32, Legal);
619
622 }
623
626 XLenVT, Custom);
627
629
630 if (Subtarget.is64Bit())
632
633 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
634 // Unfortunately this can't be determined just from the ISA naming string.
636 Subtarget.is64Bit() ? Legal : Custom);
638 Subtarget.is64Bit() ? Legal : Custom);
639
642 if (Subtarget.is64Bit())
644
645 if (Subtarget.hasStdExtZicbop()) {
647 }
648
649 if (Subtarget.hasStdExtA()) {
651 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
653 else
655 } else if (Subtarget.hasForcedAtomics()) {
657 } else {
659 }
660
662
664
665 if (Subtarget.hasVInstructions()) {
667
669 if (RV64LegalI32 && Subtarget.is64Bit())
671
672 // RVV intrinsics may have illegal operands.
673 // We also need to custom legalize vmv.x.s.
676 {MVT::i8, MVT::i16}, Custom);
677 if (Subtarget.is64Bit())
679 MVT::i32, Custom);
680 else
682 MVT::i64, Custom);
683
685 MVT::Other, Custom);
686
687 static const unsigned IntegerVPOps[] = {
688 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
689 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
690 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
691 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
692 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
693 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
694 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
695 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
696 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
697 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
698 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
699 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
700 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
701 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF};
702
703 static const unsigned FloatingPointVPOps[] = {
704 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
705 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
706 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
707 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
708 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
709 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
710 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
711 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
712 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
713 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
714 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
715 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
716 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
717 ISD::VP_REDUCE_FMAXIMUM};
718
719 static const unsigned IntegerVecReduceOps[] = {
723
724 static const unsigned FloatingPointVecReduceOps[] = {
727
728 if (!Subtarget.is64Bit()) {
729 // We must custom-lower certain vXi64 operations on RV32 due to the vector
730 // element type being illegal.
732 MVT::i64, Custom);
733
734 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
735
736 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
737 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
738 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
739 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
740 MVT::i64, Custom);
741 }
742
743 for (MVT VT : BoolVecVTs) {
744 if (!isTypeLegal(VT))
745 continue;
746
748
749 // Mask VTs are custom-expanded into a series of standard nodes
753 VT, Custom);
754
756 Custom);
757
760 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
761 Expand);
762
763 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
764 Custom);
765
766 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
767
770 Custom);
771
773 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
774 Custom);
775
776 // RVV has native int->float & float->int conversions where the
777 // element type sizes are within one power-of-two of each other. Any
778 // wider distances between type sizes have to be lowered as sequences
779 // which progressively narrow the gap in stages.
784 VT, Custom);
786 Custom);
787
788 // Expand all extending loads to types larger than this, and truncating
789 // stores from types larger than this.
791 setTruncStoreAction(VT, OtherVT, Expand);
793 OtherVT, Expand);
794 }
795
796 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
797 ISD::VP_TRUNCATE, ISD::VP_SETCC},
798 VT, Custom);
799
802
804
805 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
806 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
807
810 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
811 }
812
813 for (MVT VT : IntVecVTs) {
814 if (!isTypeLegal(VT))
815 continue;
816
819
820 // Vectors implement MULHS/MULHU.
822
823 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
824 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
826
828 Legal);
829
831
832 // Custom-lower extensions and truncations from/to mask types.
834 VT, Custom);
835
836 // RVV has native int->float & float->int conversions where the
837 // element type sizes are within one power-of-two of each other. Any
838 // wider distances between type sizes have to be lowered as sequences
839 // which progressively narrow the gap in stages.
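// For example (roughly): nxv2i8 -> nxv2f64 spans more than one power-of-two
// of element width (8 -> 64 bits), so it cannot be a single vfcvt and is
// instead lowered as a staged widening such as nxv2i8 -> nxv2i32 -> nxv2f64.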
844 VT, Custom);
846 Custom);
850 VT, Legal);
851
852 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
853 // nodes which truncate by one power of two at a time.
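// E.g. a truncate from nxv2i64 to nxv2i8 becomes, informally,
//   nxv2i64 -> nxv2i32 -> nxv2i16 -> nxv2i8
// with one TRUNCATE_VECTOR_VL (a vnsrl by zero) per step.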
855
856 // Custom-lower insert/extract operations to simplify patterns.
858 Custom);
859
860 // Custom-lower reduction operations to set up the corresponding custom
861 // nodes' operands.
862 setOperationAction(IntegerVecReduceOps, VT, Custom);
863
864 setOperationAction(IntegerVPOps, VT, Custom);
865
867
869 VT, Custom);
870
872 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
873 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
874 VT, Custom);
875
878 VT, Custom);
879
882
884
886 setTruncStoreAction(VT, OtherVT, Expand);
888 OtherVT, Expand);
889 }
890
893
894 // Splice
896
897 if (Subtarget.hasStdExtZvkb()) {
899 setOperationAction(ISD::VP_BSWAP, VT, Custom);
900 } else {
901 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
903 }
904
905 if (Subtarget.hasStdExtZvbb()) {
907 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
908 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
909 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
910 VT, Custom);
911 } else {
912 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
914 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
915 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
916 VT, Expand);
917
918 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
919 // in the range of f32.
920 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
921 if (isTypeLegal(FloatVT)) {
923 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
924 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
925 VT, Custom);
926 }
927 }
928 }
929
930 // Expand various CCs to best match the RVV ISA, which natively supports UNE
931 // but no other unordered comparisons, and supports all ordered comparisons
932 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
933 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
934 // and we pattern-match those back to the "original", swapping operands once
935 // more. This way we catch both operations and both "vf" and "fv" forms with
936 // fewer patterns.
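// For instance, a SETOGT compare of (x, y) is expanded to SETOLT with the
// operands swapped to (y, x); isel then matches that back to the greater-than
// form, so one set of patterns covers vmfgt.vf/vmflt.vf and related cases.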
937 static const ISD::CondCode VFPCCToExpand[] = {
941 };
942
943 // TODO: support more ops.
944 static const unsigned ZvfhminPromoteOps[] = {
952
953 // TODO: support more vp ops.
954 static const unsigned ZvfhminPromoteVPOps[] = {
955 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
956 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
957 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
958 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
959 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
960 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
961 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
962 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
963 ISD::VP_FMAXIMUM, ISD::VP_REDUCE_FMINIMUM, ISD::VP_REDUCE_FMAXIMUM};
964
965 // Sets common operation actions on RVV floating-point vector types.
966 const auto SetCommonVFPActions = [&](MVT VT) {
968 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
969 // sizes are within one power-of-two of each other. Therefore conversions
970 // between vXf16 and vXf64 must be lowered as sequences which convert via
971 // vXf32.
974 // Custom-lower insert/extract operations to simplify patterns.
976 Custom);
977 // Expand various condition codes (explained above).
978 setCondCodeAction(VFPCCToExpand, VT, Expand);
979
982
986 VT, Custom);
987
988 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
989
990 // Expand FP operations that need libcalls.
1002
1004
1006
1008 VT, Custom);
1009
1011 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1012 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1013 VT, Custom);
1014
1017
1020 VT, Custom);
1021
1024
1026
1027 setOperationAction(FloatingPointVPOps, VT, Custom);
1028
1030 Custom);
1033 VT, Legal);
1038 VT, Custom);
1039 };
1040
1041 // Sets common extload/truncstore actions on RVV floating-point vector
1042 // types.
1043 const auto SetCommonVFPExtLoadTruncStoreActions =
1044 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1045 for (auto SmallVT : SmallerVTs) {
1046 setTruncStoreAction(VT, SmallVT, Expand);
1047 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1048 }
1049 };
1050
1051 if (Subtarget.hasVInstructionsF16()) {
1052 for (MVT VT : F16VecVTs) {
1053 if (!isTypeLegal(VT))
1054 continue;
1055 SetCommonVFPActions(VT);
1056 }
1057 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1058 for (MVT VT : F16VecVTs) {
1059 if (!isTypeLegal(VT))
1060 continue;
1063 Custom);
1064 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1065 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1066 Custom);
1069 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1070 VT, Custom);
1073 VT, Custom);
1074 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1076 // load/store
1078
1079 // Custom split nxv32f16 since nxv32f32 is not legal.
1080 if (VT == MVT::nxv32f16) {
1081 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1082 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1083 continue;
1084 }
1085 // Add more promote ops.
1086 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1087 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1088 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1089 }
1090 }
1091
1092 // TODO: Could we merge some code with zvfhmin?
1093 if (Subtarget.hasVInstructionsBF16()) {
1094 for (MVT VT : BF16VecVTs) {
1095 if (!isTypeLegal(VT))
1096 continue;
1098 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1100 Custom);
1103 VT, Custom);
1105 if (Subtarget.hasStdExtZfbfmin()) {
1106 if (Subtarget.hasVInstructionsF16())
1108 else if (Subtarget.hasVInstructionsF16Minimal())
1110 }
1111 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1112 Custom);
1114 // TODO: Promote to fp32.
1115 }
1116 }
1117
1118 if (Subtarget.hasVInstructionsF32()) {
1119 for (MVT VT : F32VecVTs) {
1120 if (!isTypeLegal(VT))
1121 continue;
1122 SetCommonVFPActions(VT);
1123 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1124 }
1125 }
1126
1127 if (Subtarget.hasVInstructionsF64()) {
1128 for (MVT VT : F64VecVTs) {
1129 if (!isTypeLegal(VT))
1130 continue;
1131 SetCommonVFPActions(VT);
1132 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1133 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1134 }
1135 }
1136
1137 if (Subtarget.useRVVForFixedLengthVectors()) {
1139 if (!useRVVForFixedLengthVectorVT(VT))
1140 continue;
1141
1142 // By default everything must be expanded.
1143 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1146 setTruncStoreAction(VT, OtherVT, Expand);
1148 OtherVT, Expand);
1149 }
1150
1151 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1152 // expansion to a build_vector of 0s.
1154
1155 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1157 Custom);
1158
1160 Custom);
1161
1163 VT, Custom);
1164
1166
1168
1170
1172
1174
1176
1179 Custom);
1180
1182 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1183 Custom);
1184
1186 {
1195 },
1196 VT, Custom);
1198 Custom);
1199
1201
1202 // Operations below are different between masks and other vectors.
1203 if (VT.getVectorElementType() == MVT::i1) {
1204 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1205 ISD::OR, ISD::XOR},
1206 VT, Custom);
1207
1208 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1209 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1210 VT, Custom);
1211
1212 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1213 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1214 continue;
1215 }
1216
1217 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1218 // it before type legalization for i64 vectors on RV32. It will then be
1219 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1220 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1221 // improvements first.
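// E.g. on RV32, splatting a 64-bit constant into a vXi64 vector is
// type-legalized to SPLAT_VECTOR_PARTS with two i32 operands (the low and
// high halves), which the custom lowering then materializes without needing
// an i64 scalar register.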
1222 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1225 }
1226
1229
1230 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1231 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1232 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1233 ISD::VP_SCATTER},
1234 VT, Custom);
1235
1239 VT, Custom);
1240
1243
1245
1246 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1247 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1249
1253 VT, Custom);
1254
1257
1260
1261 // Custom-lower reduction operations to set up the corresponding custom
1262 // nodes' operands.
1266 VT, Custom);
1267
1268 setOperationAction(IntegerVPOps, VT, Custom);
1269
1270 if (Subtarget.hasStdExtZvkb())
1272
1273 if (Subtarget.hasStdExtZvbb()) {
1276 VT, Custom);
1277 } else {
1278 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1279 // in the range of f32.
1280 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1281 if (isTypeLegal(FloatVT))
1284 Custom);
1285 }
1286 }
1287
1289 // There are no extending loads or truncating stores.
1290 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1291 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1292 setTruncStoreAction(VT, InnerVT, Expand);
1293 }
1294
1295 if (!useRVVForFixedLengthVectorVT(VT))
1296 continue;
1297
1298 // By default everything must be expanded.
1299 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1301
1302 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1303 // expansion to a build_vector of 0s.
1305
1306 if (VT.getVectorElementType() == MVT::f16 &&
1307 !Subtarget.hasVInstructionsF16()) {
1310 Custom);
1311 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1313 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1314 Custom);
1316 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1317 VT, Custom);
1320 VT, Custom);
1323 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1324 // Don't promote f16 vector operations to f32 if f32 vector type is
1325 // not legal.
1326 // TODO: could split the f16 vector into two vectors and do promotion.
1327 if (!isTypeLegal(F32VecVT))
1328 continue;
1329 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1330 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1331 continue;
1332 }
1333
1334 if (VT.getVectorElementType() == MVT::bf16) {
1336 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1338 Custom);
1341 VT, Custom);
1343 if (Subtarget.hasStdExtZfbfmin()) {
1344 if (Subtarget.hasVInstructionsF16())
1346 else if (Subtarget.hasVInstructionsF16Minimal())
1348 }
1350 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1351 Custom);
1352 // TODO: Promote to fp32.
1353 continue;
1354 }
1355
1356 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1358 Custom);
1359
1363 VT, Custom);
1364
1367 VT, Custom);
1368
1369 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1370 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1371 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1372 ISD::VP_SCATTER},
1373 VT, Custom);
1374
1379 VT, Custom);
1380
1382
1385 VT, Custom);
1386
1387 setCondCodeAction(VFPCCToExpand, VT, Expand);
1388
1392
1394
1395 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1396
1397 setOperationAction(FloatingPointVPOps, VT, Custom);
1398
1400 Custom);
1407 VT, Custom);
1408 }
1409
1410 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1411 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1412 Custom);
1413 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1415 if (Subtarget.hasStdExtFOrZfinx())
1417 if (Subtarget.hasStdExtDOrZdinx())
1419 }
1420 }
1421
1422 if (Subtarget.hasStdExtA()) {
1424 if (RV64LegalI32 && Subtarget.is64Bit())
1426 }
1427
1428 if (Subtarget.hasForcedAtomics()) {
1429 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1435 XLenVT, LibCall);
1436 }
1437
1438 if (Subtarget.hasVendorXTHeadMemIdx()) {
1439 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1440 setIndexedLoadAction(im, MVT::i8, Legal);
1441 setIndexedStoreAction(im, MVT::i8, Legal);
1442 setIndexedLoadAction(im, MVT::i16, Legal);
1443 setIndexedStoreAction(im, MVT::i16, Legal);
1444 setIndexedLoadAction(im, MVT::i32, Legal);
1445 setIndexedStoreAction(im, MVT::i32, Legal);
1446
1447 if (Subtarget.is64Bit()) {
1448 setIndexedLoadAction(im, MVT::i64, Legal);
1449 setIndexedStoreAction(im, MVT::i64, Legal);
1450 }
1451 }
1452 }
1453
1454 if (Subtarget.hasVendorXCVmem()) {
1458
1462 }
1463
1464 // Function alignments.
1465 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1466 setMinFunctionAlignment(FunctionAlignment);
1467 // Set preferred alignments.
1470
1474 if (Subtarget.is64Bit())
1476
1477 if (Subtarget.hasStdExtFOrZfinx())
1479
1480 if (Subtarget.hasStdExtZbb())
1482
1483 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1484 Subtarget.hasStdExtV())
1486
1487 if (Subtarget.hasStdExtZbkb())
1489 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1491 if (Subtarget.hasStdExtFOrZfinx())
1494 if (Subtarget.hasVInstructions())
1496 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1499 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1502 if (Subtarget.hasVendorXTHeadMemPair())
1504 if (Subtarget.useRVVForFixedLengthVectors())
1506
1507 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1508 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1509
1510 // Disable strict node mutation.
1511 IsStrictFPEnabled = true;
1512}
1513
1514EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
1515 LLVMContext &Context,
1516 EVT VT) const {
1517 if (!VT.isVector())
1518 return getPointerTy(DL);
1519 if (Subtarget.hasVInstructions() &&
1520 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1521 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1522 return VT.changeVectorElementTypeToInteger();
1523}
1524
1525MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1526 return Subtarget.getXLenVT();
1527}
1528
1529// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1530bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1531 unsigned VF,
1532 bool IsScalable) const {
1533 if (!Subtarget.hasVInstructions())
1534 return true;
1535
1536 if (!IsScalable)
1537 return true;
1538
1539 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1540 return true;
1541
1542 // Don't allow VF=1 if those types aren't legal.
1543 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1544 return true;
1545
1546 // VLEN=32 support is incomplete.
1547 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1548 return true;
1549
1550 // The maximum VF is for the smallest element width with LMUL=8.
1551 // VF must be a power of 2.
1552 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1553 return VF > MaxVF || !isPowerOf2_32(VF);
1554}
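// Informal example: with a minimum VLEN of 128 and ELEN=64, a call like
// llvm.experimental.get.vector.length(%tc, /*VF=*/4, /*scalable=*/true) with
// an i32 or XLen-typed trip count and a power-of-two VF <= 64 returns false
// here and is selected to a single vsetvli; anything else falls back to the
// generic umin(%tc, VF * vscale) expansion.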
1555
1557 return !Subtarget.hasVInstructions() ||
1558 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1559}
1560
1561bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1562 const CallInst &I,
1563 MachineFunction &MF,
1564 unsigned Intrinsic) const {
1565 auto &DL = I.getModule()->getDataLayout();
1566
1567 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1568 bool IsUnitStrided, bool UsePtrVal = false) {
1570 // We can't use ptrVal if the intrinsic can access memory before the
1571 // pointer. This means we can't use it for strided or indexed intrinsics.
1572 if (UsePtrVal)
1573 Info.ptrVal = I.getArgOperand(PtrOp);
1574 else
1575 Info.fallbackAddressSpace =
1576 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1577 Type *MemTy;
1578 if (IsStore) {
1579 // Store value is the first operand.
1580 MemTy = I.getArgOperand(0)->getType();
1581 } else {
1582 // Use the return type. If it's a segment load, the return type is a struct.
1583 MemTy = I.getType();
1584 if (MemTy->isStructTy())
1585 MemTy = MemTy->getStructElementType(0);
1586 }
1587 if (!IsUnitStrided)
1588 MemTy = MemTy->getScalarType();
1589
1590 Info.memVT = getValueType(DL, MemTy);
1591 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1593 Info.flags |=
1595 return true;
1596 };
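// Example of how the helper above behaves: for a unit-strided load such as
// riscv_vle returning <vscale x 4 x i32>, memVT is the full vector type and
// ptrVal is the pointer argument; for a strided or indexed form such as
// riscv_vlse, memVT is narrowed to the scalar element type and only the
// address space is recorded, since the access may start before the pointer.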
1597
1598 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1600
1602 switch (Intrinsic) {
1603 default:
1604 return false;
1605 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1606 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1607 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1608 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1609 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1610 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1611 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1612 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1613 case Intrinsic::riscv_masked_cmpxchg_i32:
1615 Info.memVT = MVT::i32;
1616 Info.ptrVal = I.getArgOperand(0);
1617 Info.offset = 0;
1618 Info.align = Align(4);
1621 return true;
1622 case Intrinsic::riscv_masked_strided_load:
1623 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1624 /*IsUnitStrided*/ false);
1625 case Intrinsic::riscv_masked_strided_store:
1626 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1627 /*IsUnitStrided*/ false);
1628 case Intrinsic::riscv_seg2_load:
1629 case Intrinsic::riscv_seg3_load:
1630 case Intrinsic::riscv_seg4_load:
1631 case Intrinsic::riscv_seg5_load:
1632 case Intrinsic::riscv_seg6_load:
1633 case Intrinsic::riscv_seg7_load:
1634 case Intrinsic::riscv_seg8_load:
1635 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1636 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1637 case Intrinsic::riscv_seg2_store:
1638 case Intrinsic::riscv_seg3_store:
1639 case Intrinsic::riscv_seg4_store:
1640 case Intrinsic::riscv_seg5_store:
1641 case Intrinsic::riscv_seg6_store:
1642 case Intrinsic::riscv_seg7_store:
1643 case Intrinsic::riscv_seg8_store:
1644 // Operands are (vec, ..., vec, ptr, vl)
1645 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1646 /*IsStore*/ true,
1647 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1648 case Intrinsic::riscv_vle:
1649 case Intrinsic::riscv_vle_mask:
1650 case Intrinsic::riscv_vleff:
1651 case Intrinsic::riscv_vleff_mask:
1652 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1653 /*IsStore*/ false,
1654 /*IsUnitStrided*/ true,
1655 /*UsePtrVal*/ true);
1656 case Intrinsic::riscv_vse:
1657 case Intrinsic::riscv_vse_mask:
1658 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1659 /*IsStore*/ true,
1660 /*IsUnitStrided*/ true,
1661 /*UsePtrVal*/ true);
1662 case Intrinsic::riscv_vlse:
1663 case Intrinsic::riscv_vlse_mask:
1664 case Intrinsic::riscv_vloxei:
1665 case Intrinsic::riscv_vloxei_mask:
1666 case Intrinsic::riscv_vluxei:
1667 case Intrinsic::riscv_vluxei_mask:
1668 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1669 /*IsStore*/ false,
1670 /*IsUnitStrided*/ false);
1671 case Intrinsic::riscv_vsse:
1672 case Intrinsic::riscv_vsse_mask:
1673 case Intrinsic::riscv_vsoxei:
1674 case Intrinsic::riscv_vsoxei_mask:
1675 case Intrinsic::riscv_vsuxei:
1676 case Intrinsic::riscv_vsuxei_mask:
1677 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1678 /*IsStore*/ true,
1679 /*IsUnitStrided*/ false);
1680 case Intrinsic::riscv_vlseg2:
1681 case Intrinsic::riscv_vlseg3:
1682 case Intrinsic::riscv_vlseg4:
1683 case Intrinsic::riscv_vlseg5:
1684 case Intrinsic::riscv_vlseg6:
1685 case Intrinsic::riscv_vlseg7:
1686 case Intrinsic::riscv_vlseg8:
1687 case Intrinsic::riscv_vlseg2ff:
1688 case Intrinsic::riscv_vlseg3ff:
1689 case Intrinsic::riscv_vlseg4ff:
1690 case Intrinsic::riscv_vlseg5ff:
1691 case Intrinsic::riscv_vlseg6ff:
1692 case Intrinsic::riscv_vlseg7ff:
1693 case Intrinsic::riscv_vlseg8ff:
1694 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1695 /*IsStore*/ false,
1696 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1697 case Intrinsic::riscv_vlseg2_mask:
1698 case Intrinsic::riscv_vlseg3_mask:
1699 case Intrinsic::riscv_vlseg4_mask:
1700 case Intrinsic::riscv_vlseg5_mask:
1701 case Intrinsic::riscv_vlseg6_mask:
1702 case Intrinsic::riscv_vlseg7_mask:
1703 case Intrinsic::riscv_vlseg8_mask:
1704 case Intrinsic::riscv_vlseg2ff_mask:
1705 case Intrinsic::riscv_vlseg3ff_mask:
1706 case Intrinsic::riscv_vlseg4ff_mask:
1707 case Intrinsic::riscv_vlseg5ff_mask:
1708 case Intrinsic::riscv_vlseg6ff_mask:
1709 case Intrinsic::riscv_vlseg7ff_mask:
1710 case Intrinsic::riscv_vlseg8ff_mask:
1711 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1712 /*IsStore*/ false,
1713 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1714 case Intrinsic::riscv_vlsseg2:
1715 case Intrinsic::riscv_vlsseg3:
1716 case Intrinsic::riscv_vlsseg4:
1717 case Intrinsic::riscv_vlsseg5:
1718 case Intrinsic::riscv_vlsseg6:
1719 case Intrinsic::riscv_vlsseg7:
1720 case Intrinsic::riscv_vlsseg8:
1721 case Intrinsic::riscv_vloxseg2:
1722 case Intrinsic::riscv_vloxseg3:
1723 case Intrinsic::riscv_vloxseg4:
1724 case Intrinsic::riscv_vloxseg5:
1725 case Intrinsic::riscv_vloxseg6:
1726 case Intrinsic::riscv_vloxseg7:
1727 case Intrinsic::riscv_vloxseg8:
1728 case Intrinsic::riscv_vluxseg2:
1729 case Intrinsic::riscv_vluxseg3:
1730 case Intrinsic::riscv_vluxseg4:
1731 case Intrinsic::riscv_vluxseg5:
1732 case Intrinsic::riscv_vluxseg6:
1733 case Intrinsic::riscv_vluxseg7:
1734 case Intrinsic::riscv_vluxseg8:
1735 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1736 /*IsStore*/ false,
1737 /*IsUnitStrided*/ false);
1738 case Intrinsic::riscv_vlsseg2_mask:
1739 case Intrinsic::riscv_vlsseg3_mask:
1740 case Intrinsic::riscv_vlsseg4_mask:
1741 case Intrinsic::riscv_vlsseg5_mask:
1742 case Intrinsic::riscv_vlsseg6_mask:
1743 case Intrinsic::riscv_vlsseg7_mask:
1744 case Intrinsic::riscv_vlsseg8_mask:
1745 case Intrinsic::riscv_vloxseg2_mask:
1746 case Intrinsic::riscv_vloxseg3_mask:
1747 case Intrinsic::riscv_vloxseg4_mask:
1748 case Intrinsic::riscv_vloxseg5_mask:
1749 case Intrinsic::riscv_vloxseg6_mask:
1750 case Intrinsic::riscv_vloxseg7_mask:
1751 case Intrinsic::riscv_vloxseg8_mask:
1752 case Intrinsic::riscv_vluxseg2_mask:
1753 case Intrinsic::riscv_vluxseg3_mask:
1754 case Intrinsic::riscv_vluxseg4_mask:
1755 case Intrinsic::riscv_vluxseg5_mask:
1756 case Intrinsic::riscv_vluxseg6_mask:
1757 case Intrinsic::riscv_vluxseg7_mask:
1758 case Intrinsic::riscv_vluxseg8_mask:
1759 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1760 /*IsStore*/ false,
1761 /*IsUnitStrided*/ false);
1762 case Intrinsic::riscv_vsseg2:
1763 case Intrinsic::riscv_vsseg3:
1764 case Intrinsic::riscv_vsseg4:
1765 case Intrinsic::riscv_vsseg5:
1766 case Intrinsic::riscv_vsseg6:
1767 case Intrinsic::riscv_vsseg7:
1768 case Intrinsic::riscv_vsseg8:
1769 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1770 /*IsStore*/ true,
1771 /*IsUnitStrided*/ false);
1772 case Intrinsic::riscv_vsseg2_mask:
1773 case Intrinsic::riscv_vsseg3_mask:
1774 case Intrinsic::riscv_vsseg4_mask:
1775 case Intrinsic::riscv_vsseg5_mask:
1776 case Intrinsic::riscv_vsseg6_mask:
1777 case Intrinsic::riscv_vsseg7_mask:
1778 case Intrinsic::riscv_vsseg8_mask:
1779 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1780 /*IsStore*/ true,
1781 /*IsUnitStrided*/ false);
1782 case Intrinsic::riscv_vssseg2:
1783 case Intrinsic::riscv_vssseg3:
1784 case Intrinsic::riscv_vssseg4:
1785 case Intrinsic::riscv_vssseg5:
1786 case Intrinsic::riscv_vssseg6:
1787 case Intrinsic::riscv_vssseg7:
1788 case Intrinsic::riscv_vssseg8:
1789 case Intrinsic::riscv_vsoxseg2:
1790 case Intrinsic::riscv_vsoxseg3:
1791 case Intrinsic::riscv_vsoxseg4:
1792 case Intrinsic::riscv_vsoxseg5:
1793 case Intrinsic::riscv_vsoxseg6:
1794 case Intrinsic::riscv_vsoxseg7:
1795 case Intrinsic::riscv_vsoxseg8:
1796 case Intrinsic::riscv_vsuxseg2:
1797 case Intrinsic::riscv_vsuxseg3:
1798 case Intrinsic::riscv_vsuxseg4:
1799 case Intrinsic::riscv_vsuxseg5:
1800 case Intrinsic::riscv_vsuxseg6:
1801 case Intrinsic::riscv_vsuxseg7:
1802 case Intrinsic::riscv_vsuxseg8:
1803 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1804 /*IsStore*/ true,
1805 /*IsUnitStrided*/ false);
1806 case Intrinsic::riscv_vssseg2_mask:
1807 case Intrinsic::riscv_vssseg3_mask:
1808 case Intrinsic::riscv_vssseg4_mask:
1809 case Intrinsic::riscv_vssseg5_mask:
1810 case Intrinsic::riscv_vssseg6_mask:
1811 case Intrinsic::riscv_vssseg7_mask:
1812 case Intrinsic::riscv_vssseg8_mask:
1813 case Intrinsic::riscv_vsoxseg2_mask:
1814 case Intrinsic::riscv_vsoxseg3_mask:
1815 case Intrinsic::riscv_vsoxseg4_mask:
1816 case Intrinsic::riscv_vsoxseg5_mask:
1817 case Intrinsic::riscv_vsoxseg6_mask:
1818 case Intrinsic::riscv_vsoxseg7_mask:
1819 case Intrinsic::riscv_vsoxseg8_mask:
1820 case Intrinsic::riscv_vsuxseg2_mask:
1821 case Intrinsic::riscv_vsuxseg3_mask:
1822 case Intrinsic::riscv_vsuxseg4_mask:
1823 case Intrinsic::riscv_vsuxseg5_mask:
1824 case Intrinsic::riscv_vsuxseg6_mask:
1825 case Intrinsic::riscv_vsuxseg7_mask:
1826 case Intrinsic::riscv_vsuxseg8_mask:
1827 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1828 /*IsStore*/ true,
1829 /*IsUnitStrided*/ false);
1830 }
1831}
1832
1833bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1834 const AddrMode &AM, Type *Ty,
1835 unsigned AS,
1836 Instruction *I) const {
1837 // No global is ever allowed as a base.
1838 if (AM.BaseGV)
1839 return false;
1840
1841 // RVV instructions only support register addressing.
1842 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1843 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1844
1845 // Require a 12-bit signed offset.
1846 if (!isInt<12>(AM.BaseOffs))
1847 return false;
1848
1849 switch (AM.Scale) {
1850 case 0: // "r+i" or just "i", depending on HasBaseReg.
1851 break;
1852 case 1:
1853 if (!AM.HasBaseReg) // allow "r+i".
1854 break;
1855 return false; // disallow "r+r" or "r+r+i".
1856 default:
1857 return false;
1858 }
1859
1860 return true;
1861}
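// Informally: for scalar accesses this accepts "reg", "imm12" and
// "reg + imm12" (e.g. lw a0, 8(a1)), rejects scaled indices and "reg + reg",
// and for RVV vector types accepts only a plain base register, matching the
// unit-stride vle/vse addressing mode.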
1862
1864 return isInt<12>(Imm);
1865}
1866
1868 return isInt<12>(Imm);
1869}
1870
1871// On RV32, 64-bit integers are split into their high and low parts and held
1872// in two different registers, so the trunc is free since the low register can
1873// just be used.
1874// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1875// isTruncateFree?
1877 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1878 return false;
1879 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1880 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1881 return (SrcBits == 64 && DestBits == 32);
1882}
1883
1885 // We consider i64->i32 free on RV64 since we have good selection of W
1886 // instructions that make promoting operations back to i64 free in many cases.
1887 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1888 !DstVT.isInteger())
1889 return false;
1890 unsigned SrcBits = SrcVT.getSizeInBits();
1891 unsigned DestBits = DstVT.getSizeInBits();
1892 return (SrcBits == 64 && DestBits == 32);
1893}
1894
1896 // Zexts are free if they can be combined with a load.
1897 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1898 // poorly with type legalization of compares preferring sext.
1899 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1900 EVT MemVT = LD->getMemoryVT();
1901 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1902 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1903 LD->getExtensionType() == ISD::ZEXTLOAD))
1904 return true;
1905 }
1906
1907 return TargetLowering::isZExtFree(Val, VT2);
1908}
1909
1911 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1912}
1913
1915 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1916}
1917
1919 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1920}
1921
1923 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1924 Subtarget.hasVendorXCVbitmanip();
1925}
1926
1928 const Instruction &AndI) const {
1929 // We expect to be able to match a bit extraction instruction if the Zbs
1930 // extension is supported and the mask is a power of two. However, we
1931 // conservatively return false if the mask would fit in an ANDI instruction,
1932 // on the basis that it's possible the sinking+duplication of the AND in
1933 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1934 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1935 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1936 return false;
1937 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1938 if (!Mask)
1939 return false;
1940 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1941}
1942
1944 EVT VT = Y.getValueType();
1945
1946 // FIXME: Support vectors once we have tests.
1947 if (VT.isVector())
1948 return false;
1949
1950 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1951 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
1952}
1953
1955 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1956 if (Subtarget.hasStdExtZbs())
1957 return X.getValueType().isScalarInteger();
1958 auto *C = dyn_cast<ConstantSDNode>(Y);
1959 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1960 if (Subtarget.hasVendorXTHeadBs())
1961 return C != nullptr;
1962 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
1963 return C && C->getAPIntValue().ule(10);
1964}
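// Roughly: a single-bit test can use ANDI+SEQZ/SNEZ for bit positions up to
// 10 (so the 1 << Y mask still fits the 12-bit ANDI immediate), BEXT[I] plus
// SEQZ/SNEZ when Zbs is available, or th.tst on XTheadBs cores.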
1965
1967 EVT VT) const {
1968 // Only enable for rvv.
1969 if (!VT.isVector() || !Subtarget.hasVInstructions())
1970 return false;
1971
1972 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1973 return false;
1974
1975 return true;
1976}
1977
1979 Type *Ty) const {
1980 assert(Ty->isIntegerTy());
1981
1982 unsigned BitSize = Ty->getIntegerBitWidth();
1983 if (BitSize > Subtarget.getXLen())
1984 return false;
1985
1986 // Fast path, assume 32-bit immediates are cheap.
1987 int64_t Val = Imm.getSExtValue();
1988 if (isInt<32>(Val))
1989 return true;
1990
1991 // A constant pool entry may be more aligned than the load we're trying to
1992 // replace. If we don't support unaligned scalar mem, prefer the constant
1993 // pool.
1994 // TODO: Can the caller pass down the alignment?
1995 if (!Subtarget.enableUnalignedScalarMem())
1996 return true;
1997
1998 // Prefer to keep the load if it would require many instructions.
1999 // This uses the same threshold we use for constant pools but doesn't
2000 // check useConstantPoolForLargeInts.
2001 // TODO: Should we keep the load only when we're definitely going to emit a
2002 // constant pool?
2003
2004 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Val, Subtarget);
2005 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2006}
2007
2011 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2012 SelectionDAG &DAG) const {
2013 // One interesting pattern that we'd want to form is 'bit extract':
2014 // ((1 >> Y) & 1) ==/!= 0
2015 // But we also need to be careful not to try to reverse that fold.
2016
2017 // Is this '((1 >> Y) & 1)'?
2018 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2019 return false; // Keep the 'bit extract' pattern.
2020
2021 // Will this be '((1 >> Y) & 1)' after the transform?
2022 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2023 return true; // Do form the 'bit extract' pattern.
2024
2025 // If 'X' is a constant, and we transform, then we will immediately
2026 // try to undo the fold, thus causing endless combine loop.
2027 // So only do the transform if X is not a constant. This matches the default
2028 // implementation of this function.
2029 return !XC;
2030}
2031
2032bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
2033 switch (Opcode) {
2034 case Instruction::Add:
2035 case Instruction::Sub:
2036 case Instruction::Mul:
2037 case Instruction::And:
2038 case Instruction::Or:
2039 case Instruction::Xor:
2040 case Instruction::FAdd:
2041 case Instruction::FSub:
2042 case Instruction::FMul:
2043 case Instruction::FDiv:
2044 case Instruction::ICmp:
2045 case Instruction::FCmp:
2046 return true;
2047 case Instruction::Shl:
2048 case Instruction::LShr:
2049 case Instruction::AShr:
2050 case Instruction::UDiv:
2051 case Instruction::SDiv:
2052 case Instruction::URem:
2053 case Instruction::SRem:
2054 case Instruction::Select:
2055 return Operand == 1;
2056 default:
2057 return false;
2058 }
2059}
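// For example, "udiv <vscale x 4 x i32> %v, (splat %s)" can keep %s in a GPR
// and use vdivu.vx, so only operand 1 may be a splat for division, remainder
// and shifts, while commutative ops such as add/mul/and accept a splat in
// either position (vadd.vx, vmul.vx, ...).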
2060
2061
2063 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2064 return false;
2065
2066 if (canSplatOperand(I->getOpcode(), Operand))
2067 return true;
2068
2069 auto *II = dyn_cast<IntrinsicInst>(I);
2070 if (!II)
2071 return false;
2072
2073 switch (II->getIntrinsicID()) {
2074 case Intrinsic::fma:
2075 case Intrinsic::vp_fma:
2076 return Operand == 0 || Operand == 1;
2077 case Intrinsic::vp_shl:
2078 case Intrinsic::vp_lshr:
2079 case Intrinsic::vp_ashr:
2080 case Intrinsic::vp_udiv:
2081 case Intrinsic::vp_sdiv:
2082 case Intrinsic::vp_urem:
2083 case Intrinsic::vp_srem:
2084 case Intrinsic::ssub_sat:
2085 case Intrinsic::vp_ssub_sat:
2086 case Intrinsic::usub_sat:
2087 case Intrinsic::vp_usub_sat:
2088 return Operand == 1;
2089 // These intrinsics are commutative.
2090 case Intrinsic::vp_add:
2091 case Intrinsic::vp_mul:
2092 case Intrinsic::vp_and:
2093 case Intrinsic::vp_or:
2094 case Intrinsic::vp_xor:
2095 case Intrinsic::vp_fadd:
2096 case Intrinsic::vp_fmul:
2097 case Intrinsic::vp_icmp:
2098 case Intrinsic::vp_fcmp:
2099 case Intrinsic::smin:
2100 case Intrinsic::vp_smin:
2101 case Intrinsic::umin:
2102 case Intrinsic::vp_umin:
2103 case Intrinsic::smax:
2104 case Intrinsic::vp_smax:
2105 case Intrinsic::umax:
2106 case Intrinsic::vp_umax:
2107 case Intrinsic::sadd_sat:
2108 case Intrinsic::vp_sadd_sat:
2109 case Intrinsic::uadd_sat:
2110 case Intrinsic::vp_uadd_sat:
2111 // These intrinsics have 'vr' versions.
2112 case Intrinsic::vp_sub:
2113 case Intrinsic::vp_fsub:
2114 case Intrinsic::vp_fdiv:
2115 return Operand == 0 || Operand == 1;
2116 default:
2117 return false;
2118 }
2119}
2120
2121/// Check if sinking \p I's operands to I's basic block is profitable, because
2122/// the operands can be folded into a target instruction, e.g.
2123/// splats of scalars can fold into vector instructions.
2125 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2126 using namespace llvm::PatternMatch;
2127
2128 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2129 return false;
2130
2131 // Don't sink splat operands unless the target prefers it. Some targets
2132 // require S2V transfer buffers, and we can run out of them copying the same
2133 // value repeatedly.
2134 // FIXME: It could still be worth doing if it would improve vector register
2135 // pressure and prevent a vector spill.
2136 if (!Subtarget.sinkSplatOperands())
2137 return false;
2138
2139 for (auto OpIdx : enumerate(I->operands())) {
2140 if (!canSplatOperand(I, OpIdx.index()))
2141 continue;
2142
2143 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2144 // Make sure we are not already sinking this operand
2145 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2146 continue;
2147
2148 // We are looking for a splat that can be sunk.
2150 m_Undef(), m_ZeroMask())))
2151 continue;
2152
2153 // Don't sink i1 splats.
2154 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2155 continue;
2156
2157 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
2158 // and vector registers
2159 for (Use &U : Op->uses()) {
2160 Instruction *Insn = cast<Instruction>(U.getUser());
2161 if (!canSplatOperand(Insn, U.getOperandNo()))
2162 return false;
2163 }
2164
2165 Ops.push_back(&Op->getOperandUse(0));
2166 Ops.push_back(&OpIdx.value());
2167 }
2168 return true;
2169}
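// Sketch of the pattern this sinks: a splat defined in another block,
//   %ins = insertelement <vscale x 4 x i32> poison, i32 %s, i32 0
//   %splat = shufflevector <vscale x 4 x i32> %ins, poison, zeroinitializer
// used by, say, an add in this block. Sinking both instructions next to the
// add lets isel fold the whole thing into a single vadd.vx.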
2170
2172 unsigned Opc = VecOp.getOpcode();
2173
2174 // Assume target opcodes can't be scalarized.
2175 // TODO - do we have any exceptions?
2176 if (Opc >= ISD::BUILTIN_OP_END)
2177 return false;
2178
2179 // If the vector op is not supported, try to convert to scalar.
2180 EVT VecVT = VecOp.getValueType();
2181 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2182 return true;
2183
2184 // If the vector op is supported, but the scalar op is not, the transform may
2185 // not be worthwhile.
2186 // Permit a vector binary operation to be converted to a scalar binary
2187 // operation which is custom lowered with an illegal type.
2188 EVT ScalarVT = VecVT.getScalarType();
2189 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2190 isOperationCustom(Opc, ScalarVT);
2191}
2192
2194 const GlobalAddressSDNode *GA) const {
2195 // In order to maximise the opportunity for common subexpression elimination,
2196 // keep a separate ADD node for the global address offset instead of folding
2197 // it in the global address node. Later peephole optimisations may choose to
2198 // fold it back in when profitable.
2199 return false;
2200}
2201
2202// Return one of the followings:
2203// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2204// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2205// positive counterpart, which will be materialized from the first returned
2206 // element. The second returned element indicates that an FNEG should
2207 // follow.
2208// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2209std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2210 EVT VT) const {
2211 if (!Subtarget.hasStdExtZfa())
2212 return std::make_pair(-1, false);
2213
2214 bool IsSupportedVT = false;
2215 if (VT == MVT::f16) {
2216 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2217 } else if (VT == MVT::f32) {
2218 IsSupportedVT = true;
2219 } else if (VT == MVT::f64) {
2220 assert(Subtarget.hasStdExtD() && "Expect D extension");
2221 IsSupportedVT = true;
2222 }
2223
2224 if (!IsSupportedVT)
2225 return std::make_pair(-1, false);
2226
2227 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2228 if (Index < 0 && Imm.isNegative())
2229 // Try the combination of its positive counterpart + FNEG.
2230 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2231 else
2232 return std::make_pair(Index, false);
2233}
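// Illustrative examples (assuming Zfa together with the F extension):
//   getLegalZfaFPImm(0.5, f32)  -> {index of 0.5 in the FLI table, false},
//                                  i.e. loadable directly with fli.s.
//   getLegalZfaFPImm(-0.5, f32) -> {index of 0.5, true}: materialise +0.5 with
//                                  fli.s and follow it with an fneg.s.
//   getLegalZfaFPImm(0.3, f32)  -> {-1, false}: 0.3 is not an FLI value.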
2234
2235bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
2236                                        bool ForCodeSize) const {
2237 bool IsLegalVT = false;
2238 if (VT == MVT::f16)
2239 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2240 else if (VT == MVT::f32)
2241 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2242 else if (VT == MVT::f64)
2243 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2244 else if (VT == MVT::bf16)
2245 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2246
2247 if (!IsLegalVT)
2248 return false;
2249
2250 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2251 return true;
2252
2253   // Cannot create a 64-bit floating-point immediate value for RV32.
2254 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2255 // td can handle +0.0 or -0.0 already.
2256 // -0.0 can be created by fmv + fneg.
2257 return Imm.isZero();
2258 }
2259
2260 // Special case: fmv + fneg
2261 if (Imm.isNegZero())
2262 return true;
2263
2264 // Building an integer and then converting requires a fmv at the end of
2265 // the integer sequence.
2266 const int Cost =
2267 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2268 Subtarget);
2269 return Cost <= FPImmCost;
2270}
2271
2272// TODO: This is very conservative.
2273bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2274                                                  unsigned Index) const {
2275   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
2276     return false;
2277
2278   // Only support extracting a fixed-length vector from a fixed-length vector for now.
2279 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2280 return false;
2281
2282 EVT EltVT = ResVT.getVectorElementType();
2283 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2284
2285 // The smallest type we can slide is i8.
2286 // TODO: We can extract index 0 from a mask vector without a slide.
2287 if (EltVT == MVT::i1)
2288 return false;
2289
2290 unsigned ResElts = ResVT.getVectorNumElements();
2291 unsigned SrcElts = SrcVT.getVectorNumElements();
2292
2293 unsigned MinVLen = Subtarget.getRealMinVLen();
2294 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2295
2296 // If we're extracting only data from the first VLEN bits of the source
2297 // then we can always do this with an m1 vslidedown.vx. Restricting the
2298 // Index ensures we can use a vslidedown.vi.
2299 // TODO: We can generalize this when the exact VLEN is known.
2300 if (Index + ResElts <= MinVLMAX && Index < 31)
2301 return true;
2302
2303   // Conservatively only handle extracting half of a vector.
2304   // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2305   // a cheap extract. However, this case is important in practice for
2306   // shuffled extracts of longer vectors. How should we resolve this?
2307 if ((ResElts * 2) != SrcElts)
2308 return false;
2309
2310 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2311 // cheap.
2312 if (Index >= 32)
2313 return false;
2314
2315 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2316 // the upper half of a vector until we have more test coverage.
2317 return Index == 0 || Index == ResElts;
2318}
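// For example (illustrative, assuming a minimum VLEN of 128): extracting
// v4i32 from v8i32 at index 0 or index 4 is reported as cheap (index 0 lies
// within the first VLEN bits, index 4 is exactly the upper half), while
// index 2 is not.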
2319
2320MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2321                                                      CallingConv::ID CC,
2322                                                      EVT VT) const {
2323 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2324 // We might still end up using a GPR but that will be decided based on ABI.
2325 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2326 !Subtarget.hasStdExtZfhminOrZhinxmin())
2327 return MVT::f32;
2328
2329   MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2330
2331 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2332 return MVT::i64;
2333
2334 return PartVT;
2335}
2336
2337unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2338                                                           CallingConv::ID CC,
2339                                                           EVT VT) const {
2340 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2341 // We might still end up using a GPR but that will be decided based on ABI.
2342 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2343 !Subtarget.hasStdExtZfhminOrZhinxmin())
2344 return 1;
2345
2346   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2347}
2348
2349unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2350     LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2351     unsigned &NumIntermediates, MVT &RegisterVT) const {
2352   unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2353       Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2354
2355 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2356 IntermediateVT = MVT::i64;
2357
2358 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2359 RegisterVT = MVT::i64;
2360
2361 return NumRegs;
2362}
2363
2364// Changes the condition code and swaps operands if necessary, so the SetCC
2365// operation matches one of the comparisons supported directly by branches
2366// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2367// with 1/-1.
2368static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2369 ISD::CondCode &CC, SelectionDAG &DAG) {
2370 // If this is a single bit test that can't be handled by ANDI, shift the
2371 // bit to be tested to the MSB and perform a signed compare with 0.
2372 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2373 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2374 isa<ConstantSDNode>(LHS.getOperand(1))) {
2375 uint64_t Mask = LHS.getConstantOperandVal(1);
2376 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2377 unsigned ShAmt = 0;
2378 if (isPowerOf2_64(Mask)) {
2379         CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2380         ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2381 } else {
2382 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2383 }
2384
2385 LHS = LHS.getOperand(0);
2386 if (ShAmt != 0)
2387 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2388 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2389 return;
2390 }
2391 }
2392
2393 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2394 int64_t C = RHSC->getSExtValue();
2395 switch (CC) {
2396 default: break;
2397 case ISD::SETGT:
2398 // Convert X > -1 to X >= 0.
2399 if (C == -1) {
2400 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2401 CC = ISD::SETGE;
2402 return;
2403 }
2404 break;
2405 case ISD::SETLT:
2406 // Convert X < 1 to 0 >= X.
2407 if (C == 1) {
2408 RHS = LHS;
2409 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2410 CC = ISD::SETGE;
2411 return;
2412 }
2413 break;
2414 }
2415 }
2416
2417 switch (CC) {
2418 default:
2419 break;
2420 case ISD::SETGT:
2421 case ISD::SETLE:
2422 case ISD::SETUGT:
2423 case ISD::SETULE:
2424     CC = ISD::getSetCCSwappedOperands(CC);
2425     std::swap(LHS, RHS);
2426 break;
2427 }
2428}
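// For example (illustrative): (setcc (and X, 0x800), 0, ne) cannot use andi
// since 0x800 does not fit in a signed 12-bit immediate, so the tested bit is
// shifted up to the MSB and the branch becomes a signed compare with zero;
// likewise (setcc X, -1, gt) is rewritten as (setcc X, 0, ge).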
2429
2430RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2431   assert(VT.isScalableVector() && "Expecting a scalable vector type");
2432 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2433 if (VT.getVectorElementType() == MVT::i1)
2434 KnownSize *= 8;
2435
2436 switch (KnownSize) {
2437 default:
2438 llvm_unreachable("Invalid LMUL.");
2439   case 8:
2440     return RISCVII::VLMUL::LMUL_F8;
2441   case 16:
2442     return RISCVII::VLMUL::LMUL_F4;
2443   case 32:
2444     return RISCVII::VLMUL::LMUL_F2;
2445   case 64:
2446     return RISCVII::VLMUL::LMUL_1;
2447   case 128:
2448     return RISCVII::VLMUL::LMUL_2;
2449   case 256:
2450     return RISCVII::VLMUL::LMUL_4;
2451   case 512:
2452     return RISCVII::VLMUL::LMUL_8;
2453   }
2454}
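// For example (illustrative): nxv2i32 has a known minimum size of 64 bits and
// maps to LMUL_1, nxv4i32 (128 bits) maps to LMUL_2, and nxv1i8 (8 bits) maps
// to the fractional LMUL_F8, independent of the VLEN at run time.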
2455
2456unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2457   switch (LMul) {
2458   default:
2459     llvm_unreachable("Invalid LMUL.");
2460   case RISCVII::VLMUL::LMUL_F8:
2461   case RISCVII::VLMUL::LMUL_F4:
2462   case RISCVII::VLMUL::LMUL_F2:
2463   case RISCVII::VLMUL::LMUL_1:
2464     return RISCV::VRRegClassID;
2465   case RISCVII::VLMUL::LMUL_2:
2466     return RISCV::VRM2RegClassID;
2467   case RISCVII::VLMUL::LMUL_4:
2468     return RISCV::VRM4RegClassID;
2469   case RISCVII::VLMUL::LMUL_8:
2470     return RISCV::VRM8RegClassID;
2471 }
2472}
2473
2474unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2475   RISCVII::VLMUL LMUL = getLMUL(VT);
2476 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2477 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2478 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2479 LMUL == RISCVII::VLMUL::LMUL_1) {
2480 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2481 "Unexpected subreg numbering");
2482 return RISCV::sub_vrm1_0 + Index;
2483 }
2484 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2485 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2486 "Unexpected subreg numbering");
2487 return RISCV::sub_vrm2_0 + Index;
2488 }
2489 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2490 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2491 "Unexpected subreg numbering");
2492 return RISCV::sub_vrm4_0 + Index;
2493 }
2494 llvm_unreachable("Invalid vector type.");
2495}
2496
2497unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2498   if (VT.getVectorElementType() == MVT::i1)
2499 return RISCV::VRRegClassID;
2500 return getRegClassIDForLMUL(getLMUL(VT));
2501}
2502
2503// Attempt to decompose a subvector insert/extract between VecVT and
2504// SubVecVT via subregister indices. Returns the subregister index that
2505// can perform the subvector insert/extract with the given element index, as
2506// well as the index corresponding to any leftover subvectors that must be
2507// further inserted/extracted within the register class for SubVecVT.
2508std::pair<unsigned, unsigned>
2510 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2511 const RISCVRegisterInfo *TRI) {
2512 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2513 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2514 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2515 "Register classes not ordered");
2516 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2517 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2518 // Try to compose a subregister index that takes us from the incoming
2519 // LMUL>1 register class down to the outgoing one. At each step we half
2520 // the LMUL:
2521 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2522 // Note that this is not guaranteed to find a subregister index, such as
2523 // when we are extracting from one VR type to another.
2524 unsigned SubRegIdx = RISCV::NoSubRegister;
2525 for (const unsigned RCID :
2526 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2527 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2528 VecVT = VecVT.getHalfNumVectorElementsVT();
2529 bool IsHi =
2530 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2531 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2532 getSubregIndexByMVT(VecVT, IsHi));
2533 if (IsHi)
2534 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2535 }
2536 return {SubRegIdx, InsertExtractIdx};
2537}
2538
2539// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2540// stores for those types.
2541bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2542 return !Subtarget.useRVVForFixedLengthVectors() ||
2543 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2544}
2545
2546bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2547   if (!ScalarTy.isSimple())
2548 return false;
2549 switch (ScalarTy.getSimpleVT().SimpleTy) {
2550 case MVT::iPTR:
2551 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2552 case MVT::i8:
2553 case MVT::i16:
2554 case MVT::i32:
2555 return true;
2556 case MVT::i64:
2557 return Subtarget.hasVInstructionsI64();
2558 case MVT::f16:
2559 return Subtarget.hasVInstructionsF16();
2560 case MVT::f32:
2561 return Subtarget.hasVInstructionsF32();
2562 case MVT::f64:
2563 return Subtarget.hasVInstructionsF64();
2564 default:
2565 return false;
2566 }
2567}
2568
2569
2570unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2571 return NumRepeatedDivisors;
2572}
2573
2574static SDValue getVLOperand(SDValue Op) {
2575   assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2576 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2577 "Unexpected opcode");
2578 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2579 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2580   const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2581       RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2582 if (!II)
2583 return SDValue();
2584 return Op.getOperand(II->VLOperand + 1 + HasChain);
2585}
2586
2587static bool useRVVForFixedLengthVectorVT(MVT VT,
2588                                             const RISCVSubtarget &Subtarget) {
2589 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2590 if (!Subtarget.useRVVForFixedLengthVectors())
2591 return false;
2592
2593 // We only support a set of vector types with a consistent maximum fixed size
2594 // across all supported vector element types to avoid legalization issues.
2595 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2596 // fixed-length vector type we support is 1024 bytes.
2597 if (VT.getFixedSizeInBits() > 1024 * 8)
2598 return false;
2599
2600 unsigned MinVLen = Subtarget.getRealMinVLen();
2601
2602 MVT EltVT = VT.getVectorElementType();
2603
2604 // Don't use RVV for vectors we cannot scalarize if required.
2605 switch (EltVT.SimpleTy) {
2606 // i1 is supported but has different rules.
2607 default:
2608 return false;
2609 case MVT::i1:
2610 // Masks can only use a single register.
2611 if (VT.getVectorNumElements() > MinVLen)
2612 return false;
2613 MinVLen /= 8;
2614 break;
2615 case MVT::i8:
2616 case MVT::i16:
2617 case MVT::i32:
2618 break;
2619 case MVT::i64:
2620 if (!Subtarget.hasVInstructionsI64())
2621 return false;
2622 break;
2623 case MVT::f16:
2624 if (!Subtarget.hasVInstructionsF16Minimal())
2625 return false;
2626 break;
2627 case MVT::bf16:
2628 if (!Subtarget.hasVInstructionsBF16())
2629 return false;
2630 break;
2631 case MVT::f32:
2632 if (!Subtarget.hasVInstructionsF32())
2633 return false;
2634 break;
2635 case MVT::f64:
2636 if (!Subtarget.hasVInstructionsF64())
2637 return false;
2638 break;
2639 }
2640
2641 // Reject elements larger than ELEN.
2642 if (EltVT.getSizeInBits() > Subtarget.getELen())
2643 return false;
2644
2645 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2646 // Don't use RVV for types that don't fit.
2647 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2648 return false;
2649
2650 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2651 // the base fixed length RVV support in place.
2652 if (!VT.isPow2VectorType())
2653 return false;
2654
2655 return true;
2656}
2657
2658bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2659 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2660}
2661
2662// Return the largest legal scalable vector type that matches VT's element type.
2663static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2664                                            const RISCVSubtarget &Subtarget) {
2665 // This may be called before legal types are setup.
2666 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2667 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2668 "Expected legal fixed length vector!");
2669
2670 unsigned MinVLen = Subtarget.getRealMinVLen();
2671 unsigned MaxELen = Subtarget.getELen();
2672
2673 MVT EltVT = VT.getVectorElementType();
2674 switch (EltVT.SimpleTy) {
2675 default:
2676 llvm_unreachable("unexpected element type for RVV container");
2677 case MVT::i1:
2678 case MVT::i8:
2679 case MVT::i16:
2680 case MVT::i32:
2681 case MVT::i64:
2682 case MVT::bf16:
2683 case MVT::f16:
2684 case MVT::f32:
2685 case MVT::f64: {
2686 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2687 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2688 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2689 unsigned NumElts =
2690         (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2691     NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2692 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2693 return MVT::getScalableVectorVT(EltVT, NumElts);
2694 }
2695 }
2696}
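// For example (illustrative, assuming Zvl128b, i.e. a minimum VLEN of 128):
// v4i32 is assigned the container nxv2i32 (a single LMUL=1 register), while
// v16i32 is assigned nxv8i32 (an LMUL=4 register group).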
2697
2698static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2699                                               const RISCVSubtarget &Subtarget) {
2700   return ::getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2701                                           Subtarget);
2702}
2703
2704MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2705   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2706}
2707
2708// Grow V to consume an entire RVV register.
2709static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2710                                        const RISCVSubtarget &Subtarget) {
2711 assert(VT.isScalableVector() &&
2712 "Expected to convert into a scalable vector!");
2713 assert(V.getValueType().isFixedLengthVector() &&
2714 "Expected a fixed length vector operand!");
2715 SDLoc DL(V);
2716 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2717 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2718}
2719
2720// Shrink V so it's just big enough to maintain a VT's worth of data.
2721static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2722                                          const RISCVSubtarget &Subtarget) {
2723   assert(VT.isFixedLengthVector() &&
2724          "Expected to convert into a fixed length vector!");
2725 assert(V.getValueType().isScalableVector() &&
2726 "Expected a scalable vector operand!");
2727 SDLoc DL(V);
2728 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2729 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2730}
2731
2732/// Return the mask type suitable for masking the provided
2733/// vector type. This is simply an i1 element type vector of the same
2734/// (possibly scalable) length.
2735static MVT getMaskTypeFor(MVT VecVT) {
2736 assert(VecVT.isVector());
2737   ElementCount EC = VecVT.getVectorElementCount();
2738   return MVT::getVectorVT(MVT::i1, EC);
2739}
2740
2741/// Creates an all ones mask suitable for masking a vector of type VecVT with
2742/// vector length VL.
2743static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2744 SelectionDAG &DAG) {
2745 MVT MaskVT = getMaskTypeFor(VecVT);
2746 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2747}
2748
2749static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2750 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2751 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2752 // canonicalize the representation. InsertVSETVLI will pick the immediate
2753 // encoding later if profitable.
2754 const auto [MinVLMAX, MaxVLMAX] =
2755 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2756 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2757 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2758
2759 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2760}
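// For example (illustrative): if the subtarget's minimum and maximum VLEN are
// both known to be 128, then asking for 4 elements in an nxv2i32 container is
// exactly VLMAX, so X0 is returned as the AVL instead of the constant 4.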
2761
2762static std::pair<SDValue, SDValue>
2763getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2764                        const RISCVSubtarget &Subtarget) {
2765 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2766 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2767 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2768 return {Mask, VL};
2769}
2770
2771static std::pair<SDValue, SDValue>
2772getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2773 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2774 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2775 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2776 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2777 return {Mask, VL};
2778}
2779
2780// Gets the two common "VL" operands: an all-ones mask and the vector length.
2781// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2782// fixed-length, ContainerVT is the scalable container type it is lowered in;
2783// if VecVT is already scalable, ContainerVT should be the same as VecVT.
2784static std::pair<SDValue, SDValue>
2785getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2786 const RISCVSubtarget &Subtarget) {
2787 if (VecVT.isFixedLengthVector())
2788 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2789 Subtarget);
2790 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2791 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2792}
2793
2794SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2795                                       SelectionDAG &DAG) const {
2796 assert(VecVT.isScalableVector() && "Expected scalable vector");
2797 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2798 VecVT.getVectorElementCount());
2799}
2800
2801std::pair<unsigned, unsigned>
2802RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2803                                        const RISCVSubtarget &Subtarget) {
2804 assert(VecVT.isScalableVector() && "Expected scalable vector");
2805
2806 unsigned EltSize = VecVT.getScalarSizeInBits();
2807 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2808
2809 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2810 unsigned MaxVLMAX =
2811 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2812
2813 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2814 unsigned MinVLMAX =
2815 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2816
2817 return std::make_pair(MinVLMAX, MaxVLMAX);
2818}
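// For example (illustrative): for nxv2i32 (SEW=32, minimum size 64 bits) on a
// subtarget with Zvl128b and no known upper VLEN bound (65536 is then assumed),
// this returns {4, 2048}: at least 4 lanes are guaranteed and at most 2048 are
// possible.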
2819
2820// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2821// of either is (currently) supported. This can get us into an infinite loop
2822// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2823// as a ..., etc.
2824// Until either (or both) of these can reliably lower any node, reporting that
2825// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2826// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2827// which is not desirable.
2828bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2829     EVT VT, unsigned DefinedValues) const {
2830 return false;
2831}
2832
2833InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2834   // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2835   // implementation-defined.
2836   if (!VT.isVector())
2837     return InstructionCost::getInvalid();
2838   unsigned DLenFactor = Subtarget.getDLenFactor();
2839 unsigned Cost;
2840 if (VT.isScalableVector()) {
2841 unsigned LMul;
2842 bool Fractional;
2843 std::tie(LMul, Fractional) =
2844         RISCVVType::decodeVLMUL(getLMUL(VT));
2845     if (Fractional)
2846 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2847 else
2848 Cost = (LMul * DLenFactor);
2849 } else {
2850 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2851 }
2852 return Cost;
2853}
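// For example (illustrative): on a core where DLEN == VLEN (DLenFactor == 1),
// an LMUL_4 type such as nxv8i32 gets a cost of 4, while the fractional
// LMUL_F2 type nxv1i32 gets a cost of 1.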
2854
2855
2856/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2857/// is generally quadratic in the number of vregs implied by LMUL. Note that
2858/// the other operands (index and possibly mask) are handled separately.
2859InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2860   return getLMULCost(VT) * getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2864/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2865/// or may track the vrgather.vv cost. It is implementation-dependent.
2866InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2867   return getLMULCost(VT);
2868}
2869
2870/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2871/// for the type VT. (This does not cover the vslide1up or vslide1down
2872/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2873/// or may track the vrgather.vv cost. It is implementation-dependent.
2874InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2875   return getLMULCost(VT);
2876}
2877
2878/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2879/// for the type VT. (This does not cover the vslide1up or vslide1down
2880/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2881/// or may track the vrgather.vv cost. It is implementation-dependent.
2882InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2883   return getLMULCost(VT);
2884}
2885
2886static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2887                                   const RISCVSubtarget &Subtarget) {
2888 // RISC-V FP-to-int conversions saturate to the destination register size, but
2889 // don't produce 0 for nan. We can use a conversion instruction and fix the
2890 // nan case with a compare and a select.
2891 SDValue Src = Op.getOperand(0);
2892
2893 MVT DstVT = Op.getSimpleValueType();
2894 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2895
2896 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2897
2898 if (!DstVT.isVector()) {
2899     // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2900 // the result.
2901 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2902 Src.getValueType() == MVT::bf16) {
2903 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2904 }
2905
2906 unsigned Opc;
2907 if (SatVT == DstVT)
2908 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2909 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2910       Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2911     else
2912 return SDValue();
2913 // FIXME: Support other SatVTs by clamping before or after the conversion.
2914
2915 SDLoc DL(Op);
2916 SDValue FpToInt = DAG.getNode(
2917 Opc, DL, DstVT, Src,
2918         DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2919
2920 if (Opc == RISCVISD::FCVT_WU_RV64)
2921 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2922
2923 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2924 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2925                            ISD::CondCode::SETUO);
2926   }
2927
2928 // Vectors.
2929
2930 MVT DstEltVT = DstVT.getVectorElementType();
2931 MVT SrcVT = Src.getSimpleValueType();
2932 MVT SrcEltVT = SrcVT.getVectorElementType();
2933 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2934 unsigned DstEltSize = DstEltVT.getSizeInBits();
2935
2936 // Only handle saturating to the destination type.
2937 if (SatVT != DstEltVT)
2938 return SDValue();
2939
2940   // FIXME: Don't support narrowing by more than 1 step for now.
2941 if (SrcEltSize > (2 * DstEltSize))
2942 return SDValue();
2943
2944 MVT DstContainerVT = DstVT;
2945 MVT SrcContainerVT = SrcVT;
2946 if (DstVT.isFixedLengthVector()) {
2947 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2948 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2949 assert(DstContainerVT.getVectorElementCount() ==
2950 SrcContainerVT.getVectorElementCount() &&
2951 "Expected same element count");
2952 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2953 }
2954
2955 SDLoc DL(Op);
2956
2957 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2958
2959 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2960 {Src, Src, DAG.getCondCode(ISD::SETNE),
2961 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2962
2963 // Need to widen by more than 1 step, promote the FP type, then do a widening
2964 // convert.
2965 if (DstEltSize > (2 * SrcEltSize)) {
2966 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2967 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2968 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2969 }
2970
2971 unsigned RVVOpc =
2972       IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2973   SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2974
2975 SDValue SplatZero = DAG.getNode(
2976 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2977 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2978 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2979 Res, DAG.getUNDEF(DstContainerVT), VL);
2980
2981 if (DstVT.isFixedLengthVector())
2982 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2983
2984 return Res;
2985}
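// For example (illustrative): a scalar i32 fptosi.sat of an f32 value becomes
// an FCVT_X with static RTZ rounding plus a select that forces the result to
// zero when the input is NaN (the source compared unordered with itself); the
// vector path instead builds a NaN mask with a self-compare and vmerges in a
// zero splat after the conversion.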
2986
2987static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2988   switch (Opc) {
2989 case ISD::FROUNDEVEN:
2990   case ISD::STRICT_FROUNDEVEN:
2991   case ISD::VP_FROUNDEVEN:
2992 return RISCVFPRndMode::RNE;
2993 case ISD::FTRUNC:
2994 case ISD::STRICT_FTRUNC:
2995 case ISD::VP_FROUNDTOZERO:
2996 return RISCVFPRndMode::RTZ;
2997 case ISD::FFLOOR:
2998 case ISD::STRICT_FFLOOR:
2999 case ISD::VP_FFLOOR:
3000 return RISCVFPRndMode::RDN;
3001 case ISD::FCEIL:
3002 case ISD::STRICT_FCEIL:
3003 case ISD::VP_FCEIL:
3004 return RISCVFPRndMode::RUP;
3005 case ISD::FROUND:
3006 case ISD::STRICT_FROUND:
3007 case ISD::VP_FROUND:
3008 return RISCVFPRndMode::RMM;
3009 case ISD::FRINT:
3010 return RISCVFPRndMode::DYN;
3011 }
3012
3013   return RISCVFPRndMode::Invalid;
3014}
3015
3016// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
3017// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3018// the integer domain and back. Taking care to avoid converting values that are
3019// nan or already correct.
3020static SDValue
3021lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3022                                 const RISCVSubtarget &Subtarget) {
3023 MVT VT = Op.getSimpleValueType();
3024 assert(VT.isVector() && "Unexpected type");
3025
3026 SDLoc DL(Op);
3027
3028 SDValue Src = Op.getOperand(0);
3029
3030 MVT ContainerVT = VT;
3031 if (VT.isFixedLengthVector()) {
3032 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3033 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3034 }
3035
3036 SDValue Mask, VL;
3037 if (Op->isVPOpcode()) {
3038 Mask = Op.getOperand(1);
3039 if (VT.isFixedLengthVector())
3040 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3041 Subtarget);
3042 VL = Op.getOperand(2);
3043 } else {
3044 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3045 }
3046
3047 // Freeze the source since we are increasing the number of uses.
3048 Src = DAG.getFreeze(Src);
3049
3050 // We do the conversion on the absolute value and fix the sign at the end.
3051 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3052
3053 // Determine the largest integer that can be represented exactly. This and
3054 // values larger than it don't have any fractional bits so don't need to
3055 // be converted.
3056 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3057 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3058 APFloat MaxVal = APFloat(FltSem);
3059 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3060 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3061 SDValue MaxValNode =
3062 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3063 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3064 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3065
3066 // If abs(Src) was larger than MaxVal or nan, keep it.
3067 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3068 Mask =
3069 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3070 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3071 Mask, Mask, VL});
3072
3073 // Truncate to integer and convert back to FP.
3074 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3075 MVT XLenVT = Subtarget.getXLenVT();
3076 SDValue Truncated;
3077
3078 switch (Op.getOpcode()) {
3079 default:
3080 llvm_unreachable("Unexpected opcode");
3081 case ISD::FCEIL:
3082 case ISD::VP_FCEIL:
3083 case ISD::FFLOOR:
3084 case ISD::VP_FFLOOR:
3085 case ISD::FROUND:
3086 case ISD::FROUNDEVEN:
3087 case ISD::VP_FROUND:
3088 case ISD::VP_FROUNDEVEN:
3089 case ISD::VP_FROUNDTOZERO: {
3090     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3091     assert(FRM != RISCVFPRndMode::Invalid);
3092     Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3093 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3094 break;
3095 }
3096 case ISD::FTRUNC:
3097 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3098 Mask, VL);
3099 break;
3100 case ISD::FRINT:
3101 case ISD::VP_FRINT:
3102 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3103 break;
3104 case ISD::FNEARBYINT:
3105 case ISD::VP_FNEARBYINT:
3106 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3107 Mask, VL);
3108 break;
3109 }
3110
3111 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3112 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3113 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3114 Mask, VL);
3115
3116 // Restore the original sign so that -0.0 is preserved.
3117 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3118 Src, Src, Mask, VL);
3119
3120 if (!VT.isFixedLengthVector())
3121 return Truncated;
3122
3123 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3124}
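// For example (illustrative): a v4f32 ffloor roughly becomes
//   FABS_VL + SETCC_VL (mask out lanes that are NaN or already have no
//   fractional bits),
//   VFCVT_RM_X_F_VL with the RDN static rounding mode (masked),
//   SINT_TO_FP_VL back to float (masked), and
//   FCOPYSIGN_VL to restore the original sign so -0.0 is preserved.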
3125
3126// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3127// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3128// qNaNs and converting the new source to integer and back to FP.
3129static SDValue
3130lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3131                                        const RISCVSubtarget &Subtarget) {
3132 SDLoc DL(Op);
3133 MVT VT = Op.getSimpleValueType();
3134 SDValue Chain = Op.getOperand(0);
3135 SDValue Src = Op.getOperand(1);
3136
3137 MVT ContainerVT = VT;
3138 if (VT.isFixedLengthVector()) {
3139 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3140 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3141 }
3142
3143 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3144
3145 // Freeze the source since we are increasing the number of uses.
3146 Src = DAG.getFreeze(Src);
3147
3148   // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
3149 MVT MaskVT = Mask.getSimpleValueType();
3150   SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3151                                 DAG.getVTList(MaskVT, MVT::Other),
3152 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3153 DAG.getUNDEF(MaskVT), Mask, VL});
3154 Chain = Unorder.getValue(1);
3155   Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3156                     DAG.getVTList(ContainerVT, MVT::Other),
3157 {Chain, Src, Src, Src, Unorder, VL});
3158 Chain = Src.getValue(1);
3159
3160 // We do the conversion on the absolute value and fix the sign at the end.
3161 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3162
3163 // Determine the largest integer that can be represented exactly. This and
3164 // values larger than it don't have any fractional bits so don't need to
3165 // be converted.
3166 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3167 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3168 APFloat MaxVal = APFloat(FltSem);
3169 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3170 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3171 SDValue MaxValNode =
3172 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3173 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3174 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3175
3176 // If abs(Src) was larger than MaxVal or nan, keep it.
3177 Mask = DAG.getNode(
3178 RISCVISD::SETCC_VL, DL, MaskVT,
3179 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3180
3181 // Truncate to integer and convert back to FP.
3182 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3183 MVT XLenVT = Subtarget.getXLenVT();
3184 SDValue Truncated;
3185
3186 switch (Op.getOpcode()) {
3187 default:
3188 llvm_unreachable("Unexpected opcode");
3189 case ISD::STRICT_FCEIL:
3190 case ISD::STRICT_FFLOOR:
3191 case ISD::STRICT_FROUND:
3192   case ISD::STRICT_FROUNDEVEN: {
3193     RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3194     assert(FRM != RISCVFPRndMode::Invalid);
3195     Truncated = DAG.getNode(
3196 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3197 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3198 break;
3199 }
3200 case ISD::STRICT_FTRUNC:
3201 Truncated =
3202         DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3203                     DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3204 break;
3205   case ISD::STRICT_FNEARBYINT:
3206     Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3207                             DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3208 Mask, VL);
3209 break;
3210 }
3211 Chain = Truncated.getValue(1);
3212
3213 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3214 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3215 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3216 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3217 Truncated, Mask, VL);
3218 Chain = Truncated.getValue(1);
3219 }
3220
3221 // Restore the original sign so that -0.0 is preserved.
3222 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3223 Src, Src, Mask, VL);
3224
3225 if (VT.isFixedLengthVector())
3226 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3227 return DAG.getMergeValues({Truncated, Chain}, DL);
3228}
3229
3230static SDValue
3231lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3232                       const RISCVSubtarget &Subtarget) {
3233 MVT VT = Op.getSimpleValueType();
3234 if (VT.isVector())
3235 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3236
3237 if (DAG.shouldOptForSize())
3238 return SDValue();
3239
3240 SDLoc DL(Op);
3241 SDValue Src = Op.getOperand(0);
3242
3243 // Create an integer the size of the mantissa with the MSB set. This and all
3244 // values larger than it don't have any fractional bits so don't need to be
3245 // converted.
3246 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3247 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3248 APFloat MaxVal = APFloat(FltSem);
3249 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3250 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3251 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3252
3253   RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3254 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3255 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3256}
3257
3258// Expand vector LRINT and LLRINT by converting to the integer domain.
3259static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3260                               const RISCVSubtarget &Subtarget) {
3261 MVT VT = Op.getSimpleValueType();
3262 assert(VT.isVector() && "Unexpected type");
3263
3264 SDLoc DL(Op);
3265 SDValue Src = Op.getOperand(0);
3266 MVT ContainerVT = VT;
3267
3268 if (VT.isFixedLengthVector()) {
3269 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3270 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3271 }
3272
3273 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3274 SDValue Truncated =
3275 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3276
3277 if (!VT.isFixedLengthVector())
3278 return Truncated;
3279
3280 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3281}
3282
3283static SDValue
3284getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3285                const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3286 SDValue Offset, SDValue Mask, SDValue VL,
3287                unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3288   if (Merge.isUndef())
3289     Policy = RISCVII::TAIL_AGNOSTIC;
3290 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3291 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3292 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3293}
3294
3295static SDValue
3296getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3297             EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3298             SDValue VL,
3299             unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3300   if (Merge.isUndef())
3301     Policy = RISCVII::TAIL_AGNOSTIC;
3302 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3303 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3304 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3305}
3306
3307static MVT getLMUL1VT(MVT VT) {
3308   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3309          "Unexpected vector MVT");
3310   return MVT::getScalableVectorVT(
3311       VT.getVectorElementType(),
3312       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3313}
3314
3315struct VIDSequence {
3316   int64_t StepNumerator;
3317   unsigned StepDenominator;
3318   int64_t Addend;
3319};
3320
3321static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3323 // We will use a SINT_TO_FP to materialize this constant so we should use a
3324 // signed APSInt here.
3325 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3326 // We use an arbitrary rounding mode here. If a floating-point is an exact
3327 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3328 // the rounding mode changes the output value, then it is not an exact
3329 // integer.
3330   RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3331   bool IsExact;
3332 // If it is out of signed integer range, it will return an invalid operation.
3333 // If it is not an exact integer, IsExact is false.
3334 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3336 !IsExact)
3337 return std::nullopt;
3338 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3339}
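// For example (illustrative): getExactInteger(3.0, 8) yields 3,
// getExactInteger(2.5, 8) yields std::nullopt because 2.5 is not an exact
// integer, and getExactInteger(512.0, 8) yields std::nullopt because 512 does
// not fit in a signed 8-bit integer.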
3340
3341// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3342// to the (non-zero) step S and start value X. This can be then lowered as the
3343// RVV sequence (VID * S) + X, for example.
3344// The step S is represented as an integer numerator divided by a positive
3345// denominator. Note that the implementation currently only identifies
3346// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3347// cannot detect 2/3, for example.
3348// Note that this method will also match potentially unappealing index
3349// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3350// determine whether this is worth generating code for.
3351static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3352 unsigned EltSizeInBits) {
3353 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3354 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3355 return std::nullopt;
3356 bool IsInteger = Op.getValueType().isInteger();
3357
3358 std::optional<unsigned> SeqStepDenom;
3359 std::optional<int64_t> SeqStepNum, SeqAddend;
3360 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3361 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3362
3363 // First extract the ops into a list of constant integer values. This may not
3364 // be possible for floats if they're not all representable as integers.
3366 const unsigned OpSize = Op.getScalarValueSizeInBits();
3367 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3368 if (Elt.isUndef()) {
3369 Elts[Idx] = std::nullopt;
3370 continue;
3371 }
3372 if (IsInteger) {
3373 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3374 } else {
3375 auto ExactInteger =
3376 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3377 if (!ExactInteger)
3378 return std::nullopt;
3379 Elts[Idx] = *ExactInteger;
3380 }
3381 }
3382
3383 for (auto [Idx, Elt] : enumerate(Elts)) {
3384 // Assume undef elements match the sequence; we just have to be careful
3385 // when interpolating across them.
3386 if (!Elt)
3387 continue;
3388
3389 if (PrevElt) {
3390 // Calculate the step since the last non-undef element, and ensure
3391 // it's consistent across the entire sequence.
3392 unsigned IdxDiff = Idx - PrevElt->second;
3393 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3394
3395     // A zero value difference means that we're somewhere in the middle
3396 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3397 // step change before evaluating the sequence.
3398 if (ValDiff == 0)
3399 continue;
3400
3401 int64_t Remainder = ValDiff % IdxDiff;
3402 // Normalize the step if it's greater than 1.
3403 if (Remainder != ValDiff) {
3404 // The difference must cleanly divide the element span.
3405 if (Remainder != 0)
3406 return std::nullopt;
3407 ValDiff /= IdxDiff;
3408 IdxDiff = 1;
3409 }
3410
3411 if (!SeqStepNum)
3412 SeqStepNum = ValDiff;
3413 else if (ValDiff != SeqStepNum)
3414 return std::nullopt;
3415
3416 if (!SeqStepDenom)
3417 SeqStepDenom = IdxDiff;
3418 else if (IdxDiff != *SeqStepDenom)
3419 return std::nullopt;
3420 }
3421
3422 // Record this non-undef element for later.
3423 if (!PrevElt || PrevElt->first != *Elt)
3424 PrevElt = std::make_pair(*Elt, Idx);
3425 }
3426
3427 // We need to have logged a step for this to count as a legal index sequence.
3428 if (!SeqStepNum || !SeqStepDenom)
3429 return std::nullopt;
3430
3431 // Loop back through the sequence and validate elements we might have skipped
3432 // while waiting for a valid step. While doing this, log any sequence addend.
3433 for (auto [Idx, Elt] : enumerate(Elts)) {
3434 if (!Elt)
3435 continue;
3436 uint64_t ExpectedVal =
3437 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3438 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3439 if (!SeqAddend)
3440 SeqAddend = Addend;
3441 else if (Addend != SeqAddend)
3442 return std::nullopt;
3443 }
3444
3445 assert(SeqAddend && "Must have an addend if we have a step");
3446
3447 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3448}
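// For example (illustrative): <0, 2, 4, 6> matches as {StepNumerator = 2,
// StepDenominator = 1, Addend = 0}, <1, 1, 2, 2> matches as {1, 2, 1}, and
// <0, 1, 3> does not match because the step is inconsistent.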
3449
3450// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3451// and lower it as a VRGATHER_VX_VL from the source vector.
3452static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3453 SelectionDAG &DAG,
3454 const RISCVSubtarget &Subtarget) {
3455 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3456 return SDValue();
3457 SDValue Vec = SplatVal.getOperand(0);
3458 // Only perform this optimization on vectors of the same size for simplicity.
3459 // Don't perform this optimization for i1 vectors.
3460 // FIXME: Support i1 vectors, maybe by promoting to i8?
3461 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3462 return SDValue();
3463 SDValue Idx = SplatVal.getOperand(1);
3464 // The index must be a legal type.
3465 if (Idx.getValueType() != Subtarget.getXLenVT())
3466 return SDValue();
3467
3468 MVT ContainerVT = VT;
3469 if (VT.isFixedLengthVector()) {
3470 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3471 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3472 }
3473
3474 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3475
3476 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3477 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3478
3479 if (!VT.isFixedLengthVector())
3480 return Gather;
3481
3482 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3483}
3484
3485
3486/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3487/// which constitute a large proportion of the elements. In such cases we can
3488/// splat a vector with the dominant element and make up the shortfall with
3489/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3490/// Note that this includes vectors of 2 elements by association. The
3491/// upper-most element is the "dominant" one, allowing us to use a splat to
3492/// "insert" the upper element, and an insert of the lower element at position
3493/// 0, which improves codegen.
3494static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3495                                                 const RISCVSubtarget &Subtarget) {
3496 MVT VT = Op.getSimpleValueType();
3497 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3498
3499 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3500
3501 SDLoc DL(Op);
3502 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3503
3504 MVT XLenVT = Subtarget.getXLenVT();
3505 unsigned NumElts = Op.getNumOperands();
3506
3507 SDValue DominantValue;
3508 unsigned MostCommonCount = 0;
3509 DenseMap<SDValue, unsigned> ValueCounts;
3510 unsigned NumUndefElts =
3511 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3512
3513 // Track the number of scalar loads we know we'd be inserting, estimated as
3514 // any non-zero floating-point constant. Other kinds of element are either
3515 // already in registers or are materialized on demand. The threshold at which
3516   // a vector load is more desirable than several scalar materializations and
3517 // vector-insertion instructions is not known.
3518 unsigned NumScalarLoads = 0;
3519
3520 for (SDValue V : Op->op_values()) {
3521 if (V.isUndef())
3522 continue;
3523
3524 ValueCounts.insert(std::make_pair(V, 0));
3525 unsigned &Count = ValueCounts[V];
3526 if (0 == Count)
3527 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3528 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3529
3530 // Is this value dominant? In case of a tie, prefer the highest element as
3531 // it's cheaper to insert near the beginning of a vector than it is at the
3532 // end.
3533 if (++Count >= MostCommonCount) {
3534 DominantValue = V;
3535 MostCommonCount = Count;
3536 }
3537 }
3538
3539 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3540 unsigned NumDefElts = NumElts - NumUndefElts;
3541 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3542
3543 // Don't perform this optimization when optimizing for size, since
3544 // materializing elements and inserting them tends to cause code bloat.
3545 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3546 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3547 ((MostCommonCount > DominantValueCountThreshold) ||
3548 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3549 // Start by splatting the most common element.
3550 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3551
3552 DenseSet<SDValue> Processed{DominantValue};
3553
3554 // We can handle an insert into the last element (of a splat) via
3555 // v(f)slide1down. This is slightly better than the vslideup insert
3556 // lowering as it avoids the need for a vector group temporary. It
3557 // is also better than using vmerge.vx as it avoids the need to
3558 // materialize the mask in a vector register.
3559 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3560 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3561 LastOp != DominantValue) {
3562 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3563 auto OpCode =
3564           VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3565       if (!VT.isFloatingPoint())
3566 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3567 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3568 LastOp, Mask, VL);
3569 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3570 Processed.insert(LastOp);
3571 }
3572
3573 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3574 for (const auto &OpIdx : enumerate(Op->ops())) {
3575 const SDValue &V = OpIdx.value();
3576 if (V.isUndef() || !Processed.insert(V).second)
3577 continue;
3578 if (ValueCounts[V] == 1) {
3579 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3580 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3581 } else {
3582 // Blend in all instances of this value using a VSELECT, using a
3583 // mask where each bit signals whether that element is the one
3584 // we're after.
3585           SmallVector<SDValue> Ops;
3586           transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3587 return DAG.getConstant(V == V1, DL, XLenVT);
3588 });
3589 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3590 DAG.getBuildVector(SelMaskTy, DL, Ops),
3591 DAG.getSplatBuildVector(VT, DL, V), Vec);
3592 }
3593 }
3594
3595 return Vec;
3596 }
3597
3598 return SDValue();
3599}
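// For example (illustrative): <1.0, 1.0, 2.5, 1.0> can be lowered as a splat
// of the dominant value 1.0 followed by a single insert of 2.5 at element 2,
// rather than four scalar inserts or a constant-pool load.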
3600
3601static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3602                                 const RISCVSubtarget &Subtarget) {
3603 MVT VT = Op.getSimpleValueType();
3604 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3605
3606 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3607
3608 SDLoc DL(Op);
3609 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3610
3611 MVT XLenVT = Subtarget.getXLenVT();
3612 unsigned NumElts = Op.getNumOperands();
3613
3614 if (VT.getVectorElementType() == MVT::i1) {
3615 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3616 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3617 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3618 }
3619
3620 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3621 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3622 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3623 }
3624
3625 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3626 // scalar integer chunks whose bit-width depends on the number of mask
3627 // bits and XLEN.
3628 // First, determine the most appropriate scalar integer type to use. This
3629 // is at most XLenVT, but may be shrunk to a smaller vector element type
3630 // according to the size of the final vector - use i8 chunks rather than
3631 // XLenVT if we're producing a v8i1. This results in more consistent
3632 // codegen across RV32 and RV64.
3633 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3634 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3635 // If we have to use more than one INSERT_VECTOR_ELT then this
3636     // optimization is likely to increase code size; avoid performing it in
3637 // such a case. We can use a load from a constant pool in this case.
3638 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3639 return SDValue();
3640 // Now we can create our integer vector type. Note that it may be larger
3641 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3642 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3643 MVT IntegerViaVecVT =
3644 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3645 IntegerViaVecElts);
3646
3647 uint64_t Bits = 0;
3648 unsigned BitPos = 0, IntegerEltIdx = 0;
3649 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3650
3651 for (unsigned I = 0; I < NumElts;) {
3652 SDValue V = Op.getOperand(I);
3653 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3654 Bits |= ((uint64_t)BitValue << BitPos);
3655 ++BitPos;
3656 ++I;
3657
3658 // Once we accumulate enough bits to fill our scalar type or process the
3659 // last element, insert into our vector and clear our accumulated data.
3660 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3661 if (NumViaIntegerBits <= 32)
3662 Bits = SignExtend64<32>(Bits);
3663 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3664 Elts[IntegerEltIdx] = Elt;
3665 Bits = 0;
3666 BitPos = 0;
3667 IntegerEltIdx++;
3668 }
3669 }
3670
3671 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3672
3673 if (NumElts < NumViaIntegerBits) {
3674 // If we're producing a smaller vector than our minimum legal integer
3675 // type, bitcast to the equivalent (known-legal) mask type, and extract
3676 // our final mask.
3677 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3678 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3679 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3680 DAG.getConstant(0, DL, XLenVT));
3681 } else {
3682 // Else we must have produced an integer type with the same size as the
3683 // mask type; bitcast for the final result.
3684 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3685 Vec = DAG.getBitcast(VT, Vec);
3686 }
3687
3688 return Vec;
3689 }
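  // For example (illustrative): the constant mask v8i1 <1,0,1,1,0,0,1,0> is
  // built as the single i8 constant 0b01001101 in a v1i8 vector and bitcast
  // back to v8i1, instead of inserting eight individual bits.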
3690
3691 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3692     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3693                                         : RISCVISD::VMV_V_X_VL;
3694 if (!VT.isFloatingPoint())
3695 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3696 Splat =
3697 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3698 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3699 }
3700
3701 // Try and match index sequences, which we can lower to the vid instruction
3702 // with optional modifications. An all-undef vector is matched by
3703 // getSplatValue, above.
3704 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3705 int64_t StepNumerator = SimpleVID->StepNumerator;
3706 unsigned StepDenominator = SimpleVID->StepDenominator;
3707 int64_t Addend = SimpleVID->Addend;
3708
3709 assert(StepNumerator != 0 && "Invalid step");
3710 bool Negate = false;
3711 int64_t SplatStepVal = StepNumerator;
3712 unsigned StepOpcode = ISD::MUL;
3713 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3714 // anyway as the shift of 63 won't fit in uimm5.
3715 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3716 isPowerOf2_64(std::abs(StepNumerator))) {
3717 Negate = StepNumerator < 0;
3718 StepOpcode = ISD::SHL;
3719 SplatStepVal = Log2_64(std::abs(StepNumerator));
3720 }
3721
3722     // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3723 // threshold since it's the immediate value many RVV instructions accept.
3724 // There is no vmul.vi instruction so ensure multiply constant can fit in
3725 // a single addi instruction.
3726 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3727 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3728 isPowerOf2_32(StepDenominator) &&
3729 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3730 MVT VIDVT =
3731           VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3732       MVT VIDContainerVT =
3733 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3734 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3735 // Convert right out of the scalable type so we can use standard ISD
3736 // nodes for the rest of the computation. If we used scalable types with
3737 // these, we'd lose the fixed-length vector info and generate worse
3738 // vsetvli code.
3739 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3740 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3741 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3742 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3743 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3744 }
3745 if (StepDenominator != 1) {
3746 SDValue SplatStep =
3747 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3748 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3749 }
3750 if (Addend != 0 || Negate) {
3751 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3752 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3753 VID);
3754 }
3755 if (VT.isFloatingPoint()) {
3756 // TODO: Use vfwcvt to reduce register pressure.
3757 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3758 }
3759 return VID;
3760 }
3761 }
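  // For example (illustrative): <0, 2, 4, 6> is matched with step 2 and lowers
  // roughly to vid.v followed by a shift left by 1, while <3, 4, 5, 6> lowers
  // to vid.v followed by an add of the splatted constant 3.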
3762
3763 // For very small build_vectors, use a single scalar insert of a constant.
3764 // TODO: Base this on constant rematerialization cost, not size.
3765 const unsigned EltBitSize = VT.getScalarSizeInBits();
3766 if (VT.getSizeInBits() <= 32 &&
3767       ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3768     MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3769 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3770 "Unexpected sequence type");
3771 // If we can use the original VL with the modified element type, this
3772 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3773 // be moved into InsertVSETVLI?
3774 unsigned ViaVecLen =
3775 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3776 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3777
3778 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3779 uint64_t SplatValue = 0;
3780 // Construct the amalgamated value at this larger vector type.
3781 for (const auto &OpIdx : enumerate(Op->op_values())) {
3782 const auto &SeqV = OpIdx.value();
3783 if (!SeqV.isUndef())
3784 SplatValue |=
3785 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3786 }
3787
3788 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3789     // achieve better constant materialization.
3790 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3791 SplatValue = SignExtend64<32>(SplatValue);
3792
3793 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3794 DAG.getUNDEF(ViaVecVT),
3795 DAG.getConstant(SplatValue, DL, XLenVT),
3796 DAG.getVectorIdxConstant(0, DL));
3797 if (ViaVecLen != 1)
3798       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3799                       MVT::getVectorVT(ViaIntVT, 1), Vec,
3800 DAG.getConstant(0, DL, XLenVT));
3801 return DAG.getBitcast(VT, Vec);
3802 }
3803
3804
3805 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3806 // when re-interpreted as a vector with a larger element type. For example,
3807 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3808 // could be instead splat as
3809 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3810 // TODO: This optimization could also work on non-constant splats, but it
3811 // would require bit-manipulation instructions to construct the splat value.
3812 SmallVector<SDValue> Sequence;
3813 const auto *BV = cast<BuildVectorSDNode>(Op);
3814 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3816 BV->getRepeatedSequence(Sequence) &&
3817 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3818 unsigned SeqLen = Sequence.size();
3819 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3820 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3821 ViaIntVT == MVT::i64) &&
3822 "Unexpected sequence type");
3823
3824 // If we can use the original VL with the modified element type, this
3825 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3826 // be moved into InsertVSETVLI?
3827 const unsigned RequiredVL = NumElts / SeqLen;
3828 const unsigned ViaVecLen =
3829 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3830 NumElts : RequiredVL;
3831 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3832
3833 unsigned EltIdx = 0;
3834 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3835 uint64_t SplatValue = 0;
3836 // Construct the amalgamated value which can be splatted as this larger
3837 // vector type.
3838 for (const auto &SeqV : Sequence) {
3839 if (!SeqV.isUndef())
3840 SplatValue |=
3841 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3842 EltIdx++;
3843 }
3844
3845 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3846 // achieve better constant materialization.
3847 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3848 SplatValue = SignExtend64<32>(SplatValue);
3849
3850 // Since we can't introduce illegal i64 types at this stage, we can only
3851 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3852 // way we can use RVV instructions to splat.
3853 assert((ViaIntVT.bitsLE(XLenVT) ||
3854 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3855 "Unexpected bitcast sequence");
3856 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3857 SDValue ViaVL =
3858 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3859 MVT ViaContainerVT =
3860 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3861 SDValue Splat =
3862 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3863 DAG.getUNDEF(ViaContainerVT),
3864 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3865 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3866 if (ViaVecLen != RequiredVL)
3867 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3868 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3869 DAG.getConstant(0, DL, XLenVT));
3870 return DAG.getBitcast(VT, Splat);
3871 }
3872 }
3873
3874 // If the number of signbits allows, see if we can lower as a <N x i8>.
3875 // Our main goal here is to reduce LMUL (and thus work) required to
3876 // build the constant, but we will also narrow if the resulting
3877 // narrow vector is known to materialize cheaply.
3878 // TODO: We really should be costing the smaller vector. There are
3879 // profitable cases this misses.
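// For instance, a v4i16 constant like <-3, 0, 1, 2> fits in 8 signed bits
// per element, so it can be built as a v4i8 vector and widened back with a
// sign-extending VSEXT_VL, roughly halving the register group width needed
// for the build itself.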
3880 if (EltBitSize > 8 && VT.isInteger() &&
3881 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3882 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3883 if (EltBitSize - SignBits < 8) {
3884 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3885 DL, Op->ops());
3886 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3887 Source, DAG, Subtarget);
3888 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3889 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3890 }
3891 }
3892
3893 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3894 return Res;
3895
3896 // For constant vectors, use generic constant pool lowering. Otherwise,
3897 // we'd have to materialize constants in GPRs just to move them into the
3898 // vector.
3899 return SDValue();
3900}
3901
3902 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3903 const RISCVSubtarget &Subtarget) {
3904 MVT VT = Op.getSimpleValueType();
3905 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3906
3907 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3908 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3909 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3910
3911 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3912
3913 SDLoc DL(Op);
3914 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3915
3916 MVT XLenVT = Subtarget.getXLenVT();
3917
3918 if (VT.getVectorElementType() == MVT::i1) {
3919 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3920 // vector type, we have a legal equivalently-sized i8 type, so we can use
3921 // that.
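// E.g. a v4i1 build_vector <1, 0, undef, 1> becomes a v4i8 build_vector
// whose elements are masked down to 0 or 1, and the final mask register is
// produced by comparing that i8 vector against zero with SETNE.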
3922 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3923 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3924
3925 SDValue WideVec;
3926 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3927 // For a splat, perform a scalar truncate before creating the wider
3928 // vector.
3929 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3930 DAG.getConstant(1, DL, Splat.getValueType()));
3931 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3932 } else {
3933 SmallVector<SDValue, 8> Ops(Op->op_values());
3934 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3935 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3936 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3937 }
3938
3939 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3940 }
3941
3942 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3943 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3944 return Gather;
3945 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3946 : RISCVISD::VMV_V_X_VL;
3947 if (!VT.isFloatingPoint())
3948 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3949 Splat =
3950 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3951 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3952 }
3953
3954 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3955 return Res;
3956
3957 // If we're compiling for an exact VLEN value, we can split our work per
3958 // register in the register group.
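// For example, with a known VLEN of 128 a v8i64 build_vector spans four
// vector registers; each group of two elements is built as its own v2i64
// build_vector and inserted into the register group at m1 granularity.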
3959 if (const auto VLen = Subtarget.getRealVLen();
3960 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3961 MVT ElemVT = VT.getVectorElementType();
3962 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3963 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3964 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3965 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3966 assert(M1VT == getLMUL1VT(M1VT));
3967
3968 // The following semantically builds up a fixed length concat_vector
3969 // of the component build_vectors. We eagerly lower to scalable and
3970 // insert_subvector here to avoid DAG combining it back to a large
3971 // build_vector.
3972 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3973 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3974 SDValue Vec = DAG.getUNDEF(ContainerVT);
3975 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3976 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3977 SDValue SubBV =
3978 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3979 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3980 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3981 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3982 DAG.getVectorIdxConstant(InsertIdx, DL));
3983 }
3984 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3985 }
3986
3987 // For m1 vectors, if we have non-undef values in both halves of our vector,
3988 // split the vector into low and high halves, build them separately, then
3989 // use a vselect to combine them. For long vectors, this cuts the critical
3990 // path of the vslide1down sequence in half, and gives us an opportunity
3991 // to special case each half independently. Note that we don't change the
3992 // length of the sub-vectors here, so if both fall back to the generic
3993 // vslide1down path, we should be able to fold the vselect into the final
3994 // vslidedown (for the undef tail) for the first half w/ masking.
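// Concretely, <a, b, c, d, e, f, g, h> is rebuilt as the two build_vectors
// <a, b, c, d, u, u, u, u> and <u, u, u, u, e, f, g, h>, which are merged
// with a vselect whose mask selects the first operand for the low half.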
3995 unsigned NumElts = VT.getVectorNumElements();
3996 unsigned NumUndefElts =
3997 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3998 unsigned NumDefElts = NumElts - NumUndefElts;
3999 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4000 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4001 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4002 SmallVector<SDValue> MaskVals;
4003 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4004 SubVecAOps.reserve(NumElts);
4005 SubVecBOps.reserve(NumElts);
4006 for (unsigned i = 0; i < NumElts; i++) {
4007 SDValue Elem = Op->getOperand(i);
4008 if (i < NumElts / 2) {
4009 SubVecAOps.push_back(Elem);
4010 SubVecBOps.push_back(UndefElem);
4011 } else {
4012 SubVecAOps.push_back(UndefElem);
4013 SubVecBOps.push_back(Elem);
4014 }
4015 bool SelectMaskVal = (i < NumElts / 2);
4016 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4017 }
4018 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4019 MaskVals.size() == NumElts);
4020
4021 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4022 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4023 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4024 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4025 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4026 }
4027
4028 // Cap the cost at a value linear in the number of elements in the vector.
4029 // The default lowering is to use the stack. The vector store + scalar loads
4030 // is linear in VL. However, at high LMULs vslide1down and vslidedown end up
4031 // being (at least) linear in LMUL. As a result, using the vslidedown
4032 // lowering for every element ends up costing VL*LMUL.
4033 // TODO: Should we be directly costing the stack alternative? Doing so might
4034 // give us a more accurate upper bound.
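// As a rough illustration: a fully defined v16i32 build_vector with a
// minimum VLEN of 128 lives in an LMUL=4 register group, so sixteen slides
// at a per-slide cost of 4 blow through the budget of 32 below and we fall
// back to the generic (stack-based) lowering.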
4035 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4036
4037 // TODO: unify with TTI getSlideCost.
4038 InstructionCost PerSlideCost = 1;
4039 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4040 default: break;
4041 case RISCVII::VLMUL::LMUL_2:
4042 PerSlideCost = 2;
4043 break;
4044 case RISCVII::VLMUL::LMUL_4:
4045 PerSlideCost = 4;
4046 break;
4047 case RISCVII::VLMUL::LMUL_8:
4048 PerSlideCost = 8;
4049 break;
4050 }
4051
4052 // TODO: Should we be using the build instseq then cost + evaluate scheme
4053 // we use for integer constants here?
4054 unsigned UndefCount = 0;
4055 for (const SDValue &V : Op->ops()) {
4056 if (V.isUndef()) {
4057 UndefCount++;
4058 continue;
4059 }
4060 if (UndefCount) {
4061 LinearBudget -= PerSlideCost;
4062 UndefCount = 0;
4063 }
4064 LinearBudget -= PerSlideCost;
4065 }
4066 if (UndefCount) {
4067 LinearBudget -= PerSlideCost;
4068 }
4069
4070 if (LinearBudget < 0)
4071 return SDValue();
4072
4073 assert((!VT.isFloatingPoint() ||
4074 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4075 "Illegal type which will result in reserved encoding");
4076
4077 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4078
4079 SDValue Vec;
4080 UndefCount = 0;
4081 for (SDValue V : Op->ops()) {
4082 if (V.isUndef()) {
4083 UndefCount++;
4084 continue;
4085 }
4086
4087 // Start our sequence with a TA splat in the hopes that hardware is able to
4088 // recognize there's no dependency on the prior value of our temporary
4089 // register.
4090 if (!Vec) {
4091 Vec = DAG.getSplatVector(VT, DL, V);
4092 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4093 UndefCount = 0;
4094 continue;
4095 }
4096
4097 if (UndefCount) {
4098 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4099 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4100 Vec, Offset, Mask, VL, Policy);
4101 UndefCount = 0;
4102 }
4103 auto OpCode =
4104 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4105 if (!VT.isFloatingPoint())
4106 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4107 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4108 V, Mask, VL);
4109 }
4110 if (UndefCount) {
4111 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4112 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4113 Vec, Offset, Mask, VL, Policy);
4114 }
4115 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4116}
4117
4118 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4119 SDValue Lo, SDValue Hi, SDValue VL,
4120 SelectionDAG &DAG) {
4121 if (!Passthru)
4122 Passthru = DAG.getUNDEF(VT);
4123 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4124 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4125 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4126 // If Hi constant is all the same sign bit as Lo, lower this as a custom
4127 // node in order to try and match RVV vector/scalar instructions.
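// E.g. Lo = -5 (0xFFFFFFFB) and Hi = -1: the combined i64 value is just the
// sign extension of Lo, so a single vector/scalar splat of Lo suffices.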
4128 if ((LoC >> 31) == HiC)
4129 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4130
4131 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4132 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4133 // vlmax vsetvli or vsetivli to change the VL.
4134 // FIXME: Support larger constants?
4135 // FIXME: Support non-constant VLs by saturating?
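// E.g. splatting the i64 value 0x0000002A0000002A can instead be done as an
// i32 splat of 42 over twice as many elements, hence the doubled VL below.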
4136 if (LoC == HiC) {
4137 SDValue NewVL;
4138 if (isAllOnesConstant(VL) ||
4139 (isa<RegisterSDNode>(VL) &&
4140 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4141 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4142 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4143 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4144
4145 if (NewVL) {
4146 MVT InterVT =
4147 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4148 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4149 DAG.getUNDEF(InterVT), Lo, NewVL);
4150 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4151 }
4152 }
4153 }
4154
4155 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4156 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4157 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4158 Hi.getConstantOperandVal(1) == 31)