1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/Support/Debug.h"
46#include <optional>
47
48using namespace llvm;
49
50#define DEBUG_TYPE "riscv-lower"
51
52STATISTIC(NumTailCalls, "Number of tail calls");
53
55 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
56 cl::desc("Give the maximum size (in number of nodes) of the web of "
57 "instructions that we will consider for VW expansion"),
58 cl::init(18));
59
60static cl::opt<bool>
61 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
62 cl::desc("Allow the formation of VW_W operations (e.g., "
63 "VWADD_W) with splat constants"),
64 cl::init(false));
65
67 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
68 cl::desc("Set the minimum number of repetitions of a divisor to allow "
69 "transformation to multiplications by the reciprocal"),
70 cl::init(2));
71
72static cl::opt<int>
74 cl::desc("Give the maximum number of instructions that we will "
75 "use for creating a floating-point immediate value"),
76 cl::init(2));
77
78static cl::opt<bool>
79 RV64LegalI32("riscv-experimental-rv64-legal-i32", cl::ReallyHidden,
80 cl::desc("Make i32 a legal type for SelectionDAG on RV64."));
81
83 const RISCVSubtarget &STI)
84 : TargetLowering(TM), Subtarget(STI) {
85
86 RISCVABI::ABI ABI = Subtarget.getTargetABI();
87 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
88
89 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
90 !Subtarget.hasStdExtF()) {
91 errs() << "Hard-float 'f' ABI can't be used for a target that "
92 "doesn't support the F instruction set extension (ignoring "
93 "target-abi)\n";
94 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
95 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
96 !Subtarget.hasStdExtD()) {
97 errs() << "Hard-float 'd' ABI can't be used for a target that "
98 "doesn't support the D instruction set extension (ignoring "
99 "target-abi)\n";
100 ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
101 }
102
103 switch (ABI) {
104 default:
105 report_fatal_error("Don't know how to lower this ABI");
114 break;
115 }
116
117 MVT XLenVT = Subtarget.getXLenVT();
118
119 // Set up the register classes.
120 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
121 if (Subtarget.is64Bit() && RV64LegalI32)
122 addRegisterClass(MVT::i32, &RISCV::GPRRegClass);
123
124 if (Subtarget.hasStdExtZfhmin())
125 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
126 if (Subtarget.hasStdExtZfbfmin())
127 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
128 if (Subtarget.hasStdExtF())
129 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
130 if (Subtarget.hasStdExtD())
131 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
132 if (Subtarget.hasStdExtZhinxmin())
133 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
134 if (Subtarget.hasStdExtZfinx())
135 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
136 if (Subtarget.hasStdExtZdinx()) {
137 if (Subtarget.is64Bit())
138 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
139 else
140 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
141 }
142
143 static const MVT::SimpleValueType BoolVecVTs[] = {
144 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
145 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
146 static const MVT::SimpleValueType IntVecVTs[] = {
147 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
148 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
149 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
150 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
151 MVT::nxv4i64, MVT::nxv8i64};
152 static const MVT::SimpleValueType F16VecVTs[] = {
153 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
154 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
155 static const MVT::SimpleValueType BF16VecVTs[] = {
156 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
157 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
158 static const MVT::SimpleValueType F32VecVTs[] = {
159 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
160 static const MVT::SimpleValueType F64VecVTs[] = {
161 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
162
163 if (Subtarget.hasVInstructions()) {
164 auto addRegClassForRVV = [this](MVT VT) {
165 // Disable the smallest fractional LMUL types if ELEN is less than
166 // RVVBitsPerBlock.
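// For example, with ELEN=32 (Zve32*), MinElts below evaluates to 64/32 = 2,
// so types with fewer than two minimum elements (e.g. nxv1i8) are skipped.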
167 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
168 if (VT.getVectorMinNumElements() < MinElts)
169 return;
170
171 unsigned Size = VT.getSizeInBits().getKnownMinValue();
172 const TargetRegisterClass *RC;
173 if (Size <= RISCV::RVVBitsPerBlock)
174 RC = &RISCV::VRRegClass;
175 else if (Size == 2 * RISCV::RVVBitsPerBlock)
176 RC = &RISCV::VRM2RegClass;
177 else if (Size == 4 * RISCV::RVVBitsPerBlock)
178 RC = &RISCV::VRM4RegClass;
179 else if (Size == 8 * RISCV::RVVBitsPerBlock)
180 RC = &RISCV::VRM8RegClass;
181 else
182 llvm_unreachable("Unexpected size");
183
184 addRegisterClass(VT, RC);
185 };
186
187 for (MVT VT : BoolVecVTs)
188 addRegClassForRVV(VT);
189 for (MVT VT : IntVecVTs) {
190 if (VT.getVectorElementType() == MVT::i64 &&
191 !Subtarget.hasVInstructionsI64())
192 continue;
193 addRegClassForRVV(VT);
194 }
195
196 if (Subtarget.hasVInstructionsF16Minimal())
197 for (MVT VT : F16VecVTs)
198 addRegClassForRVV(VT);
199
200 if (Subtarget.hasVInstructionsBF16())
201 for (MVT VT : BF16VecVTs)
202 addRegClassForRVV(VT);
203
204 if (Subtarget.hasVInstructionsF32())
205 for (MVT VT : F32VecVTs)
206 addRegClassForRVV(VT);
207
208 if (Subtarget.hasVInstructionsF64())
209 for (MVT VT : F64VecVTs)
210 addRegClassForRVV(VT);
211
212 if (Subtarget.useRVVForFixedLengthVectors()) {
213 auto addRegClassForFixedVectors = [this](MVT VT) {
214 MVT ContainerVT = getContainerForFixedLengthVector(VT);
215 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
216 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
217 addRegisterClass(VT, TRI.getRegClass(RCID));
218 };
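// Each fixed-length vector type is registered with the register class of its
// scalable "container" type, so fixed and scalable vectors share the same RVV
// register file.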
219 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
220 if (useRVVForFixedLengthVectorVT(VT))
221 addRegClassForFixedVectors(VT);
222
223 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
224 if (useRVVForFixedLengthVectorVT(VT))
225 addRegClassForFixedVectors(VT);
226 }
227 }
228
229 // Compute derived properties from the register classes.
231
233
235 MVT::i1, Promote);
236 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
238 MVT::i1, Promote);
239
240 // TODO: add all necessary setOperationAction calls.
242
245 if (RV64LegalI32 && Subtarget.is64Bit())
249 if (RV64LegalI32 && Subtarget.is64Bit())
251
258
259 if (RV64LegalI32 && Subtarget.is64Bit())
261
263
266 if (RV64LegalI32 && Subtarget.is64Bit())
268
270
272
273 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
274 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
275
276 if (Subtarget.is64Bit()) {
278
279 if (!RV64LegalI32) {
282 MVT::i32, Custom);
284 MVT::i32, Custom);
285 if (!Subtarget.hasStdExtZbb())
287 } else {
289 if (Subtarget.hasStdExtZbb()) {
292 }
293 }
295 } else {
297 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
298 nullptr);
299 setLibcallName(RTLIB::MULO_I64, nullptr);
300 }
301
302 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
304 if (RV64LegalI32 && Subtarget.is64Bit())
306 } else if (Subtarget.is64Bit()) {
308 if (!RV64LegalI32)
310 else
312 } else {
314 }
315
316 if (!Subtarget.hasStdExtM()) {
318 XLenVT, Expand);
319 if (RV64LegalI32 && Subtarget.is64Bit())
321 Promote);
322 } else if (Subtarget.is64Bit()) {
323 if (!RV64LegalI32)
325 {MVT::i8, MVT::i16, MVT::i32}, Custom);
326 }
327
328 if (RV64LegalI32 && Subtarget.is64Bit()) {
332 Expand);
333 }
334
337 Expand);
338
340 Custom);
341
342 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
343 if (!RV64LegalI32 && Subtarget.is64Bit())
345 } else if (Subtarget.hasVendorXTHeadBb()) {
346 if (Subtarget.is64Bit())
349 } else if (Subtarget.hasVendorXCVbitmanip()) {
351 } else {
353 if (RV64LegalI32 && Subtarget.is64Bit())
355 }
356
357 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
358 // pattern match it directly in isel.
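// (rev8 reverses the bytes of a full XLEN register, so BSWAP can simply be
// marked Legal when one of these extensions is available and Expand
// otherwise.)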
360 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
361 Subtarget.hasVendorXTHeadBb())
362 ? Legal
363 : Expand);
364 if (RV64LegalI32 && Subtarget.is64Bit())
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Promote
369 : Expand);
370
371
372 if (Subtarget.hasVendorXCVbitmanip()) {
374 } else {
375 // Zbkb can use rev8+brev8 to implement bitreverse.
377 Subtarget.hasStdExtZbkb() ? Custom : Expand);
378 }
379
380 if (Subtarget.hasStdExtZbb()) {
382 Legal);
383 if (RV64LegalI32 && Subtarget.is64Bit())
385 Promote);
386
387 if (Subtarget.is64Bit()) {
388 if (RV64LegalI32)
390 else
392 }
393 } else if (!Subtarget.hasVendorXCVbitmanip()) {
395 if (RV64LegalI32 && Subtarget.is64Bit())
397 }
398
399 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
400 Subtarget.hasVendorXCVbitmanip()) {
401 // We need the custom lowering to make sure that the resulting sequence
402 // for the 32-bit case is efficient on 64-bit targets.
403 if (Subtarget.is64Bit()) {
404 if (RV64LegalI32) {
406 Subtarget.hasStdExtZbb() ? Legal : Promote);
407 if (!Subtarget.hasStdExtZbb())
409 } else
411 }
412 } else {
414 if (RV64LegalI32 && Subtarget.is64Bit())
416 }
417
418 if (!RV64LegalI32 && Subtarget.is64Bit() &&
419 !Subtarget.hasShortForwardBranchOpt())
421
422 // We can use PseudoCCSUB to implement ABS.
423 if (Subtarget.hasShortForwardBranchOpt())
425
426 if (!Subtarget.hasVendorXTHeadCondMov()) {
428 if (RV64LegalI32 && Subtarget.is64Bit())
430 }
431
432 static const unsigned FPLegalNodeTypes[] = {
439
440 static const ISD::CondCode FPCCToExpand[] = {
444
445 static const unsigned FPOpToExpand[] = {
447 ISD::FREM};
448
449 static const unsigned FPRndMode[] = {
452
453 if (Subtarget.hasStdExtZfhminOrZhinxmin())
455
456 static const unsigned ZfhminZfbfminPromoteOps[] = {
466
467 if (Subtarget.hasStdExtZfbfmin()) {
476 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
478 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
479 // DAGCombiner::visitFP_ROUND probably needs improvements first.
481 }
482
483 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
484 if (Subtarget.hasStdExtZfhOrZhinx()) {
485 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
486 setOperationAction(FPRndMode, MVT::f16,
487 Subtarget.hasStdExtZfa() ? Legal : Custom);
490 } else {
491 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
494 MVT::f16, Legal);
495 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
496 // DAGCombiner::visitFP_ROUND probably needs improvements first.
498 }
499
502 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
505
507 Subtarget.hasStdExtZfa() ? Legal : Promote);
512 MVT::f16, Promote);
513
514 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
515 // complete support for all operations in LegalizeDAG.
520 MVT::f16, Promote);
521
522 // We need to custom promote this.
523 if (Subtarget.is64Bit())
525
526 if (!Subtarget.hasStdExtZfa())
528 }
529
530 if (Subtarget.hasStdExtFOrZfinx()) {
531 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
532 setOperationAction(FPRndMode, MVT::f32,
533 Subtarget.hasStdExtZfa() ? Legal : Custom);
534 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
538 setOperationAction(FPOpToExpand, MVT::f32, Expand);
539 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
540 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
541 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
542 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
546 Subtarget.isSoftFPABI() ? LibCall : Custom);
549
550 if (Subtarget.hasStdExtZfa())
552 else
554 }
555
556 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
558
559 if (Subtarget.hasStdExtDOrZdinx()) {
560 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
561
562 if (Subtarget.hasStdExtZfa()) {
563 setOperationAction(FPRndMode, MVT::f64, Legal);
567 } else {
568 if (Subtarget.is64Bit())
569 setOperationAction(FPRndMode, MVT::f64, Custom);
570
572 }
573
576 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
580 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
581 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
582 setOperationAction(FPOpToExpand, MVT::f64, Expand);
583 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
584 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
585 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
586 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
590 Subtarget.isSoftFPABI() ? LibCall : Custom);
593 }
594
595 if (Subtarget.is64Bit()) {
598 MVT::i32, Custom);
600 }
601
602 if (Subtarget.hasStdExtFOrZfinx()) {
604 Custom);
605
608 XLenVT, Legal);
609
610 if (RV64LegalI32 && Subtarget.is64Bit())
613 MVT::i32, Legal);
614
617 }
618
621 XLenVT, Custom);
622
624
625 if (Subtarget.is64Bit())
627
628 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
629 // Unfortunately this can't be determined just from the ISA naming string.
631 Subtarget.is64Bit() ? Legal : Custom);
633 Subtarget.is64Bit() ? Legal : Custom);
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (Subtarget.hasVInstructions()) {
662
664 if (RV64LegalI32 && Subtarget.is64Bit())
666
667 // RVV intrinsics may have illegal operands.
668 // We also need to custom legalize vmv.x.s.
671 {MVT::i8, MVT::i16}, Custom);
672 if (Subtarget.is64Bit())
674 MVT::i32, Custom);
675 else
677 MVT::i64, Custom);
678
680 MVT::Other, Custom);
681
682 static const unsigned IntegerVPOps[] = {
683 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
684 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
685 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
686 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
687 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
688 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
689 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
690 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
691 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
692 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
693 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
694 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
695 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
696 ISD::VP_USUBSAT};
697
698 static const unsigned FloatingPointVPOps[] = {
699 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
700 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
701 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
702 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
703 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
704 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
705 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
706 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
707 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
708 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
709 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
710 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
711 ISD::EXPERIMENTAL_VP_SPLICE};
712
713 static const unsigned IntegerVecReduceOps[] = {
717
718 static const unsigned FloatingPointVecReduceOps[] = {
721
722 if (!Subtarget.is64Bit()) {
723 // We must custom-lower certain vXi64 operations on RV32 due to the vector
724 // element type being illegal.
726 MVT::i64, Custom);
727
728 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
729
730 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
731 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
732 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
733 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
734 MVT::i64, Custom);
735 }
736
737 for (MVT VT : BoolVecVTs) {
738 if (!isTypeLegal(VT))
739 continue;
740
742
743 // Mask VTs are custom-expanded into a series of standard nodes
747 VT, Custom);
748
750 Custom);
751
754 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
755 Expand);
756
757 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
758
761 Custom);
762
764 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
765 Custom);
766
767 // RVV has native int->float & float->int conversions where the
768 // element type sizes are within one power-of-two of each other. Any
769 // wider distances between type sizes have to be lowered as sequences
770 // which progressively narrow the gap in stages.
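// (Each RVV widening/narrowing conversion changes the element width by
// exactly one power of two, so e.g. an i8 <-> f64 conversion is done in
// multiple steps.)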
775 VT, Custom);
777 Custom);
778
779 // Expand all extending loads to types larger than this, and truncating
780 // stores from types larger than this.
782 setTruncStoreAction(VT, OtherVT, Expand);
784 OtherVT, Expand);
785 }
786
787 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
788 ISD::VP_TRUNCATE, ISD::VP_SETCC},
789 VT, Custom);
790
793
795
796 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
797 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
798
801 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
802 }
803
804 for (MVT VT : IntVecVTs) {
805 if (!isTypeLegal(VT))
806 continue;
807
810
811 // Vectors implement MULHS/MULHU.
813
814 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
815 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
817
819 Legal);
820
821 // Custom-lower extensions and truncations from/to mask types.
823 VT, Custom);
824
825 // RVV has native int->float & float->int conversions where the
826 // element type sizes are within one power-of-two of each other. Any
827 // wider distances between type sizes have to be lowered as sequences
828 // which progressively narrow the gap in stages.
833 VT, Custom);
835 Custom);
838 VT, Legal);
839
840 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
841 // nodes which truncate by one power of two at a time.
843
844 // Custom-lower insert/extract operations to simplify patterns.
846 Custom);
847
848 // Custom-lower reduction operations to set up the corresponding custom
849 // nodes' operands.
850 setOperationAction(IntegerVecReduceOps, VT, Custom);
851
852 setOperationAction(IntegerVPOps, VT, Custom);
853
855
857 VT, Custom);
858
860 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
861 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
862 VT, Custom);
863
866 VT, Custom);
867
870
872
874 setTruncStoreAction(VT, OtherVT, Expand);
876 OtherVT, Expand);
877 }
878
881
882 // Splice
884
885 if (Subtarget.hasStdExtZvkb()) {
887 setOperationAction(ISD::VP_BSWAP, VT, Custom);
888 } else {
889 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
891 }
892
893 if (Subtarget.hasStdExtZvbb()) {
895 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
896 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
897 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
898 VT, Custom);
899 } else {
900 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
902 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
903 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
904 VT, Expand);
905
906 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
907 // in the range of f32.
908 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
909 if (isTypeLegal(FloatVT)) {
911 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
912 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
913 VT, Custom);
914 }
915 }
916 }
917
918 // Expand various CCs to best match the RVV ISA, which natively supports UNE
919 // but no other unordered comparisons, and supports all ordered comparisons
920 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
921 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
922 // and we pattern-match those back to the "original", swapping operands once
923 // more. This way we catch both operations and both "vf" and "fv" forms with
924 // fewer patterns.
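// For example, SETOGT is expanded to SETOLT with the operands swapped, and
// isel later matches that back to the appropriate vmflt/vmfgt form.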
925 static const ISD::CondCode VFPCCToExpand[] = {
929 };
930
931 // TODO: support more ops.
932 static const unsigned ZvfhminPromoteOps[] = {
940
941 // TODO: support more vp ops.
942 static const unsigned ZvfhminPromoteVPOps[] = {
943 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
944 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
945 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
946 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
947 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
948 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
949 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
950 ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
951 ISD::VP_FMAXIMUM};
952
953 // Sets common operation actions on RVV floating-point vector types.
954 const auto SetCommonVFPActions = [&](MVT VT) {
956 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
957 // sizes are within one power-of-two of each other. Therefore conversions
958 // between vXf16 and vXf64 must be lowered as sequences which convert via
959 // vXf32.
962 // Custom-lower insert/extract operations to simplify patterns.
964 Custom);
965 // Expand various condition codes (explained above).
966 setCondCodeAction(VFPCCToExpand, VT, Expand);
967
970
974 VT, Custom);
975
976 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
977
978 // Expand FP operations that need libcalls.
990
992
994
996 VT, Custom);
997
999 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1000 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1001 VT, Custom);
1002
1005
1008 VT, Custom);
1009
1012
1014
1015 setOperationAction(FloatingPointVPOps, VT, Custom);
1016
1018 Custom);
1021 VT, Legal);
1026 VT, Custom);
1027 };
1028
1029 // Sets common extload/truncstore actions on RVV floating-point vector
1030 // types.
1031 const auto SetCommonVFPExtLoadTruncStoreActions =
1032 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1033 for (auto SmallVT : SmallerVTs) {
1034 setTruncStoreAction(VT, SmallVT, Expand);
1035 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1036 }
1037 };
1038
1039 if (Subtarget.hasVInstructionsF16()) {
1040 for (MVT VT : F16VecVTs) {
1041 if (!isTypeLegal(VT))
1042 continue;
1043 SetCommonVFPActions(VT);
1044 }
1045 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1046 for (MVT VT : F16VecVTs) {
1047 if (!isTypeLegal(VT))
1048 continue;
1051 Custom);
1052 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1053 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1054 Custom);
1057 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1058 VT, Custom);
1061 VT, Custom);
1063 // load/store
1065
1066 // Custom split nxv32f16 since nxv32f32 is not legal.
1067 if (VT == MVT::nxv32f16) {
1068 setOperationAction(ZvfhminPromoteOps, VT, Custom);
1069 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
1070 continue;
1071 }
1072 // Add more promote ops.
1073 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1074 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1075 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1076 }
1077 }
1078
1079 if (Subtarget.hasVInstructionsF32()) {
1080 for (MVT VT : F32VecVTs) {
1081 if (!isTypeLegal(VT))
1082 continue;
1083 SetCommonVFPActions(VT);
1084 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1085 }
1086 }
1087
1088 if (Subtarget.hasVInstructionsF64()) {
1089 for (MVT VT : F64VecVTs) {
1090 if (!isTypeLegal(VT))
1091 continue;
1092 SetCommonVFPActions(VT);
1093 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1094 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1095 }
1096 }
1097
1098 if (Subtarget.useRVVForFixedLengthVectors()) {
1100 if (!useRVVForFixedLengthVectorVT(VT))
1101 continue;
1102
1103 // By default everything must be expanded.
1104 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1107 setTruncStoreAction(VT, OtherVT, Expand);
1109 OtherVT, Expand);
1110 }
1111
1112 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1113 // expansion to a build_vector of 0s.
1115
1116 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1118 Custom);
1119
1121 Custom);
1122
1124 VT, Custom);
1125
1127
1129
1131
1133
1135
1137
1140 Custom);
1141
1143 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1144 Custom);
1145
1147 {
1156 },
1157 VT, Custom);
1159 Custom);
1160
1162
1163 // Operations below differ between masks and other vectors.
1164 if (VT.getVectorElementType() == MVT::i1) {
1165 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1166 ISD::OR, ISD::XOR},
1167 VT, Custom);
1168
1169 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1170 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1171 VT, Custom);
1172
1173 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1174 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1175 continue;
1176 }
1177
1178 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1179 // it before type legalization for i64 vectors on RV32. It will then be
1180 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1181 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1182 // improvements first.
1183 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1186 }
1187
1190
1191 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1192 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1193 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1194 ISD::VP_SCATTER},
1195 VT, Custom);
1196
1200 VT, Custom);
1201
1204
1205 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1206 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1208
1211 VT, Custom);
1212
1215
1218
1219 // Custom-lower reduction operations to set up the corresponding custom
1220 // nodes' operands.
1224 VT, Custom);
1225
1226 setOperationAction(IntegerVPOps, VT, Custom);
1227
1228 if (Subtarget.hasStdExtZvkb())
1230
1231 if (Subtarget.hasStdExtZvbb()) {
1234 VT, Custom);
1235 } else {
1236 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1237 // in the range of f32.
1238 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1239 if (isTypeLegal(FloatVT))
1242 Custom);
1243 }
1244 }
1245
1247 // There are no extending loads or truncating stores.
1248 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1249 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1250 setTruncStoreAction(VT, InnerVT, Expand);
1251 }
1252
1253 if (!useRVVForFixedLengthVectorVT(VT))
1254 continue;
1255
1256 // By default everything must be expanded.
1257 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1259
1260 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1261 // expansion to a build_vector of 0s.
1263
1264 if (VT.getVectorElementType() == MVT::f16 &&
1265 !Subtarget.hasVInstructionsF16()) {
1268 Custom);
1269 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1271 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1272 Custom);
1274 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1275 VT, Custom);
1278 VT, Custom);
1281 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1282 // Don't promote f16 vector operations to f32 if f32 vector type is
1283 // not legal.
1284 // TODO: could split the f16 vector into two vectors and do promotion.
1285 if (!isTypeLegal(F32VecVT))
1286 continue;
1287 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1288 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1289 continue;
1290 }
1291
1292 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1294 Custom);
1295
1299 VT, Custom);
1300
1303 VT, Custom);
1304
1305 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1306 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1307 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1308 ISD::VP_SCATTER},
1309 VT, Custom);
1310
1315 VT, Custom);
1316
1318
1321 VT, Custom);
1322
1323 setCondCodeAction(VFPCCToExpand, VT, Expand);
1324
1328
1330
1331 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1332
1333 setOperationAction(FloatingPointVPOps, VT, Custom);
1334
1336 Custom);
1343 VT, Custom);
1344 }
1345
1346 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1347 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1348 Custom);
1349 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1351 if (Subtarget.hasStdExtFOrZfinx())
1353 if (Subtarget.hasStdExtDOrZdinx())
1355 }
1356 }
1357
1358 if (Subtarget.hasStdExtA()) {
1360 if (RV64LegalI32 && Subtarget.is64Bit())
1362 }
1363
1364 if (Subtarget.hasForcedAtomics()) {
1365 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1371 XLenVT, LibCall);
1372 }
1373
1374 if (Subtarget.hasVendorXTHeadMemIdx()) {
1375 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1376 setIndexedLoadAction(im, MVT::i8, Legal);
1377 setIndexedStoreAction(im, MVT::i8, Legal);
1378 setIndexedLoadAction(im, MVT::i16, Legal);
1379 setIndexedStoreAction(im, MVT::i16, Legal);
1380 setIndexedLoadAction(im, MVT::i32, Legal);
1381 setIndexedStoreAction(im, MVT::i32, Legal);
1382
1383 if (Subtarget.is64Bit()) {
1384 setIndexedLoadAction(im, MVT::i64, Legal);
1385 setIndexedStoreAction(im, MVT::i64, Legal);
1386 }
1387 }
1388 }
1389
1390 // Function alignments.
1391 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1392 setMinFunctionAlignment(FunctionAlignment);
1393 // Set preferred alignments.
1396
1400 if (Subtarget.is64Bit())
1402
1403 if (Subtarget.hasStdExtFOrZfinx())
1405
1406 if (Subtarget.hasStdExtZbb())
1408
1409 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1411
1412 if (Subtarget.hasStdExtZbkb())
1414 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1416 if (Subtarget.hasStdExtFOrZfinx())
1419 if (Subtarget.hasVInstructions())
1421 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1424 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1426 if (Subtarget.hasVendorXTHeadMemPair())
1428 if (Subtarget.useRVVForFixedLengthVectors())
1430
1431 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1432 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1433
1434 // Disable strict node mutation.
1435 IsStrictFPEnabled = true;
1436}
1437
1439 LLVMContext &Context,
1440 EVT VT) const {
1441 if (!VT.isVector())
1442 return getPointerTy(DL);
1443 if (Subtarget.hasVInstructions() &&
1444 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1445 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1447}
1448
1449MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1450 return Subtarget.getXLenVT();
1451}
1452
1453// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1454bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1455 unsigned VF,
1456 bool IsScalable) const {
1457 if (!Subtarget.hasVInstructions())
1458 return true;
1459
1460 if (!IsScalable)
1461 return true;
1462
1463 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1464 return true;
1465
1466 // Don't allow VF=1 if those types aren't legal.
1467 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1468 return true;
1469
1470 // VLEN=32 support is incomplete.
1471 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1472 return true;
1473
1474 // The maximum VF is for the smallest element width with LMUL=8.
1475 // VF must be a power of 2.
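// That is, (RVVBitsPerBlock / 8) * 8 = 64 lanes for SEW=8 at LMUL=8.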
1476 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1477 return VF > MaxVF || !isPowerOf2_32(VF);
1478}
1479
1481 const CallInst &I,
1482 MachineFunction &MF,
1483 unsigned Intrinsic) const {
1484 auto &DL = I.getModule()->getDataLayout();
1485
1486 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1487 bool IsUnitStrided, bool UsePtrVal = false) {
1489 // We can't use ptrVal if the intrinsic can access memory before the
1490 // pointer. This means we can't use it for strided or indexed intrinsics.
1491 if (UsePtrVal)
1492 Info.ptrVal = I.getArgOperand(PtrOp);
1493 else
1494 Info.fallbackAddressSpace =
1495 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1496 Type *MemTy;
1497 if (IsStore) {
1498 // Store value is the first operand.
1499 MemTy = I.getArgOperand(0)->getType();
1500 } else {
1501 // Use the return type. If it's a segment load, the return type is a struct.
1502 MemTy = I.getType();
1503 if (MemTy->isStructTy())
1504 MemTy = MemTy->getStructElementType(0);
1505 }
1506 if (!IsUnitStrided)
1507 MemTy = MemTy->getScalarType();
1508
1509 Info.memVT = getValueType(DL, MemTy);
1510 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1512 Info.flags |=
1514 return true;
1515 };
1516
1517 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1519
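// The switch below mostly records, for each RVV memory intrinsic, which
// operand holds the pointer and whether the access is unit-strided. Segment
// forms place the pointer ahead of the trailing vl (and, for masked
// variants, mask and policy) operands, hence the arg_size()-relative indices.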
1521 switch (Intrinsic) {
1522 default:
1523 return false;
1524 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1525 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1526 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1527 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1528 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1529 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1530 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1531 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1532 case Intrinsic::riscv_masked_cmpxchg_i32:
1534 Info.memVT = MVT::i32;
1535 Info.ptrVal = I.getArgOperand(0);
1536 Info.offset = 0;
1537 Info.align = Align(4);
1540 return true;
1541 case Intrinsic::riscv_masked_strided_load:
1542 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1543 /*IsUnitStrided*/ false);
1544 case Intrinsic::riscv_masked_strided_store:
1545 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1546 /*IsUnitStrided*/ false);
1547 case Intrinsic::riscv_seg2_load:
1548 case Intrinsic::riscv_seg3_load:
1549 case Intrinsic::riscv_seg4_load:
1550 case Intrinsic::riscv_seg5_load:
1551 case Intrinsic::riscv_seg6_load:
1552 case Intrinsic::riscv_seg7_load:
1553 case Intrinsic::riscv_seg8_load:
1554 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1555 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1556 case Intrinsic::riscv_seg2_store:
1557 case Intrinsic::riscv_seg3_store:
1558 case Intrinsic::riscv_seg4_store:
1559 case Intrinsic::riscv_seg5_store:
1560 case Intrinsic::riscv_seg6_store:
1561 case Intrinsic::riscv_seg7_store:
1562 case Intrinsic::riscv_seg8_store:
1563 // Operands are (vec, ..., vec, ptr, vl)
1564 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1565 /*IsStore*/ true,
1566 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1567 case Intrinsic::riscv_vle:
1568 case Intrinsic::riscv_vle_mask:
1569 case Intrinsic::riscv_vleff:
1570 case Intrinsic::riscv_vleff_mask:
1571 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1572 /*IsStore*/ false,
1573 /*IsUnitStrided*/ true,
1574 /*UsePtrVal*/ true);
1575 case Intrinsic::riscv_vse:
1576 case Intrinsic::riscv_vse_mask:
1577 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1578 /*IsStore*/ true,
1579 /*IsUnitStrided*/ true,
1580 /*UsePtrVal*/ true);
1581 case Intrinsic::riscv_vlse:
1582 case Intrinsic::riscv_vlse_mask:
1583 case Intrinsic::riscv_vloxei:
1584 case Intrinsic::riscv_vloxei_mask:
1585 case Intrinsic::riscv_vluxei:
1586 case Intrinsic::riscv_vluxei_mask:
1587 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1588 /*IsStore*/ false,
1589 /*IsUnitStrided*/ false);
1590 case Intrinsic::riscv_vsse:
1591 case Intrinsic::riscv_vsse_mask:
1592 case Intrinsic::riscv_vsoxei:
1593 case Intrinsic::riscv_vsoxei_mask:
1594 case Intrinsic::riscv_vsuxei:
1595 case Intrinsic::riscv_vsuxei_mask:
1596 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1597 /*IsStore*/ true,
1598 /*IsUnitStrided*/ false);
1599 case Intrinsic::riscv_vlseg2:
1600 case Intrinsic::riscv_vlseg3:
1601 case Intrinsic::riscv_vlseg4:
1602 case Intrinsic::riscv_vlseg5:
1603 case Intrinsic::riscv_vlseg6:
1604 case Intrinsic::riscv_vlseg7:
1605 case Intrinsic::riscv_vlseg8:
1606 case Intrinsic::riscv_vlseg2ff:
1607 case Intrinsic::riscv_vlseg3ff:
1608 case Intrinsic::riscv_vlseg4ff:
1609 case Intrinsic::riscv_vlseg5ff:
1610 case Intrinsic::riscv_vlseg6ff:
1611 case Intrinsic::riscv_vlseg7ff:
1612 case Intrinsic::riscv_vlseg8ff:
1613 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1614 /*IsStore*/ false,
1615 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1616 case Intrinsic::riscv_vlseg2_mask:
1617 case Intrinsic::riscv_vlseg3_mask:
1618 case Intrinsic::riscv_vlseg4_mask:
1619 case Intrinsic::riscv_vlseg5_mask:
1620 case Intrinsic::riscv_vlseg6_mask:
1621 case Intrinsic::riscv_vlseg7_mask:
1622 case Intrinsic::riscv_vlseg8_mask:
1623 case Intrinsic::riscv_vlseg2ff_mask:
1624 case Intrinsic::riscv_vlseg3ff_mask:
1625 case Intrinsic::riscv_vlseg4ff_mask:
1626 case Intrinsic::riscv_vlseg5ff_mask:
1627 case Intrinsic::riscv_vlseg6ff_mask:
1628 case Intrinsic::riscv_vlseg7ff_mask:
1629 case Intrinsic::riscv_vlseg8ff_mask:
1630 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1631 /*IsStore*/ false,
1632 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1633 case Intrinsic::riscv_vlsseg2:
1634 case Intrinsic::riscv_vlsseg3:
1635 case Intrinsic::riscv_vlsseg4:
1636 case Intrinsic::riscv_vlsseg5:
1637 case Intrinsic::riscv_vlsseg6:
1638 case Intrinsic::riscv_vlsseg7:
1639 case Intrinsic::riscv_vlsseg8:
1640 case Intrinsic::riscv_vloxseg2:
1641 case Intrinsic::riscv_vloxseg3:
1642 case Intrinsic::riscv_vloxseg4:
1643 case Intrinsic::riscv_vloxseg5:
1644 case Intrinsic::riscv_vloxseg6:
1645 case Intrinsic::riscv_vloxseg7:
1646 case Intrinsic::riscv_vloxseg8:
1647 case Intrinsic::riscv_vluxseg2:
1648 case Intrinsic::riscv_vluxseg3:
1649 case Intrinsic::riscv_vluxseg4:
1650 case Intrinsic::riscv_vluxseg5:
1651 case Intrinsic::riscv_vluxseg6:
1652 case Intrinsic::riscv_vluxseg7:
1653 case Intrinsic::riscv_vluxseg8:
1654 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1655 /*IsStore*/ false,
1656 /*IsUnitStrided*/ false);
1657 case Intrinsic::riscv_vlsseg2_mask:
1658 case Intrinsic::riscv_vlsseg3_mask:
1659 case Intrinsic::riscv_vlsseg4_mask:
1660 case Intrinsic::riscv_vlsseg5_mask:
1661 case Intrinsic::riscv_vlsseg6_mask:
1662 case Intrinsic::riscv_vlsseg7_mask:
1663 case Intrinsic::riscv_vlsseg8_mask:
1664 case Intrinsic::riscv_vloxseg2_mask:
1665 case Intrinsic::riscv_vloxseg3_mask:
1666 case Intrinsic::riscv_vloxseg4_mask:
1667 case Intrinsic::riscv_vloxseg5_mask:
1668 case Intrinsic::riscv_vloxseg6_mask:
1669 case Intrinsic::riscv_vloxseg7_mask:
1670 case Intrinsic::riscv_vloxseg8_mask:
1671 case Intrinsic::riscv_vluxseg2_mask:
1672 case Intrinsic::riscv_vluxseg3_mask:
1673 case Intrinsic::riscv_vluxseg4_mask:
1674 case Intrinsic::riscv_vluxseg5_mask:
1675 case Intrinsic::riscv_vluxseg6_mask:
1676 case Intrinsic::riscv_vluxseg7_mask:
1677 case Intrinsic::riscv_vluxseg8_mask:
1678 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1679 /*IsStore*/ false,
1680 /*IsUnitStrided*/ false);
1681 case Intrinsic::riscv_vsseg2:
1682 case Intrinsic::riscv_vsseg3:
1683 case Intrinsic::riscv_vsseg4:
1684 case Intrinsic::riscv_vsseg5:
1685 case Intrinsic::riscv_vsseg6:
1686 case Intrinsic::riscv_vsseg7:
1687 case Intrinsic::riscv_vsseg8:
1688 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1689 /*IsStore*/ true,
1690 /*IsUnitStrided*/ false);
1691 case Intrinsic::riscv_vsseg2_mask:
1692 case Intrinsic::riscv_vsseg3_mask:
1693 case Intrinsic::riscv_vsseg4_mask:
1694 case Intrinsic::riscv_vsseg5_mask:
1695 case Intrinsic::riscv_vsseg6_mask:
1696 case Intrinsic::riscv_vsseg7_mask:
1697 case Intrinsic::riscv_vsseg8_mask:
1698 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1699 /*IsStore*/ true,
1700 /*IsUnitStrided*/ false);
1701 case Intrinsic::riscv_vssseg2:
1702 case Intrinsic::riscv_vssseg3:
1703 case Intrinsic::riscv_vssseg4:
1704 case Intrinsic::riscv_vssseg5:
1705 case Intrinsic::riscv_vssseg6:
1706 case Intrinsic::riscv_vssseg7:
1707 case Intrinsic::riscv_vssseg8:
1708 case Intrinsic::riscv_vsoxseg2:
1709 case Intrinsic::riscv_vsoxseg3:
1710 case Intrinsic::riscv_vsoxseg4:
1711 case Intrinsic::riscv_vsoxseg5:
1712 case Intrinsic::riscv_vsoxseg6:
1713 case Intrinsic::riscv_vsoxseg7:
1714 case Intrinsic::riscv_vsoxseg8:
1715 case Intrinsic::riscv_vsuxseg2:
1716 case Intrinsic::riscv_vsuxseg3:
1717 case Intrinsic::riscv_vsuxseg4:
1718 case Intrinsic::riscv_vsuxseg5:
1719 case Intrinsic::riscv_vsuxseg6:
1720 case Intrinsic::riscv_vsuxseg7:
1721 case Intrinsic::riscv_vsuxseg8:
1722 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1723 /*IsStore*/ true,
1724 /*IsUnitStrided*/ false);
1725 case Intrinsic::riscv_vssseg2_mask:
1726 case Intrinsic::riscv_vssseg3_mask:
1727 case Intrinsic::riscv_vssseg4_mask:
1728 case Intrinsic::riscv_vssseg5_mask:
1729 case Intrinsic::riscv_vssseg6_mask:
1730 case Intrinsic::riscv_vssseg7_mask:
1731 case Intrinsic::riscv_vssseg8_mask:
1732 case Intrinsic::riscv_vsoxseg2_mask:
1733 case Intrinsic::riscv_vsoxseg3_mask:
1734 case Intrinsic::riscv_vsoxseg4_mask:
1735 case Intrinsic::riscv_vsoxseg5_mask:
1736 case Intrinsic::riscv_vsoxseg6_mask:
1737 case Intrinsic::riscv_vsoxseg7_mask:
1738 case Intrinsic::riscv_vsoxseg8_mask:
1739 case Intrinsic::riscv_vsuxseg2_mask:
1740 case Intrinsic::riscv_vsuxseg3_mask:
1741 case Intrinsic::riscv_vsuxseg4_mask:
1742 case Intrinsic::riscv_vsuxseg5_mask:
1743 case Intrinsic::riscv_vsuxseg6_mask:
1744 case Intrinsic::riscv_vsuxseg7_mask:
1745 case Intrinsic::riscv_vsuxseg8_mask:
1746 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1747 /*IsStore*/ true,
1748 /*IsUnitStrided*/ false);
1749 }
1750}
1751
1753 const AddrMode &AM, Type *Ty,
1754 unsigned AS,
1755 Instruction *I) const {
1756 // No global is ever allowed as a base.
1757 if (AM.BaseGV)
1758 return false;
1759
1760 // RVV instructions only support register addressing.
1761 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1762 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1763
1764 // Require a 12-bit signed offset.
1765 if (!isInt<12>(AM.BaseOffs))
1766 return false;
1767
1768 switch (AM.Scale) {
1769 case 0: // "r+i" or just "i", depending on HasBaseReg.
1770 break;
1771 case 1:
1772 if (!AM.HasBaseReg) // allow "r+i".
1773 break;
1774 return false; // disallow "r+r" or "r+r+i".
1775 default:
1776 return false;
1777 }
1778
1779 return true;
1780}
1781
1783 return isInt<12>(Imm);
1784}
1785
1787 return isInt<12>(Imm);
1788}
1789
1790// On RV32, 64-bit integers are split into their high and low parts and held
1791// in two different registers, so the trunc is free since the low register can
1792// just be used.
1793// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1794// isTruncateFree?
1796 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1797 return false;
1798 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1799 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1800 return (SrcBits == 64 && DestBits == 32);
1801}
1802
1804 // We consider i64->i32 free on RV64 since we have good selection of W
1805 // instructions that make promoting operations back to i64 free in many cases.
1806 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1807 !DstVT.isInteger())
1808 return false;
1809 unsigned SrcBits = SrcVT.getSizeInBits();
1810 unsigned DestBits = DstVT.getSizeInBits();
1811 return (SrcBits == 64 && DestBits == 32);
1812}
1813
1815 // Zexts are free if they can be combined with a load.
1816 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1817 // poorly with type legalization of compares preferring sext.
1818 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1819 EVT MemVT = LD->getMemoryVT();
1820 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1821 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1822 LD->getExtensionType() == ISD::ZEXTLOAD))
1823 return true;
1824 }
1825
1826 return TargetLowering::isZExtFree(Val, VT2);
1827}
1828
1830 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1831}
1832
1834 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1835}
1836
1838 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXCVbitmanip();
1839}
1840
1842 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
1843 Subtarget.hasVendorXCVbitmanip();
1844}
1845
1847 const Instruction &AndI) const {
1848 // We expect to be able to match a bit extraction instruction if the Zbs
1849 // extension is supported and the mask is a power of two. However, we
1850 // conservatively return false if the mask would fit in an ANDI instruction,
1851 // on the basis that it's possible the sinking+duplication of the AND in
1852 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1853 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1854 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1855 return false;
1856 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1857 if (!Mask)
1858 return false;
1859 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1860}
1861
1863 EVT VT = Y.getValueType();
1864
1865 // FIXME: Support vectors once we have tests.
1866 if (VT.isVector())
1867 return false;
1868
1869 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1870 !isa<ConstantSDNode>(Y);
1871}
1872
1874 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1875 if (Subtarget.hasStdExtZbs())
1876 return X.getValueType().isScalarInteger();
1877 auto *C = dyn_cast<ConstantSDNode>(Y);
1878 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1879 if (Subtarget.hasVendorXTHeadBs())
1880 return C != nullptr;
1881 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
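// Bit positions 0..10 keep the mask within ANDI's 12-bit signed immediate
// range (1 << 11 would not fit), hence the ule(10) check below.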
1882 return C && C->getAPIntValue().ule(10);
1883}
1884
1886 EVT VT) const {
1887 // Only enable for rvv.
1888 if (!VT.isVector() || !Subtarget.hasVInstructions())
1889 return false;
1890
1891 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1892 return false;
1893
1894 return true;
1895}
1896
1898 Type *Ty) const {
1899 assert(Ty->isIntegerTy());
1900
1901 unsigned BitSize = Ty->getIntegerBitWidth();
1902 if (BitSize > Subtarget.getXLen())
1903 return false;
1904
1905 // Fast path, assume 32-bit immediates are cheap.
1906 int64_t Val = Imm.getSExtValue();
1907 if (isInt<32>(Val))
1908 return true;
1909
1910 // A constant pool entry may be more aligned than the load we're trying to
1911 // replace. If we don't support unaligned scalar mem, prefer the constant
1912 // pool.
1913 // TODO: Can the caller pass down the alignment?
1914 if (!Subtarget.hasFastUnalignedAccess())
1915 return true;
1916
1917 // Prefer to keep the load if it would require many instructions.
1918 // This uses the same threshold we use for constant pools but doesn't
1919 // check useConstantPoolForLargeInts.
1920 // TODO: Should we keep the load only when we're definitely going to emit a
1921 // constant pool?
1922
1924 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1925}
1926
1930 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1931 SelectionDAG &DAG) const {
1932 // One interesting pattern that we'd want to form is 'bit extract':
1933 // ((1 >> Y) & 1) ==/!= 0
1934 // But we also need to be careful not to try to reverse that fold.
1935
1936 // Is this '((1 >> Y) & 1)'?
1937 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1938 return false; // Keep the 'bit extract' pattern.
1939
1940 // Will this be '((1 >> Y) & 1)' after the transform?
1941 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1942 return true; // Do form the 'bit extract' pattern.
1943
1944 // If 'X' is a constant, and we transform, then we will immediately
1945 // try to undo the fold, thus causing endless combine loop.
1946 // So only do the transform if X is not a constant. This matches the default
1947 // implementation of this function.
1948 return !XC;
1949}
1950
1951bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1952 switch (Opcode) {
1953 case Instruction::Add:
1954 case Instruction::Sub:
1955 case Instruction::Mul:
1956 case Instruction::And:
1957 case Instruction::Or:
1958 case Instruction::Xor:
1959 case Instruction::FAdd:
1960 case Instruction::FSub:
1961 case Instruction::FMul:
1962 case Instruction::FDiv:
1963 case Instruction::ICmp:
1964 case Instruction::FCmp:
1965 return true;
1966 case Instruction::Shl:
1967 case Instruction::LShr:
1968 case Instruction::AShr:
1969 case Instruction::UDiv:
1970 case Instruction::SDiv:
1971 case Instruction::URem:
1972 case Instruction::SRem:
1973 return Operand == 1;
1974 default:
1975 return false;
1976 }
1977}
1978
1979
1981 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1982 return false;
1983
1984 if (canSplatOperand(I->getOpcode(), Operand))
1985 return true;
1986
1987 auto *II = dyn_cast<IntrinsicInst>(I);
1988 if (!II)
1989 return false;
1990
1991 switch (II->getIntrinsicID()) {
1992 case Intrinsic::fma:
1993 case Intrinsic::vp_fma:
1994 return Operand == 0 || Operand == 1;
1995 case Intrinsic::vp_shl:
1996 case Intrinsic::vp_lshr:
1997 case Intrinsic::vp_ashr:
1998 case Intrinsic::vp_udiv:
1999 case Intrinsic::vp_sdiv:
2000 case Intrinsic::vp_urem:
2001 case Intrinsic::vp_srem:
2002 return Operand == 1;
2003 // These intrinsics are commutative.
2004 case Intrinsic::vp_add:
2005 case Intrinsic::vp_mul:
2006 case Intrinsic::vp_and:
2007 case Intrinsic::vp_or:
2008 case Intrinsic::vp_xor:
2009 case Intrinsic::vp_fadd:
2010 case Intrinsic::vp_fmul:
2011 case Intrinsic::vp_icmp:
2012 case Intrinsic::vp_fcmp:
2013 // These intrinsics have 'vr' versions.
2014 case Intrinsic::vp_sub:
2015 case Intrinsic::vp_fsub:
2016 case Intrinsic::vp_fdiv:
2017 return Operand == 0 || Operand == 1;
2018 default:
2019 return false;
2020 }
2021}
2022
2023/// Check if sinking \p I's operands to I's basic block is profitable, because
2024/// the operands can be folded into a target instruction, e.g.
2025/// splats of scalars can fold into vector instructions.
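/// For example, sinking a splatted scalar operand of a vector add into the
/// user's block lets isel fold it into the scalar-operand form (vadd.vx)
/// instead of materializing the splat in a vector register.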
2027 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
2028 using namespace llvm::PatternMatch;
2029
2030 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
2031 return false;
2032
2033 // Don't sink splat operands if the target prefers not to. Some targets
2034 // require S2V transfer buffers and we can run out of them copying the same
2035 // value repeatedly.
2036 // FIXME: It could still be worth doing if it would improve vector register
2037 // pressure and prevent a vector spill.
2038 if (!Subtarget.sinkSplatOperands())
2039 return false;
2040
2041 for (auto OpIdx : enumerate(I->operands())) {
2042 if (!canSplatOperand(I, OpIdx.index()))
2043 continue;
2044
2045 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2046 // Make sure we are not already sinking this operand
2047 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2048 continue;
2049
2050 // We are looking for a splat that can be sunk.
2052 m_Undef(), m_ZeroMask())))
2053 continue;
2054
2055 // Don't sink i1 splats.
2056 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2057 continue;
2058
2059 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
2060 // and vector registers.
2061 for (Use &U : Op->uses()) {
2062 Instruction *Insn = cast<Instruction>(U.getUser());
2063 if (!canSplatOperand(Insn, U.getOperandNo()))
2064 return false;
2065 }
2066
2067 Ops.push_back(&Op->getOperandUse(0));
2068 Ops.push_back(&OpIdx.value());
2069 }
2070 return true;
2071}
2072
2074 unsigned Opc = VecOp.getOpcode();
2075
2076 // Assume target opcodes can't be scalarized.
2077 // TODO - do we have any exceptions?
2078 if (Opc >= ISD::BUILTIN_OP_END)
2079 return false;
2080
2081 // If the vector op is not supported, try to convert to scalar.
2082 EVT VecVT = VecOp.getValueType();
2083 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2084 return true;
2085
2086 // If the vector op is supported, but the scalar op is not, the transform may
2087 // not be worthwhile.
2088 // Permit a vector binary operation to be converted to a scalar binary
2089 // operation which is custom lowered with an illegal type.
2090 EVT ScalarVT = VecVT.getScalarType();
2091 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2092 isOperationCustom(Opc, ScalarVT);
2093}
2094
2096 const GlobalAddressSDNode *GA) const {
2097 // In order to maximise the opportunity for common subexpression elimination,
2098 // keep a separate ADD node for the global address offset instead of folding
2099 // it in the global address node. Later peephole optimisations may choose to
2100 // fold it back in when profitable.
2101 return false;
2102}
2103
2104 // Return one of the following:
2105// (1) `{0-31 value, false}` if FLI is available for Imm's type and FP value.
2106// (2) `{0-31 value, true}` if Imm is negative and FLI is available for its
2107// positive counterpart, which will be materialized from the first returned
2108 // element. The second returned element indicates that a FNEG should be
2109 // applied afterwards.
2110// (3) `{-1, _}` if there is no way FLI can be used to materialize Imm.
2111std::pair<int, bool> RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm,
2112 EVT VT) const {
2113 if (!Subtarget.hasStdExtZfa())
2114 return std::make_pair(-1, false);
2115
2116 bool IsSupportedVT = false;
2117 if (VT == MVT::f16) {
2118 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2119 } else if (VT == MVT::f32) {
2120 IsSupportedVT = true;
2121 } else if (VT == MVT::f64) {
2122 assert(Subtarget.hasStdExtD() && "Expect D extension");
2123 IsSupportedVT = true;
2124 }
2125
2126 if (!IsSupportedVT)
2127 return std::make_pair(-1, false);
2128
2129 int Index = RISCVLoadFPImm::getLoadFPImm(Imm);
2130 if (Index < 0 && Imm.isNegative())
2131 // Try the combination of its positive counterpart + FNEG.
2132 return std::make_pair(RISCVLoadFPImm::getLoadFPImm(-Imm), true);
2133 else
2134 return std::make_pair(Index, false);
2135}
2136
2138 bool ForCodeSize) const {
2139 bool IsLegalVT = false;
2140 if (VT == MVT::f16)
2141 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2142 else if (VT == MVT::f32)
2143 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2144 else if (VT == MVT::f64)
2145 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2146 else if (VT == MVT::bf16)
2147 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2148
2149 if (!IsLegalVT)
2150 return false;
2151
2152 if (getLegalZfaFPImm(Imm, VT).first >= 0)
2153 return true;
2154
2155 // Cannot create a 64-bit floating-point immediate value for RV32.
2156 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2157 // td can handle +0.0 or -0.0 already.
2158 // -0.0 can be created by fmv + fneg.
2159 return Imm.isZero();
2160 }
2161
2162 // Special case: fmv + fneg
2163 if (Imm.isNegZero())
2164 return true;
2165
2166 // Building an integer and then converting requires a fmv at the end of
2167 // the integer sequence.
2168 const int Cost =
2169 1 + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(), Subtarget.getXLen(),
2170 Subtarget);
2171 return Cost <= FPImmCost;
2172}
2173
2174// TODO: This is very conservative.
2176 unsigned Index) const {
2178 return false;
2179
2180 // Only support extracting a fixed from a fixed vector for now.
2181 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2182 return false;
2183
2184 EVT EltVT = ResVT.getVectorElementType();
2185 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2186
2187 // The smallest type we can slide is i8.
2188 // TODO: We can extract index 0 from a mask vector without a slide.
2189 if (EltVT == MVT::i1)
2190 return false;
2191
2192 unsigned ResElts = ResVT.getVectorNumElements();
2193 unsigned SrcElts = SrcVT.getVectorNumElements();
2194
2195 unsigned MinVLen = Subtarget.getRealMinVLen();
2196 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2197
2198 // If we're extracting only data from the first VLEN bits of the source
2199 // then we can always do this with an m1 vslidedown.vx. Restricting the
2200 // Index ensures we can use a vslidedown.vi.
2201 // TODO: We can generalize this when the exact VLEN is known.
2202 if (Index + ResElts <= MinVLMAX && Index < 31)
2203 return true;
2204
2205 // Conservatively only handle extracting half of a vector.
2206 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2207 // a cheap extract. However, this case is important in practice for
2208 // shuffled extracts of longer vectors. How should we resolve this?
2209 if ((ResElts * 2) != SrcElts)
2210 return false;
2211
2212 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2213 // cheap.
2214 if (Index >= 32)
2215 return false;
2216
2217 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2218 // the upper half of a vector until we have more test coverage.
2219 return Index == 0 || Index == ResElts;
2220}
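// Illustrative example: extracting a v4i32 from a v8i32 at Index 0 or 4
// (i.e. one half of the source) passes the checks above and is treated as a
// cheap extract (at most one m1 vslidedown.vi), whereas an extract that is
// neither a half of the source nor contained in the first VLEN bits falls
// through to the conservative "return false" paths.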
2221
2222MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2223 CallingConv::ID CC,
2224 EVT VT) const {
2225 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2226 // We might still end up using a GPR but that will be decided based on ABI.
2227 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2228 !Subtarget.hasStdExtZfhminOrZhinxmin())
2229 return MVT::f32;
2230
2231 MVT PartVT = TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2232
2233 if (RV64LegalI32 && Subtarget.is64Bit() && PartVT == MVT::i32)
2234 return MVT::i64;
2235
2236 return PartVT;
2237}
2238
2239unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2240 CallingConv::ID CC,
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return 1;
2247
2248 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2249}
2250
2251unsigned RISCVTargetLowering::getVectorTypeBreakdownForCallingConv(
2252 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2253 unsigned &NumIntermediates, MVT &RegisterVT) const {
2254 unsigned NumRegs = TargetLowering::getVectorTypeBreakdownForCallingConv(
2255 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2256
2257 if (RV64LegalI32 && Subtarget.is64Bit() && IntermediateVT == MVT::i32)
2258 IntermediateVT = MVT::i64;
2259
2260 if (RV64LegalI32 && Subtarget.is64Bit() && RegisterVT == MVT::i32)
2261 RegisterVT = MVT::i64;
2262
2263 return NumRegs;
2264}
2265
2266// Changes the condition code and swaps operands if necessary, so the SetCC
2267// operation matches one of the comparisons supported directly by branches
2268// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2269// with 1/-1.
2270static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2271 ISD::CondCode &CC, SelectionDAG &DAG) {
2272 // If this is a single bit test that can't be handled by ANDI, shift the
2273 // bit to be tested to the MSB and perform a signed compare with 0.
2274 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2275 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2276 isa<ConstantSDNode>(LHS.getOperand(1))) {
2277 uint64_t Mask = LHS.getConstantOperandVal(1);
2278 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2279 unsigned ShAmt = 0;
2280 if (isPowerOf2_64(Mask)) {
2281 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2282 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2283 } else {
2284 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2285 }
2286
2287 LHS = LHS.getOperand(0);
2288 if (ShAmt != 0)
2289 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2290 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2291 return;
2292 }
2293 }
2294
2295 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2296 int64_t C = RHSC->getSExtValue();
2297 switch (CC) {
2298 default: break;
2299 case ISD::SETGT:
2300 // Convert X > -1 to X >= 0.
2301 if (C == -1) {
2302 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2303 CC = ISD::SETGE;
2304 return;
2305 }
2306 break;
2307 case ISD::SETLT:
2308 // Convert X < 1 to 0 >= X.
2309 if (C == 1) {
2310 RHS = LHS;
2311 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2312 CC = ISD::SETGE;
2313 return;
2314 }
2315 break;
2316 }
2317 }
2318
2319 switch (CC) {
2320 default:
2321 break;
2322 case ISD::SETGT:
2323 case ISD::SETLE:
2324 case ISD::SETUGT:
2325 case ISD::SETULE:
2326 CC = ISD::getSetCCSwappedOperands(CC);
2327 std::swap(LHS, RHS);
2328 break;
2329 }
2330}
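// Worked example (illustrative): for the single-bit test
//   seteq (and X, 0x100000), 0
// the mask is a power of two that does not fit in a 12-bit immediate, so the
// code above shifts bit 20 into the sign bit (SHL by XLEN-21) and rewrites the
// condition as a signed "shifted value >= 0" compare, which a branch can use
// directly.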
2331
2332RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
2333 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2334 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2335 if (VT.getVectorElementType() == MVT::i1)
2336 KnownSize *= 8;
2337
2338 switch (KnownSize) {
2339 default:
2340 llvm_unreachable("Invalid LMUL.");
2341 case 8:
2342 return RISCVII::VLMUL::LMUL_F8;
2343 case 16:
2344 return RISCVII::VLMUL::LMUL_F4;
2345 case 32:
2346 return RISCVII::VLMUL::LMUL_F2;
2347 case 64:
2348 return RISCVII::VLMUL::LMUL_1;
2349 case 128:
2350 return RISCVII::VLMUL::LMUL_2;
2351 case 256:
2352 return RISCVII::VLMUL::LMUL_4;
2353 case 512:
2354 return RISCVII::VLMUL::LMUL_8;
2355 }
2356}
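// Illustrative mapping for the switch above: with RVVBitsPerBlock == 64, a
// nxv2i32 value has a known minimum size of 64 bits and maps to LMUL_1,
// nxv4i32 (128 bits) maps to LMUL_2, and nxv1i16 (16 bits) to LMUL_F4; i1
// vectors are scaled by 8 first, so nxv8i1 also maps to LMUL_1.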
2357
2358unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
2359 switch (LMul) {
2360 default:
2361 llvm_unreachable("Invalid LMUL.");
2362 case RISCVII::VLMUL::LMUL_F8:
2363 case RISCVII::VLMUL::LMUL_F4:
2364 case RISCVII::VLMUL::LMUL_F2:
2365 case RISCVII::VLMUL::LMUL_1:
2366 return RISCV::VRRegClassID;
2367 case RISCVII::VLMUL::LMUL_2:
2368 return RISCV::VRM2RegClassID;
2369 case RISCVII::VLMUL::LMUL_4:
2370 return RISCV::VRM4RegClassID;
2371 case RISCVII::VLMUL::LMUL_8:
2372 return RISCV::VRM8RegClassID;
2373 }
2374}
2375
2376unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2377 RISCVII::VLMUL LMUL = getLMUL(VT);
2378 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2379 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2380 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2381 LMUL == RISCVII::VLMUL::LMUL_1) {
2382 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2383 "Unexpected subreg numbering");
2384 return RISCV::sub_vrm1_0 + Index;
2385 }
2386 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2387 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2388 "Unexpected subreg numbering");
2389 return RISCV::sub_vrm2_0 + Index;
2390 }
2391 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2392 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2393 "Unexpected subreg numbering");
2394 return RISCV::sub_vrm4_0 + Index;
2395 }
2396 llvm_unreachable("Invalid vector type.");
2397}
2398
2399unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
2400 if (VT.getVectorElementType() == MVT::i1)
2401 return RISCV::VRRegClassID;
2402 return getRegClassIDForLMUL(getLMUL(VT));
2403}
2404
2405// Attempt to decompose a subvector insert/extract between VecVT and
2406// SubVecVT via subregister indices. Returns the subregister index that
2407// can perform the subvector insert/extract with the given element index, as
2408// well as the index corresponding to any leftover subvectors that must be
2409// further inserted/extracted within the register class for SubVecVT.
2410std::pair<unsigned, unsigned>
2411RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2412 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2413 const RISCVRegisterInfo *TRI) {
2414 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2415 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2416 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2417 "Register classes not ordered");
2418 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2419 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2420 // Try to compose a subregister index that takes us from the incoming
2421 // LMUL>1 register class down to the outgoing one. At each step we halve
2422 // the LMUL:
2423 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2424 // Note that this is not guaranteed to find a subregister index, such as
2425 // when we are extracting from one VR type to another.
2426 unsigned SubRegIdx = RISCV::NoSubRegister;
2427 for (const unsigned RCID :
2428 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2429 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2430 VecVT = VecVT.getHalfNumVectorElementsVT();
2431 bool IsHi =
2432 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2433 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2434 getSubregIndexByMVT(VecVT, IsHi));
2435 if (IsHi)
2436 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2437 }
2438 return {SubRegIdx, InsertExtractIdx};
2439}
2440
2441// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2442// stores for those types.
2443bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2444 return !Subtarget.useRVVForFixedLengthVectors() ||
2445 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2446}
2447
2448bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2449 if (!ScalarTy.isSimple())
2450 return false;
2451 switch (ScalarTy.getSimpleVT().SimpleTy) {
2452 case MVT::iPTR:
2453 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2454 case MVT::i8:
2455 case MVT::i16:
2456 case MVT::i32:
2457 return true;
2458 case MVT::i64:
2459 return Subtarget.hasVInstructionsI64();
2460 case MVT::f16:
2461 return Subtarget.hasVInstructionsF16();
2462 case MVT::f32:
2463 return Subtarget.hasVInstructionsF32();
2464 case MVT::f64:
2465 return Subtarget.hasVInstructionsF64();
2466 default:
2467 return false;
2468 }
2469}
2470
2471
2472unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2473 return NumRepeatedDivisors;
2474}
2475
2476static SDValue getVLOperand(SDValue Op) {
2477 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2478 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2479 "Unexpected opcode");
2480 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2481 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2482 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2483 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2484 if (!II)
2485 return SDValue();
2486 return Op.getOperand(II->VLOperand + 1 + HasChain);
2487}
2488
2489static bool useRVVForFixedLengthVectorVT(MVT VT,
2490 const RISCVSubtarget &Subtarget) {
2491 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2492 if (!Subtarget.useRVVForFixedLengthVectors())
2493 return false;
2494
2495 // We only support a set of vector types with a consistent maximum fixed size
2496 // across all supported vector element types to avoid legalization issues.
2497 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2498 // fixed-length vector type we support is 1024 bytes.
2499 if (VT.getFixedSizeInBits() > 1024 * 8)
2500 return false;
2501
2502 unsigned MinVLen = Subtarget.getRealMinVLen();
2503
2504 MVT EltVT = VT.getVectorElementType();
2505
2506 // Don't use RVV for vectors we cannot scalarize if required.
2507 switch (EltVT.SimpleTy) {
2508 // i1 is supported but has different rules.
2509 default:
2510 return false;
2511 case MVT::i1:
2512 // Masks can only use a single register.
2513 if (VT.getVectorNumElements() > MinVLen)
2514 return false;
2515 MinVLen /= 8;
2516 break;
2517 case MVT::i8:
2518 case MVT::i16:
2519 case MVT::i32:
2520 break;
2521 case MVT::i64:
2522 if (!Subtarget.hasVInstructionsI64())
2523 return false;
2524 break;
2525 case MVT::f16:
2526 if (!Subtarget.hasVInstructionsF16Minimal())
2527 return false;
2528 break;
2529 case MVT::f32:
2530 if (!Subtarget.hasVInstructionsF32())
2531 return false;
2532 break;
2533 case MVT::f64:
2534 if (!Subtarget.hasVInstructionsF64())
2535 return false;
2536 break;
2537 }
2538
2539 // Reject elements larger than ELEN.
2540 if (EltVT.getSizeInBits() > Subtarget.getELen())
2541 return false;
2542
2543 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2544 // Don't use RVV for types that don't fit.
2545 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2546 return false;
2547
2548 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2549 // the base fixed length RVV support in place.
2550 if (!VT.isPow2VectorType())
2551 return false;
2552
2553 return true;
2554}
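// Illustrative example: with a minimum VLEN of 128 and the default maximum
// LMUL for fixed-length vectors, a v8i32 (256 bits) needs LMUL 2 and is
// accepted above, a v3i32 is rejected by the power-of-two check, and a v4i64
// is rejected unless the subtarget provides 64-bit vector elements (V or
// Zve64x).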
2555
2556bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2557 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2558}
2559
2560// Return the largest legal scalable vector type that matches VT's element type.
2561static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2562 const RISCVSubtarget &Subtarget) {
2563 // This may be called before legal types are set up.
2564 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2565 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2566 "Expected legal fixed length vector!");
2567
2568 unsigned MinVLen = Subtarget.getRealMinVLen();
2569 unsigned MaxELen = Subtarget.getELen();
2570
2571 MVT EltVT = VT.getVectorElementType();
2572 switch (EltVT.SimpleTy) {
2573 default:
2574 llvm_unreachable("unexpected element type for RVV container");
2575 case MVT::i1:
2576 case MVT::i8:
2577 case MVT::i16:
2578 case MVT::i32:
2579 case MVT::i64:
2580 case MVT::f16:
2581 case MVT::f32:
2582 case MVT::f64: {
2583 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2584 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2585 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2586 unsigned NumElts =
2587 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2588 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2589 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2590 return MVT::getScalableVectorVT(EltVT, NumElts);
2591 }
2592 }
2593}
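// Illustrative example: with RVVBitsPerBlock == 64 and a minimum VLEN of 128,
// a fixed v4i32 is given the container nxv2i32 (LMUL 1, since 4 x 32 bits
// fills exactly one VLEN), while v16i32 is given nxv8i32 (LMUL 4). The
// std::max clamp keeps small types such as v2i8 from dropping below the
// smallest supported fractional LMUL.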
2594
2595static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2596 const RISCVSubtarget &Subtarget) {
2597 return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2598 Subtarget);
2599}
2600
2601MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2602 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2603}
2604
2605// Grow V to consume an entire RVV register.
2606static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2607 const RISCVSubtarget &Subtarget) {
2608 assert(VT.isScalableVector() &&
2609 "Expected to convert into a scalable vector!");
2610 assert(V.getValueType().isFixedLengthVector() &&
2611 "Expected a fixed length vector operand!");
2612 SDLoc DL(V);
2613 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2614 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2615}
2616
2617// Shrink V so it's just big enough to maintain a VT's worth of data.
2618static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2619 const RISCVSubtarget &Subtarget) {
2620 assert(VT.isFixedLengthVector() &&
2621 "Expected to convert into a fixed length vector!");
2622 assert(V.getValueType().isScalableVector() &&
2623 "Expected a scalable vector operand!");
2624 SDLoc DL(V);
2625 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2626 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2627}
2628
2629/// Return the type of the mask type suitable for masking the provided
2630/// vector type. This is simply an i1 element type vector of the same
2631/// (possibly scalable) length.
2632static MVT getMaskTypeFor(MVT VecVT) {
2633 assert(VecVT.isVector());
2634 ElementCount EC = VecVT.getVectorElementCount();
2635 return MVT::getVectorVT(MVT::i1, EC);
2636}
2637
2638/// Creates an all ones mask suitable for masking a vector of type VecTy with
2639/// vector length VL.
2640static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2641 SelectionDAG &DAG) {
2642 MVT MaskVT = getMaskTypeFor(VecVT);
2643 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2644}
2645
2646static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2647 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2648 // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
2649 // canonicalize the representation. InsertVSETVLI will pick the immediate
2650 // encoding later if profitable.
2651 const auto [MinVLMAX, MaxVLMAX] =
2652 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
2653 if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
2654 return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2655
2656 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2657}
2658
2659static std::pair<SDValue, SDValue>
2660getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2661 const RISCVSubtarget &Subtarget) {
2662 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2663 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2664 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2665 return {Mask, VL};
2666}
2667
2668static std::pair<SDValue, SDValue>
2669getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2670 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2671 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2672 SDValue VL = getVLOp(NumElts, ContainerVT, DL, DAG, Subtarget);
2673 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2674 return {Mask, VL};
2675}
2676
2677// Gets the two common "VL" operands: an all-ones mask and the vector length.
2678// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2679// the vector type that the fixed-length vector is contained in. Otherwise if
2680// VecVT is scalable, then ContainerVT should be the same as VecVT.
2681static std::pair<SDValue, SDValue>
2682getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2683 const RISCVSubtarget &Subtarget) {
2684 if (VecVT.isFixedLengthVector())
2685 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2686 Subtarget);
2687 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2688 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2689}
2690
2691SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2692 SelectionDAG &DAG) const {
2693 assert(VecVT.isScalableVector() && "Expected scalable vector");
2694 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2695 VecVT.getVectorElementCount());
2696}
2697
2698std::pair<unsigned, unsigned>
2699RISCVTargetLowering::computeVLMAXBounds(MVT VecVT,
2700 const RISCVSubtarget &Subtarget) {
2701 assert(VecVT.isScalableVector() && "Expected scalable vector");
2702
2703 unsigned EltSize = VecVT.getScalarSizeInBits();
2704 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2705
2706 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2707 unsigned MaxVLMAX =
2708 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2709
2710 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2711 unsigned MinVLMAX =
2712 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2713
2714 return std::make_pair(MinVLMAX, MaxVLMAX);
2715}
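// Worked example (illustrative): for nxv2i32 (SEW 32, known minimum size 64
// bits, i.e. LMUL 1) on a hypothetical subtarget with Zvl128b and, say, a
// maximum VLEN of 256, VLMAX = VLEN / SEW * LMUL, so the bounds above are
// {4, 8}; when the exact VLEN is known the two values coincide.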
2716
2717// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2718// of either is (currently) supported. This can get us into an infinite loop
2719// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2720// as a ..., etc.
2721// Until either (or both) of these can reliably lower any node, reporting that
2722// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2723// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2724// which is not desirable.
2725bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2726 EVT VT, unsigned DefinedValues) const {
2727 return false;
2728}
2729
2730InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2731 // TODO: Here assume reciprocal throughput is 1 for LMUL_1; it is
2732 // implementation-defined.
2733 if (!VT.isVector())
2734 return InstructionCost::getInvalid();
2735 unsigned DLenFactor = Subtarget.getDLenFactor();
2736 unsigned Cost;
2737 if (VT.isScalableVector()) {
2738 unsigned LMul;
2739 bool Fractional;
2740 std::tie(LMul, Fractional) =
2741 RISCVVType::decodeVLMUL(getLMUL(VT));
2742 if (Fractional)
2743 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2744 else
2745 Cost = (LMul * DLenFactor);
2746 } else {
2747 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2748 }
2749 return Cost;
2750}
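// Illustrative costs for the model above: when DLEN == VLEN (DLenFactor 1),
// an LMUL 4 type costs 4 and an LMUL 1/2 fractional type costs 1; when
// DLEN == VLEN/2 (DLenFactor 2), the same LMUL 4 type costs 8. Fixed-length
// types are costed by how many DLEN-sized chunks they occupy.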
2751
2752
2753/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2754/// is generally quadratic in the number of vregs implied by LMUL. Note that
2755/// the operands (index and possibly mask) are handled separately.
2756InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2757 return getLMULCost(VT) * getLMULCost(VT);
2758}
2759
2760/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2761/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2762/// or may track the vrgather.vv cost. It is implementation-dependent.
2763InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2764 return getLMULCost(VT);
2765}
2766
2767/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2768/// for the type VT. (This does not cover the vslide1up or vslide1down
2769/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2770/// or may track the vrgather.vv cost. It is implementation-dependent.
2771InstructionCost RISCVTargetLowering::getVSlideVXCost(MVT VT) const {
2772 return getLMULCost(VT);
2773}
2774
2775/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2776/// for the type VT. (This does not cover the vslide1up or vslide1down
2777/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2778/// or may track the vrgather.vv cost. It is implementation-dependent.
2779InstructionCost RISCVTargetLowering::getVSlideVICost(MVT VT) const {
2780 return getLMULCost(VT);
2781}
2782
2783static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2784 const RISCVSubtarget &Subtarget) {
2785 // RISC-V FP-to-int conversions saturate to the destination register size, but
2786 // don't produce 0 for nan. We can use a conversion instruction and fix the
2787 // nan case with a compare and a select.
2788 SDValue Src = Op.getOperand(0);
2789
2790 MVT DstVT = Op.getSimpleValueType();
2791 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2792
2793 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2794
2795 if (!DstVT.isVector()) {
2796 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2797 // the result.
2798 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2799 Src.getValueType() == MVT::bf16) {
2800 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2801 }
2802
2803 unsigned Opc;
2804 if (SatVT == DstVT)
2805 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2806 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2807 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2808 else
2809 return SDValue();
2810 // FIXME: Support other SatVTs by clamping before or after the conversion.
2811
2812 SDLoc DL(Op);
2813 SDValue FpToInt = DAG.getNode(
2814 Opc, DL, DstVT, Src,
2815 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2816
2817 if (Opc == RISCVISD::FCVT_WU_RV64)
2818 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2819
2820 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2821 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2822 ISD::CondCode::SETUO);
2823 }
2824
2825 // Vectors.
2826
2827 MVT DstEltVT = DstVT.getVectorElementType();
2828 MVT SrcVT = Src.getSimpleValueType();
2829 MVT SrcEltVT = SrcVT.getVectorElementType();
2830 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2831 unsigned DstEltSize = DstEltVT.getSizeInBits();
2832
2833 // Only handle saturating to the destination type.
2834 if (SatVT != DstEltVT)
2835 return SDValue();
2836
2837 // FIXME: Don't support narrowing by more than 1 steps for now.
2838 if (SrcEltSize > (2 * DstEltSize))
2839 return SDValue();
2840
2841 MVT DstContainerVT = DstVT;
2842 MVT SrcContainerVT = SrcVT;
2843 if (DstVT.isFixedLengthVector()) {
2844 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2845 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2846 assert(DstContainerVT.getVectorElementCount() ==
2847 SrcContainerVT.getVectorElementCount() &&
2848 "Expected same element count");
2849 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2850 }
2851
2852 SDLoc DL(Op);
2853
2854 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2855
2856 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2857 {Src, Src, DAG.getCondCode(ISD::SETNE),
2858 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2859
2860 // Need to widen by more than 1 step, promote the FP type, then do a widening
2861 // convert.
2862 if (DstEltSize > (2 * SrcEltSize)) {
2863 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2864 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2865 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2866 }
2867
2868 unsigned RVVOpc =
2869 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2870 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2871
2872 SDValue SplatZero = DAG.getNode(
2873 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2874 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2875 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
2876 Res, DAG.getUNDEF(DstContainerVT), VL);
2877
2878 if (DstVT.isFixedLengthVector())
2879 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2880
2881 return Res;
2882}
2883
2884static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2885 switch (Opc) {
2886 case ISD::FROUNDEVEN:
2887 case ISD::STRICT_FROUNDEVEN:
2888 case ISD::VP_FROUNDEVEN:
2889 return RISCVFPRndMode::RNE;
2890 case ISD::FTRUNC:
2891 case ISD::STRICT_FTRUNC:
2892 case ISD::VP_FROUNDTOZERO:
2893 return RISCVFPRndMode::RTZ;
2894 case ISD::FFLOOR:
2895 case ISD::STRICT_FFLOOR:
2896 case ISD::VP_FFLOOR:
2897 return RISCVFPRndMode::RDN;
2898 case ISD::FCEIL:
2899 case ISD::STRICT_FCEIL:
2900 case ISD::VP_FCEIL:
2901 return RISCVFPRndMode::RUP;
2902 case ISD::FROUND:
2903 case ISD::STRICT_FROUND:
2904 case ISD::VP_FROUND:
2905 return RISCVFPRndMode::RMM;
2906 case ISD::FRINT:
2907 return RISCVFPRndMode::DYN;
2908 }
2909
2910 return RISCVFPRndMode::Invalid;
2911}
2912
2913// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2914// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2915// the integer domain and back. Taking care to avoid converting values that are
2916// nan or already correct.
2917static SDValue
2919 const RISCVSubtarget &Subtarget) {
2920 MVT VT = Op.getSimpleValueType();
2921 assert(VT.isVector() && "Unexpected type");
2922
2923 SDLoc DL(Op);
2924
2925 SDValue Src = Op.getOperand(0);
2926
2927 MVT ContainerVT = VT;
2928 if (VT.isFixedLengthVector()) {
2929 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2930 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2931 }
2932
2933 SDValue Mask, VL;
2934 if (Op->isVPOpcode()) {
2935 Mask = Op.getOperand(1);
2936 if (VT.isFixedLengthVector())
2937 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2938 Subtarget);
2939 VL = Op.getOperand(2);
2940 } else {
2941 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2942 }
2943
2944 // Freeze the source since we are increasing the number of uses.
2945 Src = DAG.getFreeze(Src);
2946
2947 // We do the conversion on the absolute value and fix the sign at the end.
2948 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2949
2950 // Determine the largest integer that can be represented exactly. This and
2951 // values larger than it don't have any fractional bits so don't need to
2952 // be converted.
2953 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2954 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2955 APFloat MaxVal = APFloat(FltSem);
2956 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2957 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2958 SDValue MaxValNode =
2959 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2960 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2961 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2962
2963 // If abs(Src) was larger than MaxVal or nan, keep it.
2964 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2965 Mask =
2966 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2967 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2968 Mask, Mask, VL});
2969
2970 // Truncate to integer and convert back to FP.
2971 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2972 MVT XLenVT = Subtarget.getXLenVT();
2973 SDValue Truncated;
2974
2975 switch (Op.getOpcode()) {
2976 default:
2977 llvm_unreachable("Unexpected opcode");
2978 case ISD::FCEIL:
2979 case ISD::VP_FCEIL:
2980 case ISD::FFLOOR:
2981 case ISD::VP_FFLOOR:
2982 case ISD::FROUND:
2983 case ISD::FROUNDEVEN:
2984 case ISD::VP_FROUND:
2985 case ISD::VP_FROUNDEVEN:
2986 case ISD::VP_FROUNDTOZERO: {
2987 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2988 assert(FRM != RISCVFPRndMode::Invalid);
2989 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2990 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2991 break;
2992 }
2993 case ISD::FTRUNC:
2994 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2995 Mask, VL);
2996 break;
2997 case ISD::FRINT:
2998 case ISD::VP_FRINT:
2999 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
3000 break;
3001 case ISD::FNEARBYINT:
3002 case ISD::VP_FNEARBYINT:
3003 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3004 Mask, VL);
3005 break;
3006 }
3007
3008 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3009 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3010 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3011 Mask, VL);
3012
3013 // Restore the original sign so that -0.0 is preserved.
3014 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3015 Src, Src, Mask, VL);
3016
3017 if (!VT.isFixedLengthVector())
3018 return Truncated;
3019
3020 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3021}
3022
3023// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
3024// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNan of the source to
3025// qNan and converting the new source to integer and back to FP.
3026static SDValue
3028 const RISCVSubtarget &Subtarget) {
3029 SDLoc DL(Op);
3030 MVT VT = Op.getSimpleValueType();
3031 SDValue Chain = Op.getOperand(0);
3032 SDValue Src = Op.getOperand(1);
3033
3034 MVT ContainerVT = VT;
3035 if (VT.isFixedLengthVector()) {
3036 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3037 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3038 }
3039
3040 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3041
3042 // Freeze the source since we are increasing the number of uses.
3043 Src = DAG.getFreeze(Src);
3044
3045 // Convert sNan to qNan by executing x + x for every unordered element x in Src.
3046 MVT MaskVT = Mask.getSimpleValueType();
3047 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
3048 DAG.getVTList(MaskVT, MVT::Other),
3049 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3050 DAG.getUNDEF(MaskVT), Mask, VL});
3051 Chain = Unorder.getValue(1);
3052 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
3053 DAG.getVTList(ContainerVT, MVT::Other),
3054 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
3055 Chain = Src.getValue(1);
3056
3057 // We do the conversion on the absolute value and fix the sign at the end.
3058 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3059
3060 // Determine the largest integer that can be represented exactly. This and
3061 // values larger than it don't have any fractional bits so don't need to
3062 // be converted.
3063 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
3064 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3065 APFloat MaxVal = APFloat(FltSem);
3066 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3067 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3068 SDValue MaxValNode =
3069 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3070 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3071 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3072
3073 // If abs(Src) was larger than MaxVal or nan, keep it.
3074 Mask = DAG.getNode(
3075 RISCVISD::SETCC_VL, DL, MaskVT,
3076 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3077
3078 // Truncate to integer and convert back to FP.
3079 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3080 MVT XLenVT = Subtarget.getXLenVT();
3081 SDValue Truncated;
3082
3083 switch (Op.getOpcode()) {
3084 default:
3085 llvm_unreachable("Unexpected opcode");
3086 case ISD::STRICT_FCEIL:
3087 case ISD::STRICT_FFLOOR:
3088 case ISD::STRICT_FROUND:
3089 case ISD::STRICT_FROUNDEVEN: {
3090 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
3091 assert(FRM != RISCVFPRndMode::Invalid);
3092 Truncated = DAG.getNode(
3093 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3094 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3095 break;
3096 }
3097 case ISD::STRICT_FTRUNC:
3098 Truncated =
3099 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
3100 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3101 break;
3102 case ISD::STRICT_FNEARBYINT:
3103 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
3104 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3105 Mask, VL);
3106 break;
3107 }
3108 Chain = Truncated.getValue(1);
3109
3110 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3111 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3112 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3113 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3114 Truncated, Mask, VL);
3115 Chain = Truncated.getValue(1);
3116 }
3117
3118 // Restore the original sign so that -0.0 is preserved.
3119 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3120 Src, Src, Mask, VL);
3121
3122 if (VT.isFixedLengthVector())
3123 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3124 return DAG.getMergeValues({Truncated, Chain}, DL);
3125}
3126
3127static SDValue
3128lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
3129 const RISCVSubtarget &Subtarget) {
3130 MVT VT = Op.getSimpleValueType();
3131 if (VT.isVector())
3132 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3133
3134 if (DAG.shouldOptForSize())
3135 return SDValue();
3136
3137 SDLoc DL(Op);
3138 SDValue Src = Op.getOperand(0);
3139
3140 // Create an integer the size of the mantissa with the MSB set. This and all
3141 // values larger than it don't have any fractional bits so don't need to be
3142 // converted.
3143 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
3144 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3145 APFloat MaxVal = APFloat(FltSem);
3146 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3147 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3148 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3149
3151 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3152 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3153}
3154
3155// Expand vector LRINT and LLRINT by converting to the integer domain.
3156static SDValue lowerVectorXRINT(SDValue Op, SelectionDAG &DAG,
3157 const RISCVSubtarget &Subtarget) {
3158 MVT VT = Op.getSimpleValueType();
3159 assert(VT.isVector() && "Unexpected type");
3160
3161 SDLoc DL(Op);
3162 SDValue Src = Op.getOperand(0);
3163 MVT ContainerVT = VT;
3164
3165 if (VT.isFixedLengthVector()) {
3166 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3167 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3168 }
3169
3170 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3171 SDValue Truncated =
3172 DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, ContainerVT, Src, Mask, VL);
3173
3174 if (!VT.isFixedLengthVector())
3175 return Truncated;
3176
3177 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3178}
3179
3180static SDValue
3181getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
3182 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
3183 SDValue Offset, SDValue Mask, SDValue VL,
3184 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3185 if (Merge.isUndef())
3186 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3187 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3188 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3189 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3190}
3191
3192static SDValue
3193getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3194 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
3195 SDValue VL,
3196 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
3197 if (Merge.isUndef())
3198 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3199 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3200 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
3201 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3202}
3203
3204static MVT getLMUL1VT(MVT VT) {
3206 "Unexpected vector MVT");
3210}
3211
3212struct VIDSequence {
3213 int64_t StepNumerator;
3214 unsigned StepDenominator;
3215 int64_t Addend;
3216};
3217
3218static std::optional<uint64_t> getExactInteger(const APFloat &APF,
3219 uint32_t BitWidth) {
3220 APSInt ValInt(BitWidth, !APF.isNegative());
3221 // We use an arbitrary rounding mode here. If a floating-point is an exact
3222 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3223 // the rounding mode changes the output value, then it is not an exact
3224 // integer.
3225 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
3226 bool IsExact;
3227 // If it is out of signed integer range, it will return an invalid operation.
3228 // If it is not an exact integer, IsExact is false.
3229 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3230 APFloatBase::opInvalidOp) ||
3231 !IsExact)
3232 return std::nullopt;
3233 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
3234}
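// Illustrative behaviour of getExactInteger above: (3.0, 8) yields 3 and
// (-2.0, 8) yields the two's-complement pattern 0xFE, while (2.5, 8) and any
// value outside the signed 8-bit range yield std::nullopt.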
3235
3236// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3237// to the (non-zero) step S and start value X. This can be then lowered as the
3238// RVV sequence (VID * S) + X, for example.
3239// The step S is represented as an integer numerator divided by a positive
3240// denominator. Note that the implementation currently only identifies
3241// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3242// cannot detect 2/3, for example.
3243// Note that this method will also match potentially unappealing index
3244// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
3245// determine whether this is worth generating code for.
3246static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3247 unsigned EltSizeInBits) {
3248 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3249 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3250 return std::nullopt;
3251 bool IsInteger = Op.getValueType().isInteger();
3252
3253 std::optional<unsigned> SeqStepDenom;
3254 std::optional<int64_t> SeqStepNum, SeqAddend;
3255 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
3256 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3257
3258 // First extract the ops into a list of constant integer values. This may not
3259 // be possible for floats if they're not all representable as integers.
3260 SmallVector<std::optional<uint64_t>> Elts(Op.getNumOperands());
3261 const unsigned OpSize = Op.getScalarValueSizeInBits();
3262 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3263 if (Elt.isUndef()) {
3264 Elts[Idx] = std::nullopt;
3265 continue;
3266 }
3267 if (IsInteger) {
3268 Elts[Idx] = Elt->getAsZExtVal() & maskTrailingOnes<uint64_t>(OpSize);
3269 } else {
3270 auto ExactInteger =
3271 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3272 if (!ExactInteger)
3273 return std::nullopt;
3274 Elts[Idx] = *ExactInteger;
3275 }
3276 }
3277
3278 for (auto [Idx, Elt] : enumerate(Elts)) {
3279 // Assume undef elements match the sequence; we just have to be careful
3280 // when interpolating across them.
3281 if (!Elt)
3282 continue;
3283
3284 if (PrevElt) {
3285 // Calculate the step since the last non-undef element, and ensure
3286 // it's consistent across the entire sequence.
3287 unsigned IdxDiff = Idx - PrevElt->second;
3288 int64_t ValDiff = SignExtend64(*Elt - PrevElt->first, EltSizeInBits);
3289
3290 // A zero value difference means that we're somewhere in the middle
3291 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3292 // step change before evaluating the sequence.
3293 if (ValDiff == 0)
3294 continue;
3295
3296 int64_t Remainder = ValDiff % IdxDiff;
3297 // Normalize the step if it's greater than 1.
3298 if (Remainder != ValDiff) {
3299 // The difference must cleanly divide the element span.
3300 if (Remainder != 0)
3301 return std::nullopt;
3302 ValDiff /= IdxDiff;
3303 IdxDiff = 1;
3304 }
3305
3306 if (!SeqStepNum)
3307 SeqStepNum = ValDiff;
3308 else if (ValDiff != SeqStepNum)
3309 return std::nullopt;
3310
3311 if (!SeqStepDenom)
3312 SeqStepDenom = IdxDiff;
3313 else if (IdxDiff != *SeqStepDenom)
3314 return std::nullopt;
3315 }
3316
3317 // Record this non-undef element for later.
3318 if (!PrevElt || PrevElt->first != *Elt)
3319 PrevElt = std::make_pair(*Elt, Idx);
3320 }
3321
3322 // We need to have logged a step for this to count as a legal index sequence.
3323 if (!SeqStepNum || !SeqStepDenom)
3324 return std::nullopt;
3325
3326 // Loop back through the sequence and validate elements we might have skipped
3327 // while waiting for a valid step. While doing this, log any sequence addend.
3328 for (auto [Idx, Elt] : enumerate(Elts)) {
3329 if (!Elt)
3330 continue;
3331 uint64_t ExpectedVal =
3332 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3333 int64_t Addend = SignExtend64(*Elt - ExpectedVal, EltSizeInBits);
3334 if (!SeqAddend)
3335 SeqAddend = Addend;
3336 else if (Addend != SeqAddend)
3337 return std::nullopt;
3338 }
3339
3340 assert(SeqAddend && "Must have an addend if we have a step");
3341
3342 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3343}
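// Illustrative matches for isSimpleVIDSequence above: <1, 3, 5, 7> is
// recognised as {StepNumerator 2, StepDenominator 1, Addend 1}, and
// <0, 0, 1, 1> as {1, 2, 0} (a halved VID); something like <2, 3, 5> has no
// consistent step and returns std::nullopt.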
3344
3345// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3346// and lower it as a VRGATHER_VX_VL from the source vector.
3347static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3348 SelectionDAG &DAG,
3349 const RISCVSubtarget &Subtarget) {
3350 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3351 return SDValue();
3352 SDValue Vec = SplatVal.getOperand(0);
3353 // Only perform this optimization on vectors of the same size for simplicity.
3354 // Don't perform this optimization for i1 vectors.
3355 // FIXME: Support i1 vectors, maybe by promoting to i8?
3356 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3357 return SDValue();
3358 SDValue Idx = SplatVal.getOperand(1);
3359 // The index must be a legal type.
3360 if (Idx.getValueType() != Subtarget.getXLenVT())
3361 return SDValue();
3362
3363 MVT ContainerVT = VT;
3364 if (VT.isFixedLengthVector()) {
3365 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3366 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3367 }
3368
3369 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3370
3371 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3372 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3373
3374 if (!VT.isFixedLengthVector())
3375 return Gather;
3376
3377 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3378}
3379
3380
3381/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3382/// which constitute a large proportion of the elements. In such cases we can
3383/// splat a vector with the dominant element and make up the shortfall with
3384/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3385/// Note that this includes vectors of 2 elements by association. The
3386/// upper-most element is the "dominant" one, allowing us to use a splat to
3387/// "insert" the upper element, and an insert of the lower element at position
3388/// 0, which improves codegen.
3390 const RISCVSubtarget &Subtarget) {
3391 MVT VT = Op.getSimpleValueType();
3392 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3393
3394 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3395
3396 SDLoc DL(Op);
3397 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3398
3399 MVT XLenVT = Subtarget.getXLenVT();
3400 unsigned NumElts = Op.getNumOperands();
3401
3402 SDValue DominantValue;
3403 unsigned MostCommonCount = 0;
3404 DenseMap<SDValue, unsigned> ValueCounts;
3405 unsigned NumUndefElts =
3406 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3407
3408 // Track the number of scalar loads we know we'd be inserting, estimated as
3409 // any non-zero floating-point constant. Other kinds of element are either
3410 // already in registers or are materialized on demand. The threshold at which
3411 // a vector load is more desirable than several scalar materialization and
3412 // vector-insertion instructions is not known.
3413 unsigned NumScalarLoads = 0;
3414
3415 for (SDValue V : Op->op_values()) {
3416 if (V.isUndef())
3417 continue;
3418
3419 ValueCounts.insert(std::make_pair(V, 0));
3420 unsigned &Count = ValueCounts[V];
3421 if (0 == Count)
3422 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3423 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3424
3425 // Is this value dominant? In case of a tie, prefer the highest element as
3426 // it's cheaper to insert near the beginning of a vector than it is at the
3427 // end.
3428 if (++Count >= MostCommonCount) {
3429 DominantValue = V;
3430 MostCommonCount = Count;
3431 }
3432 }
3433
3434 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3435 unsigned NumDefElts = NumElts - NumUndefElts;
3436 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3437
3438 // Don't perform this optimization when optimizing for size, since
3439 // materializing elements and inserting them tends to cause code bloat.
3440 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3441 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3442 ((MostCommonCount > DominantValueCountThreshold) ||
3443 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3444 // Start by splatting the most common element.
3445 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3446
3447 DenseSet<SDValue> Processed{DominantValue};
3448
3449 // We can handle an insert into the last element (of a splat) via
3450 // v(f)slide1down. This is slightly better than the vslideup insert
3451 // lowering as it avoids the need for a vector group temporary. It
3452 // is also better than using vmerge.vx as it avoids the need to
3453 // materialize the mask in a vector register.
3454 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3455 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3456 LastOp != DominantValue) {
3457 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3458 auto OpCode =
3459 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3460 if (!VT.isFloatingPoint())
3461 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3462 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3463 LastOp, Mask, VL);
3464 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3465 Processed.insert(LastOp);
3466 }
3467
3468 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3469 for (const auto &OpIdx : enumerate(Op->ops())) {
3470 const SDValue &V = OpIdx.value();
3471 if (V.isUndef() || !Processed.insert(V).second)
3472 continue;
3473 if (ValueCounts[V] == 1) {
3474 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3475 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3476 } else {
3477 // Blend in all instances of this value using a VSELECT, using a
3478 // mask where each bit signals whether that element is the one
3479 // we're after.
3480 SmallVector<SDValue> Ops;
3481 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3482 return DAG.getConstant(V == V1, DL, XLenVT);
3483 });
3484 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3485 DAG.getBuildVector(SelMaskTy, DL, Ops),
3486 DAG.getSplatBuildVector(VT, DL, V), Vec);
3487 }
3488 }
3489
3490 return Vec;
3491 }
3492
3493 return SDValue();
3494}
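// Illustrative example for the lowering above: for <4 x i32> <7, 7, 7, 3> the
// value 7 dominates, so the code splats 7 and then patches the odd element
// out; because 3 is the last element and occurs once, it is expected to be
// folded in with a single vslide1down rather than a vector-register insert.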
3495
3497 const RISCVSubtarget &Subtarget) {
3498 MVT VT = Op.getSimpleValueType();
3499 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3500
3501 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3502
3503 SDLoc DL(Op);
3504 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3505
3506 MVT XLenVT = Subtarget.getXLenVT();
3507 unsigned NumElts = Op.getNumOperands();
3508
3509 if (VT.getVectorElementType() == MVT::i1) {
3510 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3511 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3512 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3513 }
3514
3515 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3516 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3517 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3518 }
3519
3520 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3521 // scalar integer chunks whose bit-width depends on the number of mask
3522 // bits and XLEN.
3523 // First, determine the most appropriate scalar integer type to use. This
3524 // is at most XLenVT, but may be shrunk to a smaller vector element type
3525 // according to the size of the final vector - use i8 chunks rather than
3526 // XLenVT if we're producing a v8i1. This results in more consistent
3527 // codegen across RV32 and RV64.
3528 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3529 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3530 // If we have to use more than one INSERT_VECTOR_ELT then this
3531 // optimization is likely to increase code size; avoid performing it in
3532 // such a case. We can use a load from a constant pool in this case.
3533 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3534 return SDValue();
3535 // Now we can create our integer vector type. Note that it may be larger
3536 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3537 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3538 MVT IntegerViaVecVT =
3539 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3540 IntegerViaVecElts);
3541
3542 uint64_t Bits = 0;
3543 unsigned BitPos = 0, IntegerEltIdx = 0;
3544 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3545
3546 for (unsigned I = 0; I < NumElts;) {
3547 SDValue V = Op.getOperand(I);
3548 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3549 Bits |= ((uint64_t)BitValue << BitPos);
3550 ++BitPos;
3551 ++I;
3552
3553 // Once we accumulate enough bits to fill our scalar type or process the
3554 // last element, insert into our vector and clear our accumulated data.
3555 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3556 if (NumViaIntegerBits <= 32)
3557 Bits = SignExtend64<32>(Bits);
3558 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3559 Elts[IntegerEltIdx] = Elt;
3560 Bits = 0;
3561 BitPos = 0;
3562 IntegerEltIdx++;
3563 }
3564 }
3565
3566 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3567
3568 if (NumElts < NumViaIntegerBits) {
3569 // If we're producing a smaller vector than our minimum legal integer
3570 // type, bitcast to the equivalent (known-legal) mask type, and extract
3571 // our final mask.
3572 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3573 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3574 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3575 DAG.getConstant(0, DL, XLenVT));
3576 } else {
3577 // Else we must have produced an integer type with the same size as the
3578 // mask type; bitcast for the final result.
3579 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3580 Vec = DAG.getBitcast(VT, Vec);
3581 }
3582
3583 return Vec;
3584 }
3585
3586 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3587 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3588 : RISCVISD::VMV_V_X_VL;
3589 if (!VT.isFloatingPoint())
3590 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3591 Splat =
3592 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3593 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3594 }
3595
3596 // Try and match index sequences, which we can lower to the vid instruction
3597 // with optional modifications. An all-undef vector is matched by
3598 // getSplatValue, above.
3599 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3600 int64_t StepNumerator = SimpleVID->StepNumerator;
3601 unsigned StepDenominator = SimpleVID->StepDenominator;
3602 int64_t Addend = SimpleVID->Addend;
3603
3604 assert(StepNumerator != 0 && "Invalid step");
3605 bool Negate = false;
3606 int64_t SplatStepVal = StepNumerator;
3607 unsigned StepOpcode = ISD::MUL;
3608 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3609 // anyway as the shift of 63 won't fit in uimm5.
3610 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3611 isPowerOf2_64(std::abs(StepNumerator))) {
3612 Negate = StepNumerator < 0;
3613 StepOpcode = ISD::SHL;
3614 SplatStepVal = Log2_64(std::abs(StepNumerator));
3615 }
3616
3617 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3618 // threshold since it's the immediate value many RVV instructions accept.
3619 // There is no vmul.vi instruction so ensure multiply constant can fit in
3620 // a single addi instruction.
3621 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3622 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3623 isPowerOf2_32(StepDenominator) &&
3624 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3625 MVT VIDVT =
3626 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3627 MVT VIDContainerVT =
3628 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3629 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3630 // Convert right out of the scalable type so we can use standard ISD
3631 // nodes for the rest of the computation. If we used scalable types with
3632 // these, we'd lose the fixed-length vector info and generate worse
3633 // vsetvli code.
3634 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3635 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3636 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3637 SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
3638 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3639 }
3640 if (StepDenominator != 1) {
3641 SDValue SplatStep =
3642 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3643 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3644 }
3645 if (Addend != 0 || Negate) {
3646 SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
3647 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3648 VID);
3649 }
3650 if (VT.isFloatingPoint()) {
3651 // TODO: Use vfwcvt to reduce register pressure.
3652 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3653 }
3654 return VID;
3655 }
3656 }
3657
3658 // For very small build_vectors, use a single scalar insert of a constant.
3659 // TODO: Base this on constant rematerialization cost, not size.
3660 const unsigned EltBitSize = VT.getScalarSizeInBits();
3661 if (VT.getSizeInBits() <= 32 &&
3662 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3663 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3664 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3665 "Unexpected sequence type");
3666 // If we can use the original VL with the modified element type, this
3667 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3668 // be moved into InsertVSETVLI?
3669 unsigned ViaVecLen =
3670 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3671 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3672
3673 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3674 uint64_t SplatValue = 0;
3675 // Construct the amalgamated value at this larger vector type.
3676 for (const auto &OpIdx : enumerate(Op->op_values())) {
3677 const auto &SeqV = OpIdx.value();
3678 if (!SeqV.isUndef())
3679 SplatValue |=
3680 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3681 }
3682
3683 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3684 // achieve better constant materialization.
3685 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3686 SplatValue = SignExtend64<32>(SplatValue);
3687
3688 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3689 DAG.getUNDEF(ViaVecVT),
3690 DAG.getConstant(SplatValue, DL, XLenVT),
3691 DAG.getConstant(0, DL, XLenVT));
3692 if (ViaVecLen != 1)
3693 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3694 MVT::getVectorVT(ViaIntVT, 1), Vec,
3695 DAG.getConstant(0, DL, XLenVT));
3696 return DAG.getBitcast(VT, Vec);
3697 }
3698
3699
3700 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3701 // when re-interpreted as a vector with a larger element type. For example,
3702 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3703 // could be instead splat as
3704 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3705 // TODO: This optimization could also work on non-constant splats, but it
3706 // would require bit-manipulation instructions to construct the splat value.
3707 SmallVector<SDValue> Sequence;
3708 const auto *BV = cast<BuildVectorSDNode>(Op);
3709 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3710 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3711 BV->getRepeatedSequence(Sequence) &&
3712 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3713 unsigned SeqLen = Sequence.size();
3714 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3715 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3716 ViaIntVT == MVT::i64) &&
3717 "Unexpected sequence type");
3718
3719 // If we can use the original VL with the modified element type, this
3720 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3721 // be moved into InsertVSETVLI?
3722 const unsigned RequiredVL = NumElts / SeqLen;
3723 const unsigned ViaVecLen =
3724 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3725 NumElts : RequiredVL;
3726 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3727
3728 unsigned EltIdx = 0;
3729 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3730 uint64_t SplatValue = 0;
3731 // Construct the amalgamated value which can be splatted as this larger
3732 // vector type.
3733 for (const auto &SeqV : Sequence) {
3734 if (!SeqV.isUndef())
3735 SplatValue |=
3736 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3737 EltIdx++;
3738 }
3739
3740 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3741 // achieve better constant materialization.
3742 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3743 SplatValue = SignExtend64<32>(SplatValue);
3744
3745 // Since we can't introduce illegal i64 types at this stage, we can only
3746 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3747 // way we can use RVV instructions to splat.
3748 assert((ViaIntVT.bitsLE(XLenVT) ||
3749 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3750 "Unexpected bitcast sequence");
3751 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3752 SDValue ViaVL =
3753 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3754 MVT ViaContainerVT =
3755 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3756 SDValue Splat =
3757 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3758 DAG.getUNDEF(ViaContainerVT),
3759 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3760 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3761 if (ViaVecLen != RequiredVL)
3762 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3763 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3764 DAG.getConstant(0, DL, XLenVT));
3765 return DAG.getBitcast(VT, Splat);
3766 }
3767 }
3768
3769 // If the number of signbits allows, see if we can lower as a <N x i8>.
3770 // Our main goal here is to reduce LMUL (and thus work) required to
3771 // build the constant, but we will also narrow if the resulting
3772 // narrow vector is known to materialize cheaply.
3773 // TODO: We really should be costing the smaller vector. There are
3774 // profitable cases this misses.
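  // For example, v4i32 {1, -2, 3, -4} has at least 30 sign bits in every
  // element, so EltBitSize - SignBits = 2 < 8; it can be rebuilt as a v4i8
  // constant and widened back with a vector sign-extend (VSEXT_VL).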
3775 if (EltBitSize > 8 && VT.isInteger() &&
3776 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen())) {
3777 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3778 if (EltBitSize - SignBits < 8) {
3779 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3780 DL, Op->ops());
3781 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3782 Source, DAG, Subtarget);
3783 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3784 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3785 }
3786 }
3787
3788 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3789 return Res;
3790
3791 // For constant vectors, use generic constant pool lowering. Otherwise,
3792 // we'd have to materialize constants in GPRs just to move them into the
3793 // vector.
3794 return SDValue();
3795}
3796
3797 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3798 const RISCVSubtarget &Subtarget) {
3799 MVT VT = Op.getSimpleValueType();
3800 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3801
3802 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3803      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3804 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3805
3806 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3807
3808 SDLoc DL(Op);
3809 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3810
3811 MVT XLenVT = Subtarget.getXLenVT();
3812
3813 if (VT.getVectorElementType() == MVT::i1) {
3814 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3815 // vector type, we have a legal equivalently-sized i8 type, so we can use
3816 // that.
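    // For example, v4i1 {1, 0, 1, 1} becomes the v4i8 vector {1, 0, 1, 1},
    // ANDed with 1 to strip any garbage above bit 0 of the incoming scalars,
    // then compared SETNE against zero to form the mask.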
3817 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3818 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3819
3820 SDValue WideVec;
3821 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3822 // For a splat, perform a scalar truncate before creating the wider
3823 // vector.
3824 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
3825 DAG.getConstant(1, DL, Splat.getValueType()));
3826 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3827 } else {
3828 SmallVector<SDValue, 8> Ops(Op->op_values());
3829 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3830 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3831 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3832 }
3833
3834 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3835 }
3836
3837 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3838 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3839 return Gather;
3840 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3841                                        : RISCVISD::VMV_V_X_VL;
3842 if (!VT.isFloatingPoint())
3843 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3844 Splat =
3845 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3846 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3847 }
3848
3849 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3850 return Res;
3851
3852 // If we're compiling for an exact VLEN value, we can split our work per
3853 // register in the register group.
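  // For instance, with an exact VLEN of 128 a v8i32 build_vector spans two
  // vector registers: it is emitted as two v4i32 build_vectors, each
  // converted to an LMUL=1 scalable value and inserted at consecutive
  // register boundaries of the container.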
3854 if (const auto VLen = Subtarget.getRealVLen();
3855 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
3856 MVT ElemVT = VT.getVectorElementType();
3857 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
3858 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3859 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
3860 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
3861 assert(M1VT == getLMUL1VT(M1VT));
3862
3863 // The following semantically builds up a fixed length concat_vector
3864 // of the component build_vectors. We eagerly lower to scalable and
3865 // insert_subvector here to avoid DAG combining it back to a large
3866 // build_vector.
3867 SmallVector<SDValue> BuildVectorOps(Op->op_begin(), Op->op_end());
3868 unsigned NumOpElts = M1VT.getVectorMinNumElements();
3869 SDValue Vec = DAG.getUNDEF(ContainerVT);
3870 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
3871 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
3872 SDValue SubBV =
3873 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
3874 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
3875 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
3876 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
3877 DAG.getVectorIdxConstant(InsertIdx, DL));
3878 }
3879 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3880 }
3881
3882 // For m1 vectors, if we have non-undef values in both halves of our vector,
3883 // split the vector into low and high halves, build them separately, then
3884 // use a vselect to combine them. For long vectors, this cuts the critical
3885 // path of the vslide1down sequence in half, and gives us an opportunity
3886 // to special case each half independently. Note that we don't change the
3887  // length of the sub-vectors here, so if both fall back to the generic
3888 // vslide1down path, we should be able to fold the vselect into the final
3889 // vslidedown (for the undef tail) for the first half w/ masking.
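  // For example, a qualifying v8i32 {a,b,c,d,e,f,g,h} is built as
  //   SubVecA = {a,b,c,d,u,u,u,u} and SubVecB = {u,u,u,u,e,f,g,h}
  // and recombined with a vselect whose mask is {1,1,1,1,0,0,0,0}.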
3890 unsigned NumElts = VT.getVectorNumElements();
3891 unsigned NumUndefElts =
3892 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3893 unsigned NumDefElts = NumElts - NumUndefElts;
3894 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
3895 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
3896 SmallVector<SDValue> SubVecAOps, SubVecBOps;
3897 SmallVector<SDValue> MaskVals;
3898 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
3899 SubVecAOps.reserve(NumElts);
3900 SubVecBOps.reserve(NumElts);
3901 for (unsigned i = 0; i < NumElts; i++) {
3902 SDValue Elem = Op->getOperand(i);
3903 if (i < NumElts / 2) {
3904 SubVecAOps.push_back(Elem);
3905 SubVecBOps.push_back(UndefElem);
3906 } else {
3907 SubVecAOps.push_back(UndefElem);
3908 SubVecBOps.push_back(Elem);
3909 }
3910 bool SelectMaskVal = (i < NumElts / 2);
3911 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
3912 }
3913 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
3914 MaskVals.size() == NumElts);
3915
3916 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
3917 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
3918 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
3919 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
3920 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
3921 }
3922
3923 // Cap the cost at a value linear to the number of elements in the vector.
3924  // The default lowering is to use the stack; the scalar stores plus vector
3925  // reload are linear in VL. However, at high LMULs, vslide1down and vslidedown
3926  // end up being (at least) linear in LMUL. As a result, using the slide-based
3927  // lowering for every element ends up costing VL*LMUL.
3928 // TODO: Should we be directly costing the stack alternative? Doing so might
3929 // give us a more accurate upper bound.
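  // For example, a fully defined 16-element vector gets a budget of 32; at
  // LMUL=2 (PerSlideCost = 2) the 16 defined elements cost exactly 32 and we
  // proceed, whereas at LMUL=4 they would cost 64, the budget goes negative,
  // and we fall back to the default lowering.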
3930 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3931
3932 // TODO: unify with TTI getSlideCost.
3933 InstructionCost PerSlideCost = 1;
3934 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3935 default: break;
3936  case RISCVII::VLMUL::LMUL_2:
3937 PerSlideCost = 2;
3938 break;
3939  case RISCVII::VLMUL::LMUL_4:
3940 PerSlideCost = 4;
3941 break;
3942  case RISCVII::VLMUL::LMUL_8:
3943 PerSlideCost = 8;
3944 break;
3945 }
3946
3947 // TODO: Should we be using the build instseq then cost + evaluate scheme
3948 // we use for integer constants here?
3949 unsigned UndefCount = 0;
3950 for (const SDValue &V : Op->ops()) {
3951 if (V.isUndef()) {
3952 UndefCount++;
3953 continue;
3954 }
3955 if (UndefCount) {
3956 LinearBudget -= PerSlideCost;
3957 UndefCount = 0;
3958 }
3959 LinearBudget -= PerSlideCost;
3960 }
3961 if (UndefCount) {
3962 LinearBudget -= PerSlideCost;
3963 }
3964
3965 if (LinearBudget < 0)
3966 return SDValue();
3967
3968 assert((!VT.isFloatingPoint() ||
3969 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3970 "Illegal type which will result in reserved encoding");
3971
3972 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3973
3974 SDValue Vec;
3975 UndefCount = 0;
3976 for (SDValue V : Op->ops()) {
3977 if (V.isUndef()) {
3978 UndefCount++;
3979 continue;
3980 }
3981
3982 // Start our sequence with a TA splat in the hopes that hardware is able to
3983 // recognize there's no dependency on the prior value of our temporary
3984 // register.
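    // For example, {a, b, c, undef} becomes: a tail-agnostic vmv.v.x of a,
    // vslide1down of b, vslide1down of c, and a final vslidedown by 1 for the
    // trailing undef (handled after the loop).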
3985 if (!Vec) {
3986 Vec = DAG.getSplatVector(VT, DL, V);
3987 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3988 UndefCount = 0;
3989 continue;
3990 }
3991
3992 if (UndefCount) {
3993 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3994 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3995 Vec, Offset, Mask, VL, Policy);
3996 UndefCount = 0;
3997 }
3998 auto OpCode =
3999        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4000 if (!VT.isFloatingPoint())
4001 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4002 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4003 V, Mask, VL);
4004 }
4005 if (UndefCount) {
4006 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4007 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4008 Vec, Offset, Mask, VL, Policy);
4009 }
4010 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4011}
4012
4013static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4014                                   SDValue Lo, SDValue Hi, SDValue VL,
4015 SelectionDAG &DAG) {
4016 if (!Passthru)
4017 Passthru = DAG.getUNDEF(VT);
4018 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4019 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4020 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4021    // If the Hi constant is just Lo's sign bit replicated, lower this as a
4022    // custom node in order to try and match RVV vector/scalar instructions.
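    // E.g. LoC = 5 pairs with HiC = 0 and LoC = -5 pairs with HiC = -1; in
    // both cases the i64 element is just the sign-extension of Lo, which is
    // exactly what vmv.v.x of Lo produces at SEW=64.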
4023 if ((LoC >> 31) == HiC)
4024 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4025
4026 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4027 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4028 // vlmax vsetvli or vsetivli to change the VL.
4029 // FIXME: Support larger constants?
4030 // FIXME: Support non-constant VLs by saturating?
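    // E.g. for the i64 constant 0x0000000500000005, Lo == Hi == 5, so the
    // vector can be viewed with EEW=32 at twice the element count and filled
    // with a single vmv.v.x of 5 before being bitcast back.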
4031 if (LoC == HiC) {
4032 SDValue NewVL;
4033 if (isAllOnesConstant(VL) ||
4034 (isa<RegisterSDNode>(VL) &&
4035 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4036 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4037 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4038 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4039
4040 if (NewVL) {
4041 MVT InterVT =
4042 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4043 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4044 DAG.getUNDEF(InterVT), Lo,
4045 DAG.getRegister(RISCV::X0, MVT::i32));
4046 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4047 }
4048 }
4049 }
4050
4051 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
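  // E.g. Lo = -7 gives Hi = (-7 >> 31) = -1, so the 64-bit value is
  // 0xFFFFFFFFFFFFFFF9, i.e. the sign-extension of Lo, which vmv.v.x of Lo
  // already produces at SEW=64.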
4052 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4053 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4054 Hi.getConstantOperandVal(1) == 31)
4055 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4056
4057 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4058 // even if it might be sign extended.
4059 if (Hi.isUndef())
4060 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4061
4062 // Fall back to a stack store and stride x0 vector load.
4063 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4064 Hi, VL);
4065}
4066
4067// Called by type legalization to handle splat of i64 on RV32.
4068// FIXME: We can optimize this when the type has sign or zero bits in one
4069// of the halves.
4070static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4071 SDValue Scalar, SDValue VL,
4072 SelectionDAG &DAG) {
4073 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4074 SDValue Lo, Hi;
4075 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4076 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4077}
4078
4079// This function lowers a splat of a scalar operand Splat with the vector
4080// length VL. It ensures the final sequence is type legal, which is useful when
4081// lowering a splat after type legalization.
4082static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4083 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4084 const RISCVSubtarget &Subtarget) {
4085 bool HasPassthru = Passthru && !Passthru.isUndef();
4086 if (!HasPassthru && !Passthru)
4087 Passthru = DAG.getUNDEF(VT);
4088 if (VT.isFloatingPoint())
4089 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4090
4091 MVT XLenVT = Subtarget.getXLenVT();
4092
4093 // Simplest case is that the operand needs to be promoted to XLenVT.
4094 if (Scalar.getValueType().bitsLE(XLenVT)) {
4095 // If the operand is a constant, sign extend to increase our chances
4096 // of being able to use a .vi instruction. ANY_EXTEND would become a
4097    // zero extend and the simm5 check in isel would fail.
4098 // FIXME: Should we ignore the upper bits in isel instead?
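    // For example, for an i8 splat of -3: sign-extending keeps the value -3,
    // which fits simm5 and allows vmv.v.i, whereas a zero extend would yield
    // 253 and force the scalar through a GPR via vmv.v.x.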
4099 unsigned ExtOpc =
4100 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4101 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4102 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4103 }
4104
4105 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4106 "Unexpected scalar for splat lowering!");
4107
4108 if (isOneConstant(VL) && isNullConstant(Scalar))
4109 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4110 DAG.getConstant(0, DL, XLenVT), VL);
4111
4112 // Otherwise use the more complicated splatting algorithm.
4113 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4114}
4115
4116// This function lowers an insert of a scalar operand Scalar into lane
4117// 0 of the vector regardless of the value of VL. The contents of the
4118// remaining lanes of the result vector are unspecified. VL is assumed
4119// to be non-zero.
4120 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4121 const SDLoc &DL, SelectionDAG &DAG,
4122 const RISCVSubtarget &Subtarget) {
4123 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4124
4125 const MVT XLenVT = Subtarget.getXLenVT();
4126 SDValue Passthru = DAG.getUNDEF(VT);
4127
4128 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4129 isNullConstant(Scalar.getOperand(1))) {
4130 SDValue ExtractedVal = Scalar.getOperand(0);
4131 // The element types must be the same.
4132 if (ExtractedVal.getValueType().getVectorElementType() ==
4133 VT.getVectorElementType()) {
4134 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4135 MVT ExtractedContainerVT = ExtractedVT;
4136 if (ExtractedContainerVT.isFixedLengthVector()) {
4137 ExtractedContainerVT = getContainerForFixedLengthVector(
4138 DAG, ExtractedContainerVT, Subtarget);
4139 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4140 ExtractedVal, DAG, Subtarget);
4141 }
4142 if (ExtractedContainerVT.bitsLE(VT))
4143 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4144 ExtractedVal, DAG.getConstant(0, DL, XLenVT));
4145 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4146 DAG.getConstant(0, DL, XLenVT));
4147 }
4148 }
4149
4150
4151 if (VT.isFloatingPoint())
4152 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4153 DAG.getUNDEF(VT), Scalar, VL);
4154
4155 // Avoid the tricky legalization cases by falling back to using the
4156 // splat code which already handles it gracefully.
4157 if (!Scalar.getValueType().bitsLE(XLenVT))
4158 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4159 DAG.getConstant(1, DL, XLenVT),
4160 VT, DL, DAG, Subtarget);
4161
4162 // If the operand is a constant, sign extend to increase our chances
4163 // of being able to use a .vi instruction. ANY_EXTEND would become a
4164  // zero extend and the simm5 check in isel would fail.
4165 // FIXME: Should we ignore the upper bits in isel instead?
4166 unsigned ExtOpc =
4167 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4168 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);