1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
18#include "RISCVRegisterInfo.h"
19#include "RISCVSubtarget.h"
20#include "RISCVTargetMachine.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/Statistic.h"
34#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/Support/Debug.h"
45#include <optional>
46
47using namespace llvm;
48
49#define DEBUG_TYPE "riscv-lower"
50
51STATISTIC(NumTailCalls, "Number of tail calls");
52
53static cl::opt<unsigned> ExtensionMaxWebSize(
54 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
55 cl::desc("Give the maximum size (in number of nodes) of the web of "
56 "instructions that we will consider for VW expansion"),
57 cl::init(18));
58
59static cl::opt<bool>
60 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
61 cl::desc("Allow the formation of VW_W operations (e.g., "
62 "VWADD_W) with splat constants"),
63 cl::init(false));
64
65static cl::opt<unsigned> NumRepeatedDivisors(
66 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
67 cl::desc("Set the minimum number of repetitions of a divisor to allow "
68 "transformation to multiplications by the reciprocal"),
69 cl::init(2));
70
71static cl::opt<int>
72 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
73 cl::desc("Give the maximum number of instructions that we will "
74 "use for creating a floating-point immediate value"),
75 cl::init(2));
76
77RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
78 const RISCVSubtarget &STI)
79 : TargetLowering(TM), Subtarget(STI) {
80
81 if (Subtarget.isRVE())
82 report_fatal_error("Codegen not yet implemented for RVE");
83
84 RISCVABI::ABI ABI = Subtarget.getTargetABI();
85 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
86
87 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
88 !Subtarget.hasStdExtF()) {
89 errs() << "Hard-float 'f' ABI can't be used for a target that "
90 "doesn't support the F instruction set extension (ignoring "
91 "target-abi)\n";
93 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
94 !Subtarget.hasStdExtD()) {
95 errs() << "Hard-float 'd' ABI can't be used for a target that "
96 "doesn't support the D instruction set extension (ignoring "
97 "target-abi)\n";
99 }
100
101 switch (ABI) {
102 default:
103 report_fatal_error("Don't know how to lower this ABI");
110 break;
111 }
112
113 MVT XLenVT = Subtarget.getXLenVT();
114
115 // Set up the register classes.
116 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
117
118 if (Subtarget.hasStdExtZfhOrZfhmin())
119 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
120 if (Subtarget.hasStdExtZfbfmin())
121 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
122 if (Subtarget.hasStdExtF())
123 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
124 if (Subtarget.hasStdExtD())
125 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
126 if (Subtarget.hasStdExtZhinxOrZhinxmin())
127 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
128 if (Subtarget.hasStdExtZfinx())
129 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
130 if (Subtarget.hasStdExtZdinx()) {
131 if (Subtarget.is64Bit())
132 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
133 else
134 addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
135 }
136
137 static const MVT::SimpleValueType BoolVecVTs[] = {
138 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
139 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
140 static const MVT::SimpleValueType IntVecVTs[] = {
141 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
142 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
143 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
144 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
145 MVT::nxv4i64, MVT::nxv8i64};
146 static const MVT::SimpleValueType F16VecVTs[] = {
147 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
148 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
149 static const MVT::SimpleValueType BF16VecVTs[] = {
150 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
151 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
152 static const MVT::SimpleValueType F32VecVTs[] = {
153 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
154 static const MVT::SimpleValueType F64VecVTs[] = {
155 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
156
157 if (Subtarget.hasVInstructions()) {
158 auto addRegClassForRVV = [this](MVT VT) {
159 // Disable the smallest fractional LMUL types if ELEN is less than
160 // RVVBitsPerBlock.
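    // (Illustration: with ELEN=32 and RVVBitsPerBlock=64, MinElts below is 2,
    // so the nxv1 types such as nxv1i8 get no register class here.)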
161 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
162 if (VT.getVectorMinNumElements() < MinElts)
163 return;
164
165 unsigned Size = VT.getSizeInBits().getKnownMinValue();
166 const TargetRegisterClass *RC;
167 if (Size <= RISCV::RVVBitsPerBlock)
168 RC = &RISCV::VRRegClass;
169 else if (Size == 2 * RISCV::RVVBitsPerBlock)
170 RC = &RISCV::VRM2RegClass;
171 else if (Size == 4 * RISCV::RVVBitsPerBlock)
172 RC = &RISCV::VRM4RegClass;
173 else if (Size == 8 * RISCV::RVVBitsPerBlock)
174 RC = &RISCV::VRM8RegClass;
175 else
176 llvm_unreachable("Unexpected size");
177
178 addRegisterClass(VT, RC);
179 };
180
181 for (MVT VT : BoolVecVTs)
182 addRegClassForRVV(VT);
183 for (MVT VT : IntVecVTs) {
184 if (VT.getVectorElementType() == MVT::i64 &&
185 !Subtarget.hasVInstructionsI64())
186 continue;
187 addRegClassForRVV(VT);
188 }
189
190 if (Subtarget.hasVInstructionsF16Minimal())
191 for (MVT VT : F16VecVTs)
192 addRegClassForRVV(VT);
193
194 if (Subtarget.hasVInstructionsBF16())
195 for (MVT VT : BF16VecVTs)
196 addRegClassForRVV(VT);
197
198 if (Subtarget.hasVInstructionsF32())
199 for (MVT VT : F32VecVTs)
200 addRegClassForRVV(VT);
201
202 if (Subtarget.hasVInstructionsF64())
203 for (MVT VT : F64VecVTs)
204 addRegClassForRVV(VT);
205
206 if (Subtarget.useRVVForFixedLengthVectors()) {
207 auto addRegClassForFixedVectors = [this](MVT VT) {
208 MVT ContainerVT = getContainerForFixedLengthVector(VT);
209 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
210 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
211 addRegisterClass(VT, TRI.getRegClass(RCID));
212 };
214 if (useRVVForFixedLengthVectorVT(VT))
215 addRegClassForFixedVectors(VT);
216
218 if (useRVVForFixedLengthVectorVT(VT))
219 addRegClassForFixedVectors(VT);
220 }
221 }
222
223 // Compute derived properties from the register classes.
225
227
229 MVT::i1, Promote);
230 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
232 MVT::i1, Promote);
233
234 // TODO: add all necessary setOperationAction calls.
236
241
248
250
253
255
257
258 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
259 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
260
261 if (Subtarget.is64Bit()) {
263
265
267 MVT::i32, Custom);
268
271 MVT::i32, Custom);
272 } else {
274 {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
275 nullptr);
276 setLibcallName(RTLIB::MULO_I64, nullptr);
277 }
278
279 if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
281 else if (Subtarget.is64Bit())
282 setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
283 else
285
286 if (!Subtarget.hasStdExtM())
288 XLenVT, Expand);
289 else if (Subtarget.is64Bit())
291 {MVT::i8, MVT::i16, MVT::i32}, Custom);
292
295 Expand);
296
298 Custom);
299
300 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
301 if (Subtarget.is64Bit())
303 } else if (Subtarget.hasVendorXTHeadBb()) {
304 if (Subtarget.is64Bit())
307 } else {
309 }
310
311 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
312 // pattern match it directly in isel.
314 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
315 Subtarget.hasVendorXTHeadBb())
316 ? Legal
317 : Expand);
318 // Zbkb can use rev8+brev8 to implement bitreverse.
320 Subtarget.hasStdExtZbkb() ? Custom : Expand);
321
322 if (Subtarget.hasStdExtZbb()) {
324 Legal);
325
326 if (Subtarget.is64Bit())
329 MVT::i32, Custom);
330 } else {
332 }
333
334 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {
335 // We need the custom lowering to make sure that the resulting sequence
336 // for the 32bit case is efficient on 64bit targets.
337 if (Subtarget.is64Bit())
339 } else {
341 }
342
343 if (Subtarget.is64Bit())
345
346 if (!Subtarget.hasVendorXTHeadCondMov())
348
349 static const unsigned FPLegalNodeTypes[] = {
356
357 static const ISD::CondCode FPCCToExpand[] = {
361
362 static const unsigned FPOpToExpand[] = {
364 ISD::FREM};
365
366 static const unsigned FPRndMode[] = {
369
372
373 static const unsigned ZfhminZfbfminPromoteOps[] = {
383
384 if (Subtarget.hasStdExtZfbfmin()) {
393 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
395 // FIXME: Need to promote bf16 FCOPYSIGN to f32, but the
396 // DAGCombiner::visitFP_ROUND probably needs improvements first.
398 }
399
401 if (Subtarget.hasStdExtZfhOrZhinx()) {
402 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
403 setOperationAction(FPRndMode, MVT::f16,
404 Subtarget.hasStdExtZfa() ? Legal : Custom);
407 } else {
408 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
411 MVT::f16, Legal);
412 // FIXME: Need to promote f16 FCOPYSIGN to f32, but the
413 // DAGCombiner::visitFP_ROUND probably needs improvements first.
415 }
416
419 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
422
424 Subtarget.hasStdExtZfa() ? Legal : Promote);
429 MVT::f16, Promote);
430
431 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
432 // complete support for all operations in LegalizeDAG.
437 MVT::f16, Promote);
438
439 // We need to custom promote this.
440 if (Subtarget.is64Bit())
442
443 if (!Subtarget.hasStdExtZfa())
445 }
446
447 if (Subtarget.hasStdExtFOrZfinx()) {
448 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
449 setOperationAction(FPRndMode, MVT::f32,
450 Subtarget.hasStdExtZfa() ? Legal : Custom);
451 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
455 setOperationAction(FPOpToExpand, MVT::f32, Expand);
456 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
457 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
458 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
459 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
463 Subtarget.isSoftFPABI() ? LibCall : Custom);
466
467 if (Subtarget.hasStdExtZfa())
469 else
471 }
472
473 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
475
476 if (Subtarget.hasStdExtDOrZdinx()) {
477 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
478
479 if (Subtarget.hasStdExtZfa()) {
480 setOperationAction(FPRndMode, MVT::f64, Legal);
484 } else {
485 if (Subtarget.is64Bit())
486 setOperationAction(FPRndMode, MVT::f64, Custom);
487
489 }
490
493 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
497 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
498 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
499 setOperationAction(FPOpToExpand, MVT::f64, Expand);
500 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
501 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
502 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
503 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
507 Subtarget.isSoftFPABI() ? LibCall : Custom);
510 }
511
512 if (Subtarget.is64Bit()) {
515 MVT::i32, Custom);
517 }
518
519 if (Subtarget.hasStdExtFOrZfinx()) {
521 Custom);
522
525 XLenVT, Legal);
526
529 }
530
533 XLenVT, Custom);
534
536
537 if (Subtarget.is64Bit())
539
540 // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
541 // Unfortunately this can't be determined just from the ISA naming string.
543 Subtarget.is64Bit() ? Legal : Custom);
544
547 if (Subtarget.is64Bit())
549
550 if (Subtarget.hasStdExtZicbop()) {
552 }
553
554 if (Subtarget.hasStdExtA()) {
557 } else if (Subtarget.hasForcedAtomics()) {
559 } else {
561 }
562
564
566
567 if (Subtarget.hasVInstructions()) {
569
571
572 // RVV intrinsics may have illegal operands.
573 // We also need to custom legalize vmv.x.s.
576 {MVT::i8, MVT::i16}, Custom);
577 if (Subtarget.is64Bit())
579 MVT::i32, Custom);
580 else
582 MVT::i64, Custom);
583
585 MVT::Other, Custom);
586
587 static const unsigned IntegerVPOps[] = {
588 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
589 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
590 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
591 ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
592 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
593 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
594 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
595 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
596 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
597 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
598 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
599 ISD::VP_ABS};
600
601 static const unsigned FloatingPointVPOps[] = {
602 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
603 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
604 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
605 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
606 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
607 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
608 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
609 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
610 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
611 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
612
613 static const unsigned IntegerVecReduceOps[] = {
617
618 static const unsigned FloatingPointVecReduceOps[] = {
621
622 if (!Subtarget.is64Bit()) {
623 // We must custom-lower certain vXi64 operations on RV32 due to the vector
624 // element type being illegal.
626 MVT::i64, Custom);
627
628 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
629
630 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
631 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
632 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
633 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
634 MVT::i64, Custom);
635 }
636
637 for (MVT VT : BoolVecVTs) {
638 if (!isTypeLegal(VT))
639 continue;
640
642
643 // Mask VTs are custom-expanded into a series of standard nodes
647 VT, Custom);
648
650 Custom);
651
654 {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
655 Expand);
656
657 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
658
661 Custom);
662
664 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
665 Custom);
666
667 // RVV has native int->float & float->int conversions where the
668 // element type sizes are within one power-of-two of each other. Any
669 // wider distances between type sizes have to be lowered as sequences
670 // which progressively narrow the gap in stages.
675 VT, Custom);
677 Custom);
678
679 // Expand all extending loads to types larger than this, and truncating
680 // stores from types larger than this.
682 setTruncStoreAction(OtherVT, VT, Expand);
684 VT, Expand);
685 }
686
687 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
688 ISD::VP_TRUNCATE, ISD::VP_SETCC},
689 VT, Custom);
690
693
695
698 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
699 }
700
701 for (MVT VT : IntVecVTs) {
702 if (!isTypeLegal(VT))
703 continue;
704
707
708 // Vectors implement MULHS/MULHU.
710
711 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
712 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
714
716 Legal);
717
718 setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
719
720 // Custom-lower extensions and truncations from/to mask types.
722 VT, Custom);
723
724 // RVV has native int->float & float->int conversions where the
725 // element type sizes are within one power-of-two of each other. Any
726 // wider distances between type sizes have to be lowered as sequences
727 // which progressively narrow the gap in stages.
732 VT, Custom);
734 Custom);
735
738
739 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
740 // nodes which truncate by one power of two at a time.
742
743 // Custom-lower insert/extract operations to simplify patterns.
745 Custom);
746
747 // Custom-lower reduction operations to set up the corresponding custom
748 // nodes' operands.
749 setOperationAction(IntegerVecReduceOps, VT, Custom);
750
751 setOperationAction(IntegerVPOps, VT, Custom);
752
754
756 VT, Custom);
757
759 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
760 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
761 VT, Custom);
762
765 VT, Custom);
766
769
771
773 setTruncStoreAction(VT, OtherVT, Expand);
775 VT, Expand);
776 }
777
780
781 // Splice
783
784 if (Subtarget.hasStdExtZvbb()) {
786 setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
787 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
788 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
789 VT, Custom);
790 } else {
792 setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
794 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
795 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
796 VT, Expand);
797
798 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
799 // in the range of f32.
800 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
801 if (isTypeLegal(FloatVT)) {
803 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
804 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
805 VT, Custom);
806 }
807
809 }
810 }
811
812 // Expand various CCs to best match the RVV ISA, which natively supports UNE
813 // but no other unordered comparisons, and supports all ordered comparisons
814 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
815 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
816 // and we pattern-match those back to the "original", swapping operands once
817 // more. This way we catch both operations and both "vf" and "fv" forms with
818 // fewer patterns.
819 static const ISD::CondCode VFPCCToExpand[] = {
823 };
824
825 // TODO: support more ops.
826 static const unsigned ZvfhminPromoteOps[] = {
832
833 // TODO: support more vp ops.
834 static const unsigned ZvfhminPromoteVPOps[] = {
835 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
836 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
837 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
838 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
839 ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
840 ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
841 ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
842 ISD::VP_FNEARBYINT};
843
844 // Sets common operation actions on RVV floating-point vector types.
845 const auto SetCommonVFPActions = [&](MVT VT) {
847 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
848 // sizes are within one power-of-two of each other. Therefore conversions
849 // between vXf16 and vXf64 must be lowered as sequences which convert via
850 // vXf32.
852 // Custom-lower insert/extract operations to simplify patterns.
854 Custom);
855 // Expand various condition codes (explained above).
856 setCondCodeAction(VFPCCToExpand, VT, Expand);
857
860
864 VT, Custom);
865
866 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
867
868 // Expand FP operations that need libcalls.
880
882
884
886 VT, Custom);
887
889 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
890 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
891 VT, Custom);
892
895
898 VT, Custom);
899
902
904
905 setOperationAction(FloatingPointVPOps, VT, Custom);
906
908 Custom);
911 VT, Legal);
916 VT, Custom);
917 };
918
919 // Sets common extload/truncstore actions on RVV floating-point vector
920 // types.
921 const auto SetCommonVFPExtLoadTruncStoreActions =
922 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
923 for (auto SmallVT : SmallerVTs) {
924 setTruncStoreAction(VT, SmallVT, Expand);
925 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
926 }
927 };
928
929 if (Subtarget.hasVInstructionsF16()) {
930 for (MVT VT : F16VecVTs) {
931 if (!isTypeLegal(VT))
932 continue;
933 SetCommonVFPActions(VT);
934 }
935 } else if (Subtarget.hasVInstructionsF16Minimal()) {
936 for (MVT VT : F16VecVTs) {
937 if (!isTypeLegal(VT))
938 continue;
940 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
942 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
943 VT, Custom);
946 VT, Custom);
947 // load/store
949
950 // Custom split nxv32f16 since nxv32f32 is not legal.
951 if (VT == MVT::nxv32f16) {
952 setOperationAction(ZvfhminPromoteOps, VT, Custom);
953 setOperationAction(ZvfhminPromoteVPOps, VT, Custom);
954 continue;
955 }
956 // Add more promote ops.
957 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
958 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
959 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
960 }
961 }
962
963 if (Subtarget.hasVInstructionsF32()) {
964 for (MVT VT : F32VecVTs) {
965 if (!isTypeLegal(VT))
966 continue;
967 SetCommonVFPActions(VT);
968 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
969 }
970 }
971
972 if (Subtarget.hasVInstructionsF64()) {
973 for (MVT VT : F64VecVTs) {
974 if (!isTypeLegal(VT))
975 continue;
976 SetCommonVFPActions(VT);
977 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
978 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
979 }
980 }
981
982 if (Subtarget.useRVVForFixedLengthVectors()) {
984 if (!useRVVForFixedLengthVectorVT(VT))
985 continue;
986
987 // By default everything must be expanded.
988 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
991 setTruncStoreAction(VT, OtherVT, Expand);
993 OtherVT, VT, Expand);
994 }
995
996 // Custom lower fixed vector undefs to scalable vector undefs to avoid
997 // expansion to a build_vector of 0s.
999
1000 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1002 Custom);
1003
1005 Custom);
1006
1008 VT, Custom);
1009
1011
1013
1015
1017
1019
1021
1024 Custom);
1025
1027 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1028 Custom);
1029
1031 {
1040 },
1041 VT, Custom);
1043 Custom);
1044
1046
1047 // Operations below are different between masks and other vectors.
1048 if (VT.getVectorElementType() == MVT::i1) {
1049 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1050 ISD::OR, ISD::XOR},
1051 VT, Custom);
1052
1053 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1054 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1055 VT, Custom);
1056 continue;
1057 }
1058
1059 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1060 // it before type legalization for i64 vectors on RV32. It will then be
1061 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1062 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1063 // improvements first.
1064 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1067 }
1068
1071
1072 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1073 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1074 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1075 ISD::VP_SCATTER},
1076 VT, Custom);
1077
1081 VT, Custom);
1082
1085
1086 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1087 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1089
1092 Custom);
1093
1096
1099
1100 // Custom-lower reduction operations to set up the corresponding custom
1101 // nodes' operands.
1105 VT, Custom);
1106
1107 setOperationAction(IntegerVPOps, VT, Custom);
1108
1109 if (Subtarget.hasStdExtZvbb()) {
1113 ISD::ROTR},
1114 VT, Custom);
1115 } else {
1116 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if the element type of VT is
1117 // in the range of f32.
1118 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1119 if (isTypeLegal(FloatVT))
1122 Custom);
1123 }
1124 }
1125
1127 // There are no extending loads or truncating stores.
1128 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1129 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1130 setTruncStoreAction(VT, InnerVT, Expand);
1131 }
1132
1133 if (!useRVVForFixedLengthVectorVT(VT))
1134 continue;
1135
1136 // By default everything must be expanded.
1137 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1139
1140 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1141 // expansion to a build_vector of 0s.
1143
1144 if (VT.getVectorElementType() == MVT::f16 &&
1145 !Subtarget.hasVInstructionsF16()) {
1147 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1149 ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
1150 VT, Custom);
1153 VT, Custom);
1155 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1156 // Don't promote f16 vector operations to f32 if f32 vector type is
1157 // not legal.
1158 // TODO: could split the f16 vector into two vectors and do promotion.
1159 if (!isTypeLegal(F32VecVT))
1160 continue;
1161 setOperationPromotedToType(ZvfhminPromoteOps, VT, F32VecVT);
1162 setOperationPromotedToType(ZvfhminPromoteVPOps, VT, F32VecVT);
1163 continue;
1164 }
1165
1166 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1168 Custom);
1169
1173 VT, Custom);
1174
1177 VT, Custom);
1178
1179 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1180 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1181 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1182 ISD::VP_SCATTER},
1183 VT, Custom);
1184
1189 VT, Custom);
1190
1192
1195 VT, Custom);
1196
1197 setCondCodeAction(VFPCCToExpand, VT, Expand);
1198
1202
1204
1205 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1206
1207 setOperationAction(FloatingPointVPOps, VT, Custom);
1208
1210 Custom);
1217 VT, Custom);
1218 }
1219
1220 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1221 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
1222 Custom);
1225 if (Subtarget.hasStdExtFOrZfinx())
1227 if (Subtarget.hasStdExtDOrZdinx())
1229 }
1230 }
1231
1232 if (Subtarget.hasStdExtA())
1234
1235 if (Subtarget.hasForcedAtomics()) {
1236 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1242 XLenVT, LibCall);
1243 }
1244
1245 if (Subtarget.hasVendorXTHeadMemIdx()) {
1246 for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
1247 ++im) {
1248 setIndexedLoadAction(im, MVT::i8, Legal);
1249 setIndexedStoreAction(im, MVT::i8, Legal);
1250 setIndexedLoadAction(im, MVT::i16, Legal);
1251 setIndexedStoreAction(im, MVT::i16, Legal);
1252 setIndexedLoadAction(im, MVT::i32, Legal);
1253 setIndexedStoreAction(im, MVT::i32, Legal);
1254
1255 if (Subtarget.is64Bit()) {
1256 setIndexedLoadAction(im, MVT::i64, Legal);
1257 setIndexedStoreAction(im, MVT::i64, Legal);
1258 }
1259 }
1260 }
1261
1262 // Function alignments.
1263 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1264 setMinFunctionAlignment(FunctionAlignment);
1265 // Set preferred alignments.
1268
1270
1271 // Jumps are expensive, compared to logic
1273
1277 if (Subtarget.is64Bit())
1279
1280 if (Subtarget.hasStdExtFOrZfinx())
1282
1283 if (Subtarget.hasStdExtZbb())
1285
1286 if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
1288
1289 if (Subtarget.hasStdExtZbkb())
1293 if (Subtarget.hasStdExtFOrZfinx())
1296 if (Subtarget.hasVInstructions())
1298 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1301 if (Subtarget.hasVendorXTHeadMemPair())
1303 if (Subtarget.useRVVForFixedLengthVectors())
1305
1306 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1307 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1308
1309 // Disable strict node mutation.
1310 IsStrictFPEnabled = true;
1311}
1312
1314 LLVMContext &Context,
1315 EVT VT) const {
1316 if (!VT.isVector())
1317 return getPointerTy(DL);
1318 if (Subtarget.hasVInstructions() &&
1319 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1320 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1322}
1323
1324MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1325 return Subtarget.getXLenVT();
1326}
1327
1328// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1329bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1330 unsigned VF,
1331 bool IsScalable) const {
1332 if (!Subtarget.hasVInstructions())
1333 return true;
1334
1335 if (!IsScalable)
1336 return true;
1337
1338 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1339 return true;
1340
1341 // Don't allow VF=1 if those types aren't legal.
1342 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1343 return true;
1344
1345 // VLEN=32 support is incomplete.
1346 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1347 return true;
1348
1349 // The maximum VF is for the smallest element width with LMUL=8.
1350 // VF must be a power of 2.
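  // (With RISCV::RVVBitsPerBlock = 64, the bound below is (64 / 8) * 8 = 64,
  // i.e. an LMUL=8 register group of 8-bit elements at the minimum VLEN.)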
1351 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1352 return VF > MaxVF || !isPowerOf2_32(VF);
1353}
1354
1356 const CallInst &I,
1357 MachineFunction &MF,
1358 unsigned Intrinsic) const {
1359 auto &DL = I.getModule()->getDataLayout();
1360
1361 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1362 bool IsUnitStrided) {
1364 Info.ptrVal = I.getArgOperand(PtrOp);
1365 Type *MemTy;
1366 if (IsStore) {
1367 // Store value is the first operand.
1368 MemTy = I.getArgOperand(0)->getType();
1369 } else {
1370 // Use the return type. If it's a segment load, the return type is a struct.
1371 MemTy = I.getType();
1372 if (MemTy->isStructTy())
1373 MemTy = MemTy->getStructElementType(0);
1374 }
1375 if (!IsUnitStrided)
1376 MemTy = MemTy->getScalarType();
1377
1378 Info.memVT = getValueType(DL, MemTy);
1379 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1381 Info.flags |=
1382 IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
1383 return true;
1384 };
1385
1386 if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
1388
1390 switch (Intrinsic) {
1391 default:
1392 return false;
1393 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1394 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1395 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1396 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1397 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1398 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1399 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1400 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1401 case Intrinsic::riscv_masked_cmpxchg_i32:
1403 Info.memVT = MVT::i32;
1404 Info.ptrVal = I.getArgOperand(0);
1405 Info.offset = 0;
1406 Info.align = Align(4);
1409 return true;
1410 case Intrinsic::riscv_masked_strided_load:
1411 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
1412 /*IsUnitStrided*/ false);
1413 case Intrinsic::riscv_masked_strided_store:
1414 return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
1415 /*IsUnitStrided*/ false);
1416 case Intrinsic::riscv_seg2_load:
1417 case Intrinsic::riscv_seg3_load:
1418 case Intrinsic::riscv_seg4_load:
1419 case Intrinsic::riscv_seg5_load:
1420 case Intrinsic::riscv_seg6_load:
1421 case Intrinsic::riscv_seg7_load:
1422 case Intrinsic::riscv_seg8_load:
1423 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1424 /*IsUnitStrided*/ false);
1425 case Intrinsic::riscv_seg2_store:
1426 case Intrinsic::riscv_seg3_store:
1427 case Intrinsic::riscv_seg4_store:
1428 case Intrinsic::riscv_seg5_store:
1429 case Intrinsic::riscv_seg6_store:
1430 case Intrinsic::riscv_seg7_store:
1431 case Intrinsic::riscv_seg8_store:
1432 // Operands are (vec, ..., vec, ptr, vl)
1433 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1434 /*IsStore*/ true,
1435 /*IsUnitStrided*/ false);
1436 case Intrinsic::riscv_vle:
1437 case Intrinsic::riscv_vle_mask:
1438 case Intrinsic::riscv_vleff:
1439 case Intrinsic::riscv_vleff_mask:
1440 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1441 /*IsStore*/ false,
1442 /*IsUnitStrided*/ true);
1443 case Intrinsic::riscv_vse:
1444 case Intrinsic::riscv_vse_mask:
1445 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1446 /*IsStore*/ true,
1447 /*IsUnitStrided*/ true);
1448 case Intrinsic::riscv_vlse:
1449 case Intrinsic::riscv_vlse_mask:
1450 case Intrinsic::riscv_vloxei:
1451 case Intrinsic::riscv_vloxei_mask:
1452 case Intrinsic::riscv_vluxei:
1453 case Intrinsic::riscv_vluxei_mask:
1454 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1455 /*IsStore*/ false,
1456 /*IsUnitStrided*/ false);
1457 case Intrinsic::riscv_vsse:
1458 case Intrinsic::riscv_vsse_mask:
1459 case Intrinsic::riscv_vsoxei:
1460 case Intrinsic::riscv_vsoxei_mask:
1461 case Intrinsic::riscv_vsuxei:
1462 case Intrinsic::riscv_vsuxei_mask:
1463 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1464 /*IsStore*/ true,
1465 /*IsUnitStrided*/ false);
1466 case Intrinsic::riscv_vlseg2:
1467 case Intrinsic::riscv_vlseg3:
1468 case Intrinsic::riscv_vlseg4:
1469 case Intrinsic::riscv_vlseg5:
1470 case Intrinsic::riscv_vlseg6:
1471 case Intrinsic::riscv_vlseg7:
1472 case Intrinsic::riscv_vlseg8:
1473 case Intrinsic::riscv_vlseg2ff:
1474 case Intrinsic::riscv_vlseg3ff:
1475 case Intrinsic::riscv_vlseg4ff:
1476 case Intrinsic::riscv_vlseg5ff:
1477 case Intrinsic::riscv_vlseg6ff:
1478 case Intrinsic::riscv_vlseg7ff:
1479 case Intrinsic::riscv_vlseg8ff:
1480 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1481 /*IsStore*/ false,
1482 /*IsUnitStrided*/ false);
1483 case Intrinsic::riscv_vlseg2_mask:
1484 case Intrinsic::riscv_vlseg3_mask:
1485 case Intrinsic::riscv_vlseg4_mask:
1486 case Intrinsic::riscv_vlseg5_mask:
1487 case Intrinsic::riscv_vlseg6_mask:
1488 case Intrinsic::riscv_vlseg7_mask:
1489 case Intrinsic::riscv_vlseg8_mask:
1490 case Intrinsic::riscv_vlseg2ff_mask:
1491 case Intrinsic::riscv_vlseg3ff_mask:
1492 case Intrinsic::riscv_vlseg4ff_mask:
1493 case Intrinsic::riscv_vlseg5ff_mask:
1494 case Intrinsic::riscv_vlseg6ff_mask:
1495 case Intrinsic::riscv_vlseg7ff_mask:
1496 case Intrinsic::riscv_vlseg8ff_mask:
1497 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1498 /*IsStore*/ false,
1499 /*IsUnitStrided*/ false);
1500 case Intrinsic::riscv_vlsseg2:
1501 case Intrinsic::riscv_vlsseg3:
1502 case Intrinsic::riscv_vlsseg4:
1503 case Intrinsic::riscv_vlsseg5:
1504 case Intrinsic::riscv_vlsseg6:
1505 case Intrinsic::riscv_vlsseg7:
1506 case Intrinsic::riscv_vlsseg8:
1507 case Intrinsic::riscv_vloxseg2:
1508 case Intrinsic::riscv_vloxseg3:
1509 case Intrinsic::riscv_vloxseg4:
1510 case Intrinsic::riscv_vloxseg5:
1511 case Intrinsic::riscv_vloxseg6:
1512 case Intrinsic::riscv_vloxseg7:
1513 case Intrinsic::riscv_vloxseg8:
1514 case Intrinsic::riscv_vluxseg2:
1515 case Intrinsic::riscv_vluxseg3:
1516 case Intrinsic::riscv_vluxseg4:
1517 case Intrinsic::riscv_vluxseg5:
1518 case Intrinsic::riscv_vluxseg6:
1519 case Intrinsic::riscv_vluxseg7:
1520 case Intrinsic::riscv_vluxseg8:
1521 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1522 /*IsStore*/ false,
1523 /*IsUnitStrided*/ false);
1524 case Intrinsic::riscv_vlsseg2_mask:
1525 case Intrinsic::riscv_vlsseg3_mask:
1526 case Intrinsic::riscv_vlsseg4_mask:
1527 case Intrinsic::riscv_vlsseg5_mask:
1528 case Intrinsic::riscv_vlsseg6_mask:
1529 case Intrinsic::riscv_vlsseg7_mask:
1530 case Intrinsic::riscv_vlsseg8_mask:
1531 case Intrinsic::riscv_vloxseg2_mask:
1532 case Intrinsic::riscv_vloxseg3_mask:
1533 case Intrinsic::riscv_vloxseg4_mask:
1534 case Intrinsic::riscv_vloxseg5_mask:
1535 case Intrinsic::riscv_vloxseg6_mask:
1536 case Intrinsic::riscv_vloxseg7_mask:
1537 case Intrinsic::riscv_vloxseg8_mask:
1538 case Intrinsic::riscv_vluxseg2_mask:
1539 case Intrinsic::riscv_vluxseg3_mask:
1540 case Intrinsic::riscv_vluxseg4_mask:
1541 case Intrinsic::riscv_vluxseg5_mask:
1542 case Intrinsic::riscv_vluxseg6_mask:
1543 case Intrinsic::riscv_vluxseg7_mask:
1544 case Intrinsic::riscv_vluxseg8_mask:
1545 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1546 /*IsStore*/ false,
1547 /*IsUnitStrided*/ false);
1548 case Intrinsic::riscv_vsseg2:
1549 case Intrinsic::riscv_vsseg3:
1550 case Intrinsic::riscv_vsseg4:
1551 case Intrinsic::riscv_vsseg5:
1552 case Intrinsic::riscv_vsseg6:
1553 case Intrinsic::riscv_vsseg7:
1554 case Intrinsic::riscv_vsseg8:
1555 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1556 /*IsStore*/ true,
1557 /*IsUnitStrided*/ false);
1558 case Intrinsic::riscv_vsseg2_mask:
1559 case Intrinsic::riscv_vsseg3_mask:
1560 case Intrinsic::riscv_vsseg4_mask:
1561 case Intrinsic::riscv_vsseg5_mask:
1562 case Intrinsic::riscv_vsseg6_mask:
1563 case Intrinsic::riscv_vsseg7_mask:
1564 case Intrinsic::riscv_vsseg8_mask:
1565 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1566 /*IsStore*/ true,
1567 /*IsUnitStrided*/ false);
1568 case Intrinsic::riscv_vssseg2:
1569 case Intrinsic::riscv_vssseg3:
1570 case Intrinsic::riscv_vssseg4:
1571 case Intrinsic::riscv_vssseg5:
1572 case Intrinsic::riscv_vssseg6:
1573 case Intrinsic::riscv_vssseg7:
1574 case Intrinsic::riscv_vssseg8:
1575 case Intrinsic::riscv_vsoxseg2:
1576 case Intrinsic::riscv_vsoxseg3:
1577 case Intrinsic::riscv_vsoxseg4:
1578 case Intrinsic::riscv_vsoxseg5:
1579 case Intrinsic::riscv_vsoxseg6:
1580 case Intrinsic::riscv_vsoxseg7:
1581 case Intrinsic::riscv_vsoxseg8:
1582 case Intrinsic::riscv_vsuxseg2:
1583 case Intrinsic::riscv_vsuxseg3:
1584 case Intrinsic::riscv_vsuxseg4:
1585 case Intrinsic::riscv_vsuxseg5:
1586 case Intrinsic::riscv_vsuxseg6:
1587 case Intrinsic::riscv_vsuxseg7:
1588 case Intrinsic::riscv_vsuxseg8:
1589 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1590 /*IsStore*/ true,
1591 /*IsUnitStrided*/ false);
1592 case Intrinsic::riscv_vssseg2_mask:
1593 case Intrinsic::riscv_vssseg3_mask:
1594 case Intrinsic::riscv_vssseg4_mask:
1595 case Intrinsic::riscv_vssseg5_mask:
1596 case Intrinsic::riscv_vssseg6_mask:
1597 case Intrinsic::riscv_vssseg7_mask:
1598 case Intrinsic::riscv_vssseg8_mask:
1599 case Intrinsic::riscv_vsoxseg2_mask:
1600 case Intrinsic::riscv_vsoxseg3_mask:
1601 case Intrinsic::riscv_vsoxseg4_mask:
1602 case Intrinsic::riscv_vsoxseg5_mask:
1603 case Intrinsic::riscv_vsoxseg6_mask:
1604 case Intrinsic::riscv_vsoxseg7_mask:
1605 case Intrinsic::riscv_vsoxseg8_mask:
1606 case Intrinsic::riscv_vsuxseg2_mask:
1607 case Intrinsic::riscv_vsuxseg3_mask:
1608 case Intrinsic::riscv_vsuxseg4_mask:
1609 case Intrinsic::riscv_vsuxseg5_mask:
1610 case Intrinsic::riscv_vsuxseg6_mask:
1611 case Intrinsic::riscv_vsuxseg7_mask:
1612 case Intrinsic::riscv_vsuxseg8_mask:
1613 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1614 /*IsStore*/ true,
1615 /*IsUnitStrided*/ false);
1616 }
1617}
1618
1620 const AddrMode &AM, Type *Ty,
1621 unsigned AS,
1622 Instruction *I) const {
1623 // No global is ever allowed as a base.
1624 if (AM.BaseGV)
1625 return false;
1626
1627 // RVV instructions only support register addressing.
1628 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1629 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1630
1631 // Require a 12-bit signed offset.
1632 if (!isInt<12>(AM.BaseOffs))
1633 return false;
1634
1635 switch (AM.Scale) {
1636 case 0: // "r+i" or just "i", depending on HasBaseReg.
1637 break;
1638 case 1:
1639 if (!AM.HasBaseReg) // allow "r+i".
1640 break;
1641 return false; // disallow "r+r" or "r+r+i".
1642 default:
1643 return false;
1644 }
1645
1646 return true;
1647}
1648
1650 return isInt<12>(Imm);
1651}
1652
1654 return isInt<12>(Imm);
1655}
1656
1657// On RV32, 64-bit integers are split into their high and low parts and held
1658// in two different registers, so the trunc is free since the low register can
1659// just be used.
1660// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1661// isTruncateFree?
1663 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1664 return false;
1665 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1666 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1667 return (SrcBits == 64 && DestBits == 32);
1668}
1669
1671 // We consider i64->i32 free on RV64 since we have good selection of W
1672 // instructions that make promoting operations back to i64 free in many cases.
1673 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1674 !DstVT.isInteger())
1675 return false;
1676 unsigned SrcBits = SrcVT.getSizeInBits();
1677 unsigned DestBits = DstVT.getSizeInBits();
1678 return (SrcBits == 64 && DestBits == 32);
1679}
1680
1682 // Zexts are free if they can be combined with a load.
1683 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1684 // poorly with type legalization of compares preferring sext.
1685 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1686 EVT MemVT = LD->getMemoryVT();
1687 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1688 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1689 LD->getExtensionType() == ISD::ZEXTLOAD))
1690 return true;
1691 }
1692
1693 return TargetLowering::isZExtFree(Val, VT2);
1694}
1695
1697 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1698}
1699
1701 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1702}
1703
1705 return Subtarget.hasStdExtZbb();
1706}
1707
1709 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
1710}
1711
1713 const Instruction &AndI) const {
1714 // We expect to be able to match a bit extraction instruction if the Zbs
1715 // extension is supported and the mask is a power of two. However, we
1716 // conservatively return false if the mask would fit in an ANDI instruction,
1717 // on the basis that it's possible the sinking+duplication of the AND in
1718 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
1719 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
1720 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
1721 return false;
1722 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
1723 if (!Mask)
1724 return false;
1725 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
1726}
1727
1729 EVT VT = Y.getValueType();
1730
1731 // FIXME: Support vectors once we have tests.
1732 if (VT.isVector())
1733 return false;
1734
1735 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
1736 !isa<ConstantSDNode>(Y);
1737}
1738
1740 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
1741 if (Subtarget.hasStdExtZbs())
1742 return X.getValueType().isScalarInteger();
1743 auto *C = dyn_cast<ConstantSDNode>(Y);
1744 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
1745 if (Subtarget.hasVendorXTHeadBs())
1746 return C != nullptr;
1747 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
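  // (Bit positions 0-10 keep the mask 1<<Y within ANDI's 12-bit signed immediate.)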
1748 return C && C->getAPIntValue().ule(10);
1749}
1750
1752 EVT VT) const {
1753 // Only enable for rvv.
1754 if (!VT.isVector() || !Subtarget.hasVInstructions())
1755 return false;
1756
1757 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
1758 return false;
1759
1760 return true;
1761}
1762
1764 Type *Ty) const {
1765 assert(Ty->isIntegerTy());
1766
1767 unsigned BitSize = Ty->getIntegerBitWidth();
1768 if (BitSize > Subtarget.getXLen())
1769 return false;
1770
1771 // Fast path, assume 32-bit immediates are cheap.
1772 int64_t Val = Imm.getSExtValue();
1773 if (isInt<32>(Val))
1774 return true;
1775
1776 // A constant pool entry may be more aligned than the load we're trying to
1777 // replace. If we don't support unaligned scalar mem, prefer the constant
1778 // pool.
1779 // TODO: Can the caller pass down the alignment?
1780 if (!Subtarget.enableUnalignedScalarMem())
1781 return true;
1782
1783 // Prefer to keep the load if it would require many instructions.
1784 // This uses the same threshold we use for constant pools but doesn't
1785 // check useConstantPoolForLargeInts.
1786 // TODO: Should we keep the load only when we're definitely going to emit a
1787 // constant pool?
1788
1790 RISCVMatInt::generateInstSeq(Val, Subtarget.getFeatureBits());
1791 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
1792}
1793
1797 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1798 SelectionDAG &DAG) const {
1799 // One interesting pattern that we'd want to form is 'bit extract':
1800 // ((1 >> Y) & 1) ==/!= 0
1801 // But we also need to be careful not to try to reverse that fold.
1802
1803 // Is this '((1 >> Y) & 1)'?
1804 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
1805 return false; // Keep the 'bit extract' pattern.
1806
1807 // Will this be '((1 >> Y) & 1)' after the transform?
1808 if (NewShiftOpcode == ISD::SRL && CC->isOne())
1809 return true; // Do form the 'bit extract' pattern.
1810
1811 // If 'X' is a constant, and we transform, then we will immediately
1812 // try to undo the fold, thus causing endless combine loop.
1813 // So only do the transform if X is not a constant. This matches the default
1814 // implementation of this function.
1815 return !XC;
1816}
1817
1818bool RISCVTargetLowering::canSplatOperand(unsigned Opcode, int Operand) const {
1819 switch (Opcode) {
1820 case Instruction::Add:
1821 case Instruction::Sub:
1822 case Instruction::Mul:
1823 case Instruction::And:
1824 case Instruction::Or:
1825 case Instruction::Xor:
1826 case Instruction::FAdd:
1827 case Instruction::FSub:
1828 case Instruction::FMul:
1829 case Instruction::FDiv:
1830 case Instruction::ICmp:
1831 case Instruction::FCmp:
1832 return true;
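  // For the operations below, RVV only provides a vector-scalar (.vx/.vi) form
  // for the second operand, so only operand 1 can be splatted.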
1833 case Instruction::Shl:
1834 case Instruction::LShr:
1835 case Instruction::AShr:
1836 case Instruction::UDiv:
1837 case Instruction::SDiv:
1838 case Instruction::URem:
1839 case Instruction::SRem:
1840 return Operand == 1;
1841 default:
1842 return false;
1843 }
1844}
1845
1846
1848 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1849 return false;
1850
1851 if (canSplatOperand(I->getOpcode(), Operand))
1852 return true;
1853
1854 auto *II = dyn_cast<IntrinsicInst>(I);
1855 if (!II)
1856 return false;
1857
1858 switch (II->getIntrinsicID()) {
1859 case Intrinsic::fma:
1860 case Intrinsic::vp_fma:
1861 return Operand == 0 || Operand == 1;
1862 case Intrinsic::vp_shl:
1863 case Intrinsic::vp_lshr:
1864 case Intrinsic::vp_ashr:
1865 case Intrinsic::vp_udiv:
1866 case Intrinsic::vp_sdiv:
1867 case Intrinsic::vp_urem:
1868 case Intrinsic::vp_srem:
1869 return Operand == 1;
1870 // These intrinsics are commutative.
1871 case Intrinsic::vp_add:
1872 case Intrinsic::vp_mul:
1873 case Intrinsic::vp_and:
1874 case Intrinsic::vp_or:
1875 case Intrinsic::vp_xor:
1876 case Intrinsic::vp_fadd:
1877 case Intrinsic::vp_fmul:
1878 case Intrinsic::vp_icmp:
1879 case Intrinsic::vp_fcmp:
1880 // These intrinsics have 'vr' versions.
1881 case Intrinsic::vp_sub:
1882 case Intrinsic::vp_fsub:
1883 case Intrinsic::vp_fdiv:
1884 return Operand == 0 || Operand == 1;
1885 default:
1886 return false;
1887 }
1888}
1889
1890/// Check if sinking \p I's operands to I's basic block is profitable, because
1891/// the operands can be folded into a target instruction, e.g.
1892/// splats of scalars can fold into vector instructions.
1894 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
1895 using namespace llvm::PatternMatch;
1896
1897 if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
1898 return false;
1899
1900 for (auto OpIdx : enumerate(I->operands())) {
1901 if (!canSplatOperand(I, OpIdx.index()))
1902 continue;
1903
1904 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
1905 // Make sure we are not already sinking this operand
1906 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
1907 continue;
1908
1909 // We are looking for a splat that can be sunk.
1910 if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
1911 m_Undef(), m_ZeroMask())))
1912 continue;
1913
1914 // Don't sink i1 splats.
1915 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
1916 continue;
1917
1918 // All uses of the shuffle should be sunk to avoid duplicating it across GPR
1919 // and vector registers.
1920 for (Use &U : Op->uses()) {
1921 Instruction *Insn = cast<Instruction>(U.getUser());
1922 if (!canSplatOperand(Insn, U.getOperandNo()))
1923 return false;
1924 }
1925
1926 Ops.push_back(&Op->getOperandUse(0));
1927 Ops.push_back(&OpIdx.value());
1928 }
1929 return true;
1930}
1931
1933 unsigned Opc = VecOp.getOpcode();
1934
1935 // Assume target opcodes can't be scalarized.
1936 // TODO - do we have any exceptions?
1937 if (Opc >= ISD::BUILTIN_OP_END)
1938 return false;
1939
1940 // If the vector op is not supported, try to convert to scalar.
1941 EVT VecVT = VecOp.getValueType();
1942 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
1943 return true;
1944
1945 // If the vector op is supported, but the scalar op is not, the transform may
1946 // not be worthwhile.
1947 // Permit a vector binary operation to be converted to a scalar binary
1948 // operation which is custom lowered with an illegal type.
1949 EVT ScalarVT = VecVT.getScalarType();
1950 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
1951 isOperationCustom(Opc, ScalarVT);
1952}
1953
1955 const GlobalAddressSDNode *GA) const {
1956 // In order to maximise the opportunity for common subexpression elimination,
1957 // keep a separate ADD node for the global address offset instead of folding
1958 // it in the global address node. Later peephole optimisations may choose to
1959 // fold it back in when profitable.
1960 return false;
1961}
1962
1963 // Returns 0-31 if the fli instruction is available for the type and this is a
1964 // legal FP immediate for the type. Returns -1 otherwise.
1966 if (!Subtarget.hasStdExtZfa())
1967 return -1;
1968
1969 bool IsSupportedVT = false;
1970 if (VT == MVT::f16) {
1971 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
1972 } else if (VT == MVT::f32) {
1973 IsSupportedVT = true;
1974 } else if (VT == MVT::f64) {
1975 assert(Subtarget.hasStdExtD() && "Expect D extension");
1976 IsSupportedVT = true;
1977 }
1978
1979 if (!IsSupportedVT)
1980 return -1;
1981
1982 return RISCVLoadFPImm::getLoadFPImm(Imm);
1983}
1984
1986 bool ForCodeSize) const {
1987 bool IsLegalVT = false;
1988 if (VT == MVT::f16)
1989 IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
1990 else if (VT == MVT::f32)
1991 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
1992 else if (VT == MVT::f64)
1993 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
1994 else if (VT == MVT::bf16)
1995 IsLegalVT = Subtarget.hasStdExtZfbfmin();
1996
1997 if (!IsLegalVT)
1998 return false;
1999
2000 if (getLegalZfaFPImm(Imm, VT) >= 0)
2001 return true;
2002
2003 // Cannot create a 64 bit floating-point immediate value for rv32.
2004 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2005 // td can handle +0.0 or -0.0 already.
2006 // -0.0 can be created by fmv + fneg.
2007 return Imm.isZero();
2008 }
2009 // Special case: the cost for -0.0 is 1.
2010 int Cost = Imm.isNegZero()
2011 ? 1
2012 : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2013 Subtarget.getXLen(),
2014 Subtarget.getFeatureBits());
2015 // If the constantpool data is already in cache, only Cost 1 is cheaper.
2016 return Cost < FPImmCost;
2017}
2018
2019// TODO: This is very conservative.
2021 unsigned Index) const {
2023 return false;
2024
2025 // Only support extracting a fixed-length vector from a fixed-length vector for now.
2026 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2027 return false;
2028
2029 unsigned ResElts = ResVT.getVectorNumElements();
2030 unsigned SrcElts = SrcVT.getVectorNumElements();
2031
2032 // Conservatively only handle extracting half of a vector.
2033 // TODO: Relax this.
2034 if ((ResElts * 2) != SrcElts)
2035 return false;
2036
2037 // The smallest type we can slide is i8.
2038 // TODO: We can extract index 0 from a mask vector without a slide.
2039 if (ResVT.getVectorElementType() == MVT::i1)
2040 return false;
2041
2042 // Slide can support arbitrary index, but we only treat vslidedown.vi as
2043 // cheap.
2044 if (Index >= 32)
2045 return false;
2046
2047 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2048 // the upper half of a vector until we have more test coverage.
2049 return Index == 0 || Index == ResElts;
2050}
2051
2054 EVT VT) const {
2055 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2056 // We might still end up using a GPR but that will be decided based on ABI.
2057 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2059 return MVT::f32;
2060
2062}
2063
2066 EVT VT) const {
2067 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2068 // We might still end up using a GPR but that will be decided based on ABI.
2069 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2071 return 1;
2072
2074}
2075
2076// Changes the condition code and swaps operands if necessary, so the SetCC
2077// operation matches one of the comparisons supported directly by branches
2078// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2079// with 1/-1.
2080static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2081 ISD::CondCode &CC, SelectionDAG &DAG) {
2082 // If this is a single bit test that can't be handled by ANDI, shift the
2083 // bit to be tested to the MSB and perform a signed compare with 0.
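  // (For example, on RV64 (X & 0x1000) != 0 becomes (X << 51) < 0, and
  // (X & 0x1000) == 0 becomes (X << 51) >= 0.)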
2084 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2085 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2086 isa<ConstantSDNode>(LHS.getOperand(1))) {
2087 uint64_t Mask = LHS.getConstantOperandVal(1);
2088 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2089 unsigned ShAmt = 0;
2090 if (isPowerOf2_64(Mask)) {
2091 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
2092 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2093 } else {
2094 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2095 }
2096
2097 LHS = LHS.getOperand(0);
2098 if (ShAmt != 0)
2099 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2100 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2101 return;
2102 }
2103 }
2104
2105 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2106 int64_t C = RHSC->getSExtValue();
2107 switch (CC) {
2108 default: break;
2109 case ISD::SETGT:
2110 // Convert X > -1 to X >= 0.
2111 if (C == -1) {
2112 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2113 CC = ISD::SETGE;
2114 return;
2115 }
2116 break;
2117 case ISD::SETLT:
2118 // Convert X < 1 to 0 <= X.
2119 if (C == 1) {
2120 RHS = LHS;
2121 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2122 CC = ISD::SETGE;
2123 return;
2124 }
2125 break;
2126 }
2127 }
2128
2129 switch (CC) {
2130 default:
2131 break;
2132 case ISD::SETGT:
2133 case ISD::SETLE:
2134 case ISD::SETUGT:
2135 case ISD::SETULE:
2137 std::swap(LHS, RHS);
2138 break;
2139 }
2140}
2141
2143 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2144 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2145 if (VT.getVectorElementType() == MVT::i1)
2146 KnownSize *= 8;
2147
2148 switch (KnownSize) {
2149 default:
2150 llvm_unreachable("Invalid LMUL.");
2151 case 8:
2153 case 16:
2155 case 32:
2157 case 64:
2159 case 128:
2161 case 256:
2163 case 512:
2165 }
2166}
2167
2169 switch (LMul) {
2170 default:
2171 llvm_unreachable("Invalid LMUL.");
2176 return RISCV::VRRegClassID;
2178 return RISCV::VRM2RegClassID;
2180 return RISCV::VRM4RegClassID;
2182 return RISCV::VRM8RegClassID;
2183 }
2184}
2185
2187 RISCVII::VLMUL LMUL = getLMUL(VT);
2188 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2189 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2190 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2191 LMUL == RISCVII::VLMUL::LMUL_1) {
2192 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2193 "Unexpected subreg numbering");
2194 return RISCV::sub_vrm1_0 + Index;
2195 }
2196 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2197 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2198 "Unexpected subreg numbering");
2199 return RISCV::sub_vrm2_0 + Index;
2200 }
2201 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2202 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2203 "Unexpected subreg numbering");
2204 return RISCV::sub_vrm4_0 + Index;
2205 }
2206 llvm_unreachable("Invalid vector type.");
2207}
2208
2210 if (VT.getVectorElementType() == MVT::i1)
2211 return RISCV::VRRegClassID;
2212 return getRegClassIDForLMUL(getLMUL(VT));
2213}
2214
2215// Attempt to decompose a subvector insert/extract between VecVT and
2216// SubVecVT via subregister indices. Returns the subregister index that
2217// can perform the subvector insert/extract with the given element index, as
2218// well as the index corresponding to any leftover subvectors that must be
2219// further inserted/extracted within the register class for SubVecVT.
2220std::pair<unsigned, unsigned>
2221RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
2222 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2223 const RISCVRegisterInfo *TRI) {
2224 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2225 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2226 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2227 "Register classes not ordered");
2228 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2229 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2230 // Try to compose a subregister index that takes us from the incoming
2231 // LMUL>1 register class down to the outgoing one. At each step we halve
2232 // the LMUL:
2233 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2234 // Note that this is not guaranteed to find a subregister index, such as
2235 // when we are extracting from one VR type to another.
2236 unsigned SubRegIdx = RISCV::NoSubRegister;
2237 for (const unsigned RCID :
2238 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2239 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2240 VecVT = VecVT.getHalfNumVectorElementsVT();
2241 bool IsHi =
2242 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2243 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2244 getSubregIndexByMVT(VecVT, IsHi));
2245 if (IsHi)
2246 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2247 }
2248 return {SubRegIdx, InsertExtractIdx};
2249}
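// Informal walk-through of the example in the comment above, assuming
// VecVT = nxv16i32 (VRM8), SubVecVT = nxv2i32 (VR) and element index 12:
//   halve to nxv8i32: index 12 >= 8 -> sub_vrm4_1, remaining index 4
//   halve to nxv4i32: index 4  >= 4 -> compose sub_vrm2_1, remaining index 0
//   halve to nxv2i32: index 0  <  2 -> compose sub_vrm1_0
// giving sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0 and a leftover index of 0.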
2250
2251// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2252// stores for those types.
2253bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2254 return !Subtarget.useRVVForFixedLengthVectors() ||
2255 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2256}
2257
2258bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
2259 if (!ScalarTy.isSimple())
2260 return false;
2261 switch (ScalarTy.getSimpleVT().SimpleTy) {
2262 case MVT::iPTR:
2263 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2264 case MVT::i8:
2265 case MVT::i16:
2266 case MVT::i32:
2267 return true;
2268 case MVT::i64:
2269 return Subtarget.hasVInstructionsI64();
2270 case MVT::f16:
2271 return Subtarget.hasVInstructionsF16();
2272 case MVT::f32:
2273 return Subtarget.hasVInstructionsF32();
2274 case MVT::f64:
2275 return Subtarget.hasVInstructionsF64();
2276 default:
2277 return false;
2278 }
2279}
2280
2281
2282unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2283 return NumRepeatedDivisors;
2284}
2285
2286static SDValue getVLOperand(SDValue Op) {
2287 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2288 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2289 "Unexpected opcode");
2290 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2291 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2292 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
2293 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2294 if (!II)
2295 return SDValue();
2296 return Op.getOperand(II->VLOperand + 1 + HasChain);
2297}
2298
2299static bool useRVVForFixedLengthVectorVT(MVT VT,
2300 const RISCVSubtarget &Subtarget) {
2301 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2302 if (!Subtarget.useRVVForFixedLengthVectors())
2303 return false;
2304
2305 // We only support a set of vector types with a consistent maximum fixed size
2306 // across all supported vector element types to avoid legalization issues.
2307 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2308 // fixed-length vector type we support is 1024 bytes.
2309 if (VT.getFixedSizeInBits() > 1024 * 8)
2310 return false;
2311
2312 unsigned MinVLen = Subtarget.getRealMinVLen();
2313
2314 MVT EltVT = VT.getVectorElementType();
2315
2316 // Don't use RVV for vectors we cannot scalarize if required.
2317 switch (EltVT.SimpleTy) {
2318 // i1 is supported but has different rules.
2319 default:
2320 return false;
2321 case MVT::i1:
2322 // Masks can only use a single register.
2323 if (VT.getVectorNumElements() > MinVLen)
2324 return false;
2325 MinVLen /= 8;
2326 break;
2327 case MVT::i8:
2328 case MVT::i16:
2329 case MVT::i32:
2330 break;
2331 case MVT::i64:
2332 if (!Subtarget.hasVInstructionsI64())
2333 return false;
2334 break;
2335 case MVT::f16:
2336 if (!Subtarget.hasVInstructionsF16Minimal())
2337 return false;
2338 break;
2339 case MVT::f32:
2340 if (!Subtarget.hasVInstructionsF32())
2341 return false;
2342 break;
2343 case MVT::f64:
2344 if (!Subtarget.hasVInstructionsF64())
2345 return false;
2346 break;
2347 }
2348
2349 // Reject elements larger than ELEN.
2350 if (EltVT.getSizeInBits() > Subtarget.getELen())
2351 return false;
2352
2353 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2354 // Don't use RVV for types that don't fit.
2355 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2356 return false;
2357
2358 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2359 // the base fixed length RVV support in place.
2360 if (!VT.isPow2VectorType())
2361 return false;
2362
2363 return true;
2364}
2365
2366bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2367 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2368}
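// Rough illustration of the checks above. Assuming a subtarget with
// Zvl128b (RealMinVLen = 128), ELEN = 64 and an LMUL cap of 8 (the cap is
// option/subtarget dependent):
//   v4i32  (128 bits)  -> LMul = 1  -> usable with RVV
//   v64i8  (512 bits)  -> LMul = 4  -> usable with RVV
//   v3i32              -> rejected (not a power-of-2 element count)
//   v256i8 (2048 bits) -> LMul = 16 -> rejected (exceeds the LMUL cap)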
2369
2370// Return the largest legal scalable vector type that matches VT's element type.
2371static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
2372 const RISCVSubtarget &Subtarget) {
2373 // This may be called before legal types are setup.
2374 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2375 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2376 "Expected legal fixed length vector!");
2377
2378 unsigned MinVLen = Subtarget.getRealMinVLen();
2379 unsigned MaxELen = Subtarget.getELen();
2380
2381 MVT EltVT = VT.getVectorElementType();
2382 switch (EltVT.SimpleTy) {
2383 default:
2384 llvm_unreachable("unexpected element type for RVV container");
2385 case MVT::i1:
2386 case MVT::i8:
2387 case MVT::i16:
2388 case MVT::i32:
2389 case MVT::i64:
2390 case MVT::f16:
2391 case MVT::f32:
2392 case MVT::f64: {
2393 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2394 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2395 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2396 unsigned NumElts =
2397 (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
2398 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2399 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2400 return MVT::getScalableVectorVT(EltVT, NumElts);
2401 }
2402 }
2403}
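// Informal container examples, assuming RealMinVLen = 128 and ELEN = 64
// (RISCV::RVVBitsPerBlock is 64):
//   v8i32 -> NumElts = 8*64/128 = 4      -> nxv4i32 (LMUL 2 at VLEN=128)
//   v4i16 -> NumElts = 4*64/128 = 2      -> nxv2i16 (a fractional-LMUL container)
//   v2i8  -> NumElts = max(1, 64/64) = 1 -> nxv1i8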
2404
2405static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
2406 const RISCVSubtarget &Subtarget) {
2407 return ::getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
2408 Subtarget);
2409}
2410
2411MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
2412 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2413}
2414
2415// Grow V to consume an entire RVV register.
2416static SDValue convertToScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2417 const RISCVSubtarget &Subtarget) {
2418 assert(VT.isScalableVector() &&
2419 "Expected to convert into a scalable vector!");
2420 assert(V.getValueType().isFixedLengthVector() &&
2421 "Expected a fixed length vector operand!");
2422 SDLoc DL(V);
2423 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2424 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2425}
2426
2427// Shrink V so it's just big enough to maintain a VT's worth of data.
2428static SDValue convertFromScalableVector(MVT VT, SDValue V, SelectionDAG &DAG,
2429 const RISCVSubtarget &Subtarget) {
2430 assert(VT.isFixedLengthVector() &&
2431 "Expected to convert into a fixed length vector!");
2432 assert(V.getValueType().isScalableVector() &&
2433 "Expected a scalable vector operand!");
2434 SDLoc DL(V);
2435 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2436 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2437}
2438
2439/// Return the type of the mask type suitable for masking the provided
2440/// vector type. This is simply an i1 element type vector of the same
2441/// (possibly scalable) length.
2442static MVT getMaskTypeFor(MVT VecVT) {
2443 assert(VecVT.isVector());
2444 ElementCount EC = VecVT.getVectorElementCount();
2445 return MVT::getVectorVT(MVT::i1, EC);
2446}
2447
2448/// Creates an all ones mask suitable for masking a vector of type VecTy with
2449/// vector length VL.
2450static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2451 SelectionDAG &DAG) {
2452 MVT MaskVT = getMaskTypeFor(VecVT);
2453 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2454}
2455
2456static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
2457 const RISCVSubtarget &Subtarget) {
2458 return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2459}
2460
2461static std::pair<SDValue, SDValue>
2462getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2463 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2464 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2465 SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
2466 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2467 return {Mask, VL};
2468}
2469
2470// Gets the two common "VL" operands: an all-ones mask and the vector length.
2471// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
2472// the vector type that the fixed-length vector is contained in. Otherwise if
2473// VecVT is scalable, then ContainerVT should be the same as VecVT.
2474static std::pair<SDValue, SDValue>
2475getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2476 const RISCVSubtarget &Subtarget) {
2477 if (VecVT.isFixedLengthVector())
2478 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2479 Subtarget);
2480 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2481 MVT XLenVT = Subtarget.getXLenVT();
2482 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
2483 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2484 return {Mask, VL};
2485}
2486
2487// As above but assuming the given type is a scalable vector type.
2488static std::pair<SDValue, SDValue>
2489getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
2490 const RISCVSubtarget &Subtarget) {
2491 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2492 return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
2493}
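// Typical usage of the helpers above when lowering a fixed-length operation
// (a sketch; ContainerVT is the scalable container computed for VT):
//   auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
// For a fixed-length VT this yields VL = constant element count; for a
// scalable VT it yields VL = X0 (VLMAX). In both cases Mask is an all-ones
// VMSET_VL of the container's mask type.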
2494
2495SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
2496 SelectionDAG &DAG) const {
2497 assert(VecVT.isScalableVector() && "Expected scalable vector");
2498 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2499 VecVT.getVectorElementCount());
2500}
2501
2502// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2503// of either are (currently) supported. This can get us into an infinite loop
2504// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2505// as a ..., etc.
2506// Until either (or both) of these can reliably lower any node, reporting that
2507// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2508// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2509// which is not desirable.
2510bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
2511 EVT VT, unsigned DefinedValues) const {
2512 return false;
2513}
2514
2515InstructionCost RISCVTargetLowering::getLMULCost(MVT VT) const {
2516 // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is
2517 // implementation-defined.
2518 if (!VT.isVector())
2519 return InstructionCost::getInvalid();
2520 unsigned DLenFactor = Subtarget.getDLenFactor();
2521 unsigned Cost;
2522 if (VT.isScalableVector()) {
2523 unsigned LMul;
2524 bool Fractional;
2525 std::tie(LMul, Fractional) =
2526 RISCVVType::decodeVLMUL(getLMUL(VT));
2527 if (Fractional)
2528 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2529 else
2530 Cost = (LMul * DLenFactor);
2531 } else {
2532 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2533 }
2534 return Cost;
2535}
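// Informal cost examples for the heuristic above. With DLEN == VLEN
// (DLenFactor = 1), an LMUL_2 type costs 2 and LMUL_1 or fractional types
// cost 1. With DLEN == VLEN/2 (DLenFactor = 2), LMUL_1 costs 2 while LMUL_F2
// still costs 1, since a fractional type costs DLenFactor / LMul when
// LMul <= DLenFactor and 1 otherwise.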
2536
2537
2538/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2539/// is generally quadratic in the number of vregs implied by LMUL. Note that
2540/// the operands (index and possibly mask) are handled separately.
2541InstructionCost RISCVTargetLowering::getVRGatherVVCost(MVT VT) const {
2542 return getLMULCost(VT) * getLMULCost(VT);
2543}
2544
2545/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2546/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2547/// or may track the vrgather.vv cost. It is implementation-dependent.
2548InstructionCost RISCVTargetLowering::getVRGatherVICost(MVT VT) const {
2549 return getLMULCost(VT);
2550}
2551
2552/// Return the cost of a vslidedown.vi/vx or vslideup.vi/vx instruction
2553/// for the type VT. (This does not cover the vslide1up or vslide1down
2554/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2555/// or may track the vrgather.vv cost. It is implementation-dependent.
2556InstructionCost RISCVTargetLowering::getVSlideCost(MVT VT) const {
2557 return getLMULCost(VT);
2558}
2559
2560static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
2561 const RISCVSubtarget &Subtarget) {
2562 // RISC-V FP-to-int conversions saturate to the destination register size, but
2563 // don't produce 0 for nan. We can use a conversion instruction and fix the
2564 // nan case with a compare and a select.
2565 SDValue Src = Op.getOperand(0);
2566
2567 MVT DstVT = Op.getSimpleValueType();
2568 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2569
2570 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2571
2572 if (!DstVT.isVector()) {
2573 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2574 // the result.
2575 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2576 Src.getValueType() == MVT::bf16) {
2577 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2578 }
2579
2580 unsigned Opc;
2581 if (SatVT == DstVT)
2582 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2583 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2584 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
2585 else
2586 return SDValue();
2587 // FIXME: Support other SatVTs by clamping before or after the conversion.
2588
2589 SDLoc DL(Op);
2590 SDValue FpToInt = DAG.getNode(
2591 Opc, DL, DstVT, Src,
2592 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
2593
2594 if (Opc == RISCVISD::FCVT_WU_RV64)
2595 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2596
2597 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2598 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2599 ISD::CondCode::SETUO);
2600 }
2601
2602 // Vectors.
2603
2604 MVT DstEltVT = DstVT.getVectorElementType();
2605 MVT SrcVT = Src.getSimpleValueType();
2606 MVT SrcEltVT = SrcVT.getVectorElementType();
2607 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2608 unsigned DstEltSize = DstEltVT.getSizeInBits();
2609
2610 // Only handle saturating to the destination type.
2611 if (SatVT != DstEltVT)
2612 return SDValue();
2613
2614 // FIXME: Don't support narrowing by more than 1 step for now.
2615 if (SrcEltSize > (2 * DstEltSize))
2616 return SDValue();
2617
2618 MVT DstContainerVT = DstVT;
2619 MVT SrcContainerVT = SrcVT;
2620 if (DstVT.isFixedLengthVector()) {
2621 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2622 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2623 assert(DstContainerVT.getVectorElementCount() ==
2624 SrcContainerVT.getVectorElementCount() &&
2625 "Expected same element count");
2626 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2627 }
2628
2629 SDLoc DL(Op);
2630
2631 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2632
2633 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2634 {Src, Src, DAG.getCondCode(ISD::SETNE),
2635 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2636
2637 // Need to widen by more than 1 step, promote the FP type, then do a widening
2638 // convert.
2639 if (DstEltSize > (2 * SrcEltSize)) {
2640 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2641 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2642 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2643 }
2644
2645 unsigned RVVOpc =
2646 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
2647 SDValue Res = DAG.getNode(RVVOpc, DL, DstContainerVT, Src, Mask, VL);
2648
2649 SDValue SplatZero = DAG.getNode(
2650 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
2651 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
2652 Res = DAG.getNode(RISCVISD::VSELECT_VL, DL, DstContainerVT, IsNan, SplatZero,
2653 Res, VL);
2654
2655 if (DstVT.isFixedLengthVector())
2656 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
2657
2658 return Res;
2659}
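// Informal sketch of the scalar strategy above: the FCVT-style node converts
// with the static RTZ rounding mode (the hardware already clamps values that
// are out of range), and the remaining hole, NaN converting to a nonzero
// pattern, is patched by selecting 0 whenever Src is unordered with itself.
// The vector path mirrors this: a SETCC_VL (SETNE on Src, Src) computes the
// NaN lanes and a VSELECT_VL forces those lanes to a zero splat.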
2660
2661static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
2662 switch (Opc) {
2663 case ISD::FROUNDEVEN:
2664 case ISD::STRICT_FROUNDEVEN:
2665 case ISD::VP_FROUNDEVEN:
2666 return RISCVFPRndMode::RNE;
2667 case ISD::FTRUNC:
2668 case ISD::STRICT_FTRUNC:
2669 case ISD::VP_FROUNDTOZERO:
2670 return RISCVFPRndMode::RTZ;
2671 case ISD::FFLOOR:
2672 case ISD::STRICT_FFLOOR:
2673 case ISD::VP_FFLOOR:
2674 return RISCVFPRndMode::RDN;
2675 case ISD::FCEIL:
2676 case ISD::STRICT_FCEIL:
2677 case ISD::VP_FCEIL:
2678 return RISCVFPRndMode::RUP;
2679 case ISD::FROUND:
2680 case ISD::STRICT_FROUND:
2681 case ISD::VP_FROUND:
2682 return RISCVFPRndMode::RMM;
2683 case ISD::FRINT:
2684 return RISCVFPRndMode::DYN;
2685 }
2686
2687 return RISCVFPRndMode::Invalid;
2688}
2689
2690// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND
2691// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
2692// the integer domain and back. Taking care to avoid converting values that are
2693// nan or already correct.
2694static SDValue
2695lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2696 const RISCVSubtarget &Subtarget) {
2697 MVT VT = Op.getSimpleValueType();
2698 assert(VT.isVector() && "Unexpected type");
2699
2700 SDLoc DL(Op);
2701
2702 SDValue Src = Op.getOperand(0);
2703
2704 MVT ContainerVT = VT;
2705 if (VT.isFixedLengthVector()) {
2706 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2707 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2708 }
2709
2710 SDValue Mask, VL;
2711 if (Op->isVPOpcode()) {
2712 Mask = Op.getOperand(1);
2713 if (VT.isFixedLengthVector())
2714 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
2715 Subtarget);
2716 VL = Op.getOperand(2);
2717 } else {
2718 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2719 }
2720
2721 // Freeze the source since we are increasing the number of uses.
2722 Src = DAG.getFreeze(Src);
2723
2724 // We do the conversion on the absolute value and fix the sign at the end.
2725 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2726
2727 // Determine the largest integer that can be represented exactly. This and
2728 // values larger than it don't have any fractional bits so don't need to
2729 // be converted.
2730 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2731 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2732 APFloat MaxVal = APFloat(FltSem);
2733 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2734 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2735 SDValue MaxValNode =
2736 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2737 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2738 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2739
2740 // If abs(Src) was larger than MaxVal or nan, keep it.
2741 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
2742 Mask =
2743 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
2744 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
2745 Mask, Mask, VL});
2746
2747 // Truncate to integer and convert back to FP.
2748 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2749 MVT XLenVT = Subtarget.getXLenVT();
2750 SDValue Truncated;
2751
2752 switch (Op.getOpcode()) {
2753 default:
2754 llvm_unreachable("Unexpected opcode");
2755 case ISD::FCEIL:
2756 case ISD::VP_FCEIL:
2757 case ISD::FFLOOR:
2758 case ISD::VP_FFLOOR:
2759 case ISD::FROUND:
2760 case ISD::FROUNDEVEN:
2761 case ISD::VP_FROUND:
2762 case ISD::VP_FROUNDEVEN:
2763 case ISD::VP_FROUNDTOZERO: {
2764 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2765 assert(FRM != RISCVFPRndMode::Invalid);
2766 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
2767 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
2768 break;
2769 }
2770 case ISD::FTRUNC:
2771 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
2772 Mask, VL);
2773 break;
2774 case ISD::FRINT:
2775 case ISD::VP_FRINT:
2776 Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
2777 break;
2778 case ISD::FNEARBYINT:
2779 case ISD::VP_FNEARBYINT:
2780 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
2781 Mask, VL);
2782 break;
2783 }
2784
2785 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2786 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
2787 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
2788 Mask, VL);
2789
2790 // Restore the original sign so that -0.0 is preserved.
2791 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2792 Src, Src, Mask, VL);
2793
2794 if (!VT.isFixedLengthVector())
2795 return Truncated;
2796
2797 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2798}
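// Worked example (informal) for a vector floor where one f32 lane holds -2.7:
//   |-2.7| < 2^23, so the lane passes the SETOLT compare above;
//   VFCVT_RM_X_F_VL with RDN yields -3, SINT_TO_FP_VL yields -3.0, and
//   FCOPYSIGN_VL re-applies the original sign (preserving -0.0 as well).
// Lanes that are NaN or have magnitude >= 2^23 fail the compare, so the
// masked operations leave them at their original Src value.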
2799
2800// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND
2801// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN of the source to
2802// qNaN and converting the new source to integer and back to FP.
2803static SDValue
2804lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2805 const RISCVSubtarget &Subtarget) {
2806 SDLoc DL(Op);
2807 MVT VT = Op.getSimpleValueType();
2808 SDValue Chain = Op.getOperand(0);
2809 SDValue Src = Op.getOperand(1);
2810
2811 MVT ContainerVT = VT;
2812 if (VT.isFixedLengthVector()) {
2813 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
2814 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
2815 }
2816
2817 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
2818
2819 // Freeze the source since we are increasing the number of uses.
2820 Src = DAG.getFreeze(Src);
2821
2822 // Convert sNaN to qNaN by executing x + x for all unordered elements x in Src.
2823 MVT MaskVT = Mask.getSimpleValueType();
2824 SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
2825 DAG.getVTList(MaskVT, MVT::Other),
2826 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
2827 DAG.getUNDEF(MaskVT), Mask, VL});
2828 Chain = Unorder.getValue(1);
2829 Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
2830 DAG.getVTList(ContainerVT, MVT::Other),
2831 {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
2832 Chain = Src.getValue(1);
2833
2834 // We do the conversion on the absolute value and fix the sign at the end.
2835 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
2836
2837 // Determine the largest integer that can be represented exactly. This and
2838 // values larger than it don't have any fractional bits so don't need to
2839 // be converted.
2840 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
2841 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2842 APFloat MaxVal = APFloat(FltSem);
2843 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2844 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2845 SDValue MaxValNode =
2846 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
2847 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
2848 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
2849
2850 // If abs(Src) was larger than MaxVal or nan, keep it.
2851 Mask = DAG.getNode(
2852 RISCVISD::SETCC_VL, DL, MaskVT,
2853 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
2854
2855 // Truncate to integer and convert back to FP.
2856 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
2857 MVT XLenVT = Subtarget.getXLenVT();
2858 SDValue Truncated;
2859
2860 switch (Op.getOpcode()) {
2861 default:
2862 llvm_unreachable("Unexpected opcode");
2863 case ISD::STRICT_FCEIL:
2864 case ISD::STRICT_FFLOOR:
2865 case ISD::STRICT_FROUND:
2866 case ISD::STRICT_FROUNDEVEN: {
2867 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2868 assert(FRM != RISCVFPRndMode::Invalid);
2869 Truncated = DAG.getNode(
2870 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
2871 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
2872 break;
2873 }
2874 case ISD::STRICT_FTRUNC:
2875 Truncated =
2876 DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
2877 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
2878 break;
2879 case ISD::STRICT_FNEARBYINT:
2880 Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
2881 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
2882 Mask, VL);
2883 break;
2884 }
2885 Chain = Truncated.getValue(1);
2886
2887 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
2888 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
2889 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
2890 DAG.getVTList(ContainerVT, MVT::Other), Chain,
2891 Truncated, Mask, VL);
2892 Chain = Truncated.getValue(1);
2893 }
2894
2895 // Restore the original sign so that -0.0 is preserved.
2896 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
2897 Src, Src, Mask, VL);
2898
2899 if (VT.isFixedLengthVector())
2900 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2901 return DAG.getMergeValues({Truncated, Chain}, DL);
2902}
2903
2904static SDValue
2905lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
2906 const RISCVSubtarget &Subtarget) {
2907 MVT VT = Op.getSimpleValueType();
2908 if (VT.isVector())
2909 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
2910
2911 if (DAG.shouldOptForSize())
2912 return SDValue();
2913
2914 SDLoc DL(Op);
2915 SDValue Src = Op.getOperand(0);
2916
2917 // Create an integer the size of the mantissa with the MSB set. This and all
2918 // values larger than it don't have any fractional bits so don't need to be
2919 // converted.
2920 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
2921 unsigned Precision = APFloat::semanticsPrecision(FltSem);
2922 APFloat MaxVal = APFloat(FltSem);
2923 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
2924 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
2925 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
2926
2927 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
2928 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
2929 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
2930}
2931
2932 static SDValue
2933 getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
2934 const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
2935 SDValue Offset, SDValue Mask, SDValue VL,
2936 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2937 if (Merge.isUndef())
2938 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2939 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2940 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2941 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
2942}
2943
2944static SDValue
2945 getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
2946 EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
2947 SDValue VL,
2948 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
2949 if (Merge.isUndef())
2950 Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
2951 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
2952 SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
2953 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
2954}
2955
2956struct VIDSequence {
2957 int64_t StepNumerator;
2958 unsigned StepDenominator;
2959 int64_t Addend;
2960};
2961
2962static std::optional<uint64_t> getExactInteger(const APFloat &APF,
2963 unsigned BitWidth) {
2964 APSInt ValInt(BitWidth, !APF.isNegative());
2965 // We use an arbitrary rounding mode here. If a floating-point is an exact
2966 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
2967 // the rounding mode changes the output value, then it is not an exact
2968 // integer.
2969 RoundingMode ArbitraryRM = RoundingMode::TowardZero;
2970 bool IsExact;
2971 // If it is out of signed integer range, it will return an invalid operation.
2972 // If it is not an exact integer, IsExact is false.
2973 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
2974 APFloatBase::opInvalidOp) ||
2975 !IsExact)
2976 return std::nullopt;
2977 return ValInt.extractBitsAsZExtValue(BitWidth, 0);
2978}
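// Informal examples for the helper above with BitWidth = 8:
//   3.0  -> 0x03 (exact)
//   -1.0 -> 0xFF (two's-complement bits, returned zero-extended)
//   2.5  -> std::nullopt (inexact)
//   1e30 -> std::nullopt (out of range; convertToInteger reports opInvalidOp)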
2979
2980// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
2981// to the (non-zero) step S and start value X. This can be then lowered as the
2982// RVV sequence (VID * S) + X, for example.
2983// The step S is represented as an integer numerator divided by a positive
2984// denominator. Note that the implementation currently only identifies
2985// sequences in which either the numerator is +/- 1 or the denominator is 1. It
2986// cannot detect 2/3, for example.
2987// Note that this method will also match potentially unappealing index
2988 // sequences, like <i32 0, i32 50939494>; it is left to the caller to
2989// determine whether this is worth generating code for.
2990static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
2991 unsigned NumElts = Op.getNumOperands();
2992 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
2993 bool IsInteger = Op.getValueType().isInteger();
2994
2995 std::optional<unsigned> SeqStepDenom;
2996 std::optional<int64_t> SeqStepNum, SeqAddend;
2997 std::optional<std::pair<uint64_t, unsigned>> PrevElt;
2998 unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
2999 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3000 // Assume undef elements match the sequence; we just have to be careful
3001 // when interpolating across them.
3002 if (Op.getOperand(Idx).isUndef())
3003 continue;
3004
3005 uint64_t Val;
3006 if (IsInteger) {
3007 // The BUILD_VECTOR must be all constants.
3008 if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
3009 return std::nullopt;
3010 Val = Op.getConstantOperandVal(Idx) &
3011 maskTrailingOnes<uint64_t>(EltSizeInBits);
3012 } else {
3013 // The BUILD_VECTOR must be all constants.
3014 if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
3015 return std::nullopt;
3016 if (auto ExactInteger = getExactInteger(
3017 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3018 EltSizeInBits))
3019 Val = *ExactInteger;
3020 else
3021 return std::nullopt;
3022 }
3023
3024 if (PrevElt) {
3025 // Calculate the step since the last non-undef element, and ensure
3026 // it's consistent across the entire sequence.
3027 unsigned IdxDiff = Idx - PrevElt->second;
3028 int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
3029
3030 // A zero value difference means that we're somewhere in the middle
3031 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3032 // step change before evaluating the sequence.
3033 if (ValDiff == 0)
3034 continue;
3035
3036 int64_t Remainder = ValDiff % IdxDiff;
3037 // Normalize the step if it's greater than 1.
3038 if (Remainder != ValDiff) {
3039 // The difference must cleanly divide the element span.
3040 if (Remainder != 0)
3041 return std::nullopt;
3042 ValDiff /= IdxDiff;
3043 IdxDiff = 1;
3044 }
3045
3046 if (!SeqStepNum)
3047 SeqStepNum = ValDiff;
3048 else if (ValDiff != SeqStepNum)
3049 return std::nullopt;
3050
3051 if (!SeqStepDenom)
3052 SeqStepDenom = IdxDiff;
3053 else if (IdxDiff != *SeqStepDenom)
3054 return std::nullopt;
3055 }
3056
3057 // Record this non-undef element for later.
3058 if (!PrevElt || PrevElt->first != Val)
3059 PrevElt = std::make_pair(Val, Idx);
3060 }
3061
3062 // We need to have logged a step for this to count as a legal index sequence.
3063 if (!SeqStepNum || !SeqStepDenom)
3064 return std::nullopt;
3065
3066 // Loop back through the sequence and validate elements we might have skipped
3067 // while waiting for a valid step. While doing this, log any sequence addend.
3068 for (unsigned Idx = 0; Idx < NumElts; Idx++) {
3069 if (Op.getOperand(Idx).isUndef())
3070 continue;
3071 uint64_t Val;
3072 if (IsInteger) {
3073 Val = Op.getConstantOperandVal(Idx) &
3074 maskTrailingOnes<uint64_t>(EltSizeInBits);
3075 } else {
3076 Val = *getExactInteger(
3077 cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
3078 EltSizeInBits);
3079 }
3080 uint64_t ExpectedVal =
3081 (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
3082 int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
3083 if (!SeqAddend)
3084 SeqAddend = Addend;
3085 else if (Addend != SeqAddend)
3086 return std::nullopt;
3087 }
3088
3089 assert(SeqAddend && "Must have an addend if we have a step");
3090
3091 return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
3092}
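// Informal examples of sequences the matcher above accepts or rejects:
//   <0, 2, 4, 6>       -> {StepNumerator 2, StepDenominator 1, Addend 0}
//   <1, 3, undef, 7>   -> {2, 1, 1}    (undef elements are interpolated over)
//   <0, 0, 1, 1, 2, 2> -> {1, 2, 0}    (fractional step detected via IdxDiff)
//   <0, 1, 3>          -> std::nullopt (the step is not consistent)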
3093
3094// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3095// and lower it as a VRGATHER_VX_VL from the source vector.
3096static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3097 SelectionDAG &DAG,
3098 const RISCVSubtarget &Subtarget) {
3099 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3100 return SDValue();
3101 SDValue Vec = SplatVal.getOperand(0);
3102 // Only perform this optimization on vectors of the same size for simplicity.
3103 // Don't perform this optimization for i1 vectors.
3104 // FIXME: Support i1 vectors, maybe by promoting to i8?
3105 if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
3106 return SDValue();
3107 SDValue Idx = SplatVal.getOperand(1);
3108 // The index must be a legal type.
3109 if (Idx.getValueType() != Subtarget.getXLenVT())
3110 return SDValue();
3111
3112 MVT ContainerVT = VT;
3113 if (VT.isFixedLengthVector()) {
3114 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3115 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3116 }
3117
3118 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3119
3120 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3121 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3122
3123 if (!VT.isFixedLengthVector())
3124 return Gather;
3125
3126 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3127}
3128
3129
3130/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3131/// which constitute a large proportion of the elements. In such cases we can
3132/// splat a vector with the dominant element and make up the shortfall with
3133/// INSERT_VECTOR_ELTs. Returns SDValue if not profitable.
3134/// Note that this includes vectors of 2 elements by association. The
3135/// upper-most element is the "dominant" one, allowing us to use a splat to
3136/// "insert" the upper element, and an insert of the lower element at position
3137/// 0, which improves codegen.
3138static SDValue lowerBuildVectorViaDominantValues(SDValue Op, SelectionDAG &DAG,
3139 const RISCVSubtarget &Subtarget) {
3140 MVT VT = Op.getSimpleValueType();
3141 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3142
3143 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3144
3145 SDLoc DL(Op);
3146 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3147
3148 MVT XLenVT = Subtarget.getXLenVT();
3149 unsigned NumElts = Op.getNumOperands();
3150
3151 SDValue DominantValue;
3152 unsigned MostCommonCount = 0;
3153 DenseMap<SDValue, unsigned> ValueCounts;
3154 unsigned NumUndefElts =
3155 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3156
3157 // Track the number of scalar loads we know we'd be inserting, estimated as
3158 // any non-zero floating-point constant. Other kinds of element are either
3159 // already in registers or are materialized on demand. The threshold at which
3160 // a vector load is more desirable than several scalar materialization and
3161 // vector-insertion instructions is not known.
3162 unsigned NumScalarLoads = 0;
3163
3164 for (SDValue V : Op->op_values()) {
3165 if (V.isUndef())
3166 continue;
3167
3168 ValueCounts.insert(std::make_pair(V, 0));
3169 unsigned &Count = ValueCounts[V];
3170 if (0 == Count)
3171 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3172 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3173
3174 // Is this value dominant? In case of a tie, prefer the highest element as
3175 // it's cheaper to insert near the beginning of a vector than it is at the
3176 // end.
3177 if (++Count >= MostCommonCount) {
3178 DominantValue = V;
3179 MostCommonCount = Count;
3180 }
3181 }
3182
3183 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3184 unsigned NumDefElts = NumElts - NumUndefElts;
3185 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3186
3187 // Don't perform this optimization when optimizing for size, since
3188 // materializing elements and inserting them tends to cause code bloat.
3189 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3190 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3191 ((MostCommonCount > DominantValueCountThreshold) ||
3192 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3193 // Start by splatting the most common element.
3194 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3195
3196 DenseSet<SDValue> Processed{DominantValue};
3197
3198 // We can handle an insert into the last element (of a splat) via
3199 // v(f)slide1down. This is slightly better than the vslideup insert
3200 // lowering as it avoids the need for a vector group temporary. It
3201 // is also better than using vmerge.vx as it avoids the need to
3202 // materialize the mask in a vector register.
3203 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3204 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3205 LastOp != DominantValue) {
3206 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3207 auto OpCode =
3208 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3209 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3210 LastOp, Mask, VL);
3211 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3212 Processed.insert(LastOp);
3213 }
3214
3215 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3216 for (const auto &OpIdx : enumerate(Op->ops())) {
3217 const SDValue &V = OpIdx.value();
3218 if (V.isUndef() || !Processed.insert(V).second)
3219 continue;
3220 if (ValueCounts[V] == 1) {
3221 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3222 DAG.getConstant(OpIdx.index(), DL, XLenVT));
3223 } else {
3224 // Blend in all instances of this value using a VSELECT, using a
3225 // mask where each bit signals whether that element is the one
3226 // we're after.
3227 SmallVector<SDValue> Ops;
3228 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3229 return DAG.getConstant(V == V1, DL, XLenVT);
3230 });
3231 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3232 DAG.getBuildVector(SelMaskTy, DL, Ops),
3233 DAG.getSplatBuildVector(VT, DL, V), Vec);
3234 }
3235 }
3236
3237 return Vec;
3238 }
3239
3240 return SDValue();
3241}
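// Informal examples for the path above: <2.0, 2.0, 2.0, 3.0> becomes a splat
// of the dominant value 2.0 followed by a single v(f)slide1down of 3.0 (the
// last element, occurring once), while <0, 7, 0, 0, 0, 0, 0, 0> becomes a
// zero splat plus one INSERT_VECTOR_ELT at index 1.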
3242
3243static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
3244 const RISCVSubtarget &Subtarget) {
3245 MVT VT = Op.getSimpleValueType();
3246 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3247
3248 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3249
3250 SDLoc DL(Op);
3251 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3252
3253 MVT XLenVT = Subtarget.getXLenVT();
3254 unsigned NumElts = Op.getNumOperands();
3255
3256 if (VT.getVectorElementType() == MVT::i1) {
3257 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3258 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3259 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3260 }
3261
3262 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3263 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3264 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3265 }
3266
3267 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3268 // scalar integer chunks whose bit-width depends on the number of mask
3269 // bits and XLEN.
3270 // First, determine the most appropriate scalar integer type to use. This
3271 // is at most XLenVT, but may be shrunk to a smaller vector element type
3272 // according to the size of the final vector - use i8 chunks rather than
3273 // XLenVT if we're producing a v8i1. This results in more consistent
3274 // codegen across RV32 and RV64.
3275 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3276 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3277 // If we have to use more than one INSERT_VECTOR_ELT then this
3278 // optimization is likely to increase code size; avoid performing it in
3279 // such a case. We can use a load from a constant pool in this case.
3280 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3281 return SDValue();
3282 // Now we can create our integer vector type. Note that it may be larger
3283 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3284 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3285 MVT IntegerViaVecVT =
3286 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3287 IntegerViaVecElts);
3288
3289 uint64_t Bits = 0;
3290 unsigned BitPos = 0, IntegerEltIdx = 0;
3291 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3292
3293 for (unsigned I = 0; I < NumElts;) {
3294 SDValue V = Op.getOperand(I);
3295 bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
3296 Bits |= ((uint64_t)BitValue << BitPos);
3297 ++BitPos;
3298 ++I;
3299
3300 // Once we accumulate enough bits to fill our scalar type or process the
3301 // last element, insert into our vector and clear our accumulated data.
3302 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3303 if (NumViaIntegerBits <= 32)
3304 Bits = SignExtend64<32>(Bits);
3305 SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
3306 Elts[IntegerEltIdx] = Elt;
3307 Bits = 0;
3308 BitPos = 0;
3309 IntegerEltIdx++;
3310 }
3311 }
3312
3313 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3314
3315 if (NumElts < NumViaIntegerBits) {
3316 // If we're producing a smaller vector than our minimum legal integer
3317 // type, bitcast to the equivalent (known-legal) mask type, and extract
3318 // our final mask.
3319 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3320 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3321 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3322 DAG.getConstant(0, DL, XLenVT));
3323 } else {
3324 // Else we must have produced an integer type with the same size as the
3325 // mask type; bitcast for the final result.
3326 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3327 Vec = DAG.getBitcast(VT, Vec);
3328 }
3329
3330 return Vec;
3331 }
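// Informal example of the constant-mask path above: the v8i1 constant
// <1,0,1,1,0,0,0,1> packs LSB-first into the i8 value 0x8D (0b10001101),
// which is built as a v1i8 vector and bitcast back to v8i1, materializing
// the mask from one scalar constant rather than one insert per element.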
3332
3333 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3334 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3335 : RISCVISD::VMV_V_X_VL;
3336 Splat =
3337 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3338 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3339 }
3340
3341 // Try and match index sequences, which we can lower to the vid instruction
3342 // with optional modifications. An all-undef vector is matched by
3343 // getSplatValue, above.
3344 if (auto SimpleVID = isSimpleVIDSequence(Op)) {
3345 int64_t StepNumerator = SimpleVID->StepNumerator;
3346 unsigned StepDenominator = SimpleVID->StepDenominator;
3347 int64_t Addend = SimpleVID->Addend;
3348
3349 assert(StepNumerator != 0 && "Invalid step");
3350 bool Negate = false;
3351 int64_t SplatStepVal = StepNumerator;
3352 unsigned StepOpcode = ISD::MUL;
3353 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3354 // anyway as the shift of 63 won't fit in uimm5.
3355 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3356 isPowerOf2_64(std::abs(StepNumerator))) {
3357 Negate = StepNumerator < 0;
3358 StepOpcode = ISD::SHL;
3359 SplatStepVal = Log2_64(std::abs(StepNumerator));
3360 }
3361
3362 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3363 // threshold since it's the immediate value many RVV instructions accept.
3364 // There is no vmul.vi instruction so ensure multiply constant can fit in
3365 // a single addi instruction.
3366 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3367 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3368 isPowerOf2_32(StepDenominator) &&
3369 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3370 MVT VIDVT =
3371 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3372 MVT VIDContainerVT =
3373 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3374 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3375 // Convert right out of the scalable type so we can use standard ISD
3376 // nodes for the rest of the computation. If we used scalable types with
3377 // these, we'd lose the fixed-length vector info and generate worse
3378 // vsetvli code.
3379 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3380 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3381 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3382 SDValue SplatStep = DAG.getSplatBuildVector(
3383 VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
3384 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3385 }
3386 if (StepDenominator != 1) {
3387 SDValue SplatStep = DAG.getSplatBuildVector(
3388 VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
3389 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3390 }
3391 if (Addend != 0 || Negate) {
3392 SDValue SplatAddend = DAG.getSplatBuildVector(
3393 VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
3394 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3395 VID);
3396 }
3397 if (VT.isFloatingPoint()) {
3398 // TODO: Use vfwcvt to reduce register pressure.
3399 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3400 }
3401 return VID;
3402 }
3403 }
3404
3405 // For very small build_vectors, use a single scalar insert of a constant.
3406 // TODO: Base this on constant rematerialization cost, not size.
3407 const unsigned EltBitSize = VT.getScalarSizeInBits();
3408 if (VT.getSizeInBits() <= 32 &&
3409 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3410 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3411 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3412 "Unexpected sequence type");
3413 // If we can use the original VL with the modified element type, this
3414 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3415 // be moved into InsertVSETVLI?
3416 unsigned ViaVecLen =
3417 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3418 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3419
3420 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3421 uint64_t SplatValue = 0;
3422 // Construct the amalgamated value at this larger vector type.
3423 for (const auto &OpIdx : enumerate(Op->op_values())) {
3424 const auto &SeqV = OpIdx.value();
3425 if (!SeqV.isUndef())
3426 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3427 << (OpIdx.index() * EltBitSize));
3428 }
3429
3430 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3431 // achieve better constant materialization.
3432 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3433 SplatValue = SignExtend64<32>(SplatValue);
3434
3435 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3436 DAG.getUNDEF(ViaVecVT),
3437 DAG.getConstant(SplatValue, DL, XLenVT),
3438 DAG.getConstant(0, DL, XLenVT));
3439 if (ViaVecLen != 1)
3440 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3441 MVT::getVectorVT(ViaIntVT, 1), Vec,
3442 DAG.getConstant(0, DL, XLenVT));
3443 return DAG.getBitcast(VT, Vec);
3444 }
3445
3446
3447 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3448 // when re-interpreted as a vector with a larger element type. For example,
3449 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3450 // could be instead splat as
3451 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3452 // TODO: This optimization could also work on non-constant splats, but it
3453 // would require bit-manipulation instructions to construct the splat value.
3454 SmallVector<SDValue> Sequence;
3455 const auto *BV = cast<BuildVectorSDNode>(Op);
3456 if (VT.isInteger() && EltBitSize < 64 &&
3457 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3458 BV->getRepeatedSequence(Sequence) &&
3459 (Sequence.size() * EltBitSize) <= 64) {
3460 unsigned SeqLen = Sequence.size();
3461 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3462 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3463 ViaIntVT == MVT::i64) &&
3464 "Unexpected sequence type");
3465
3466 // If we can use the original VL with the modified element type, this
3467 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3468 // be moved into InsertVSETVLI?
3469 const unsigned RequiredVL = NumElts / SeqLen;
3470 const unsigned ViaVecLen =
3471 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3472 NumElts : RequiredVL;
3473 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3474
3475 unsigned EltIdx = 0;
3476 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3477 uint64_t SplatValue = 0;
3478 // Construct the amalgamated value which can be splatted as this larger
3479 // vector type.
3480 for (const auto &SeqV : Sequence) {
3481 if (!SeqV.isUndef())
3482 SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
3483 << (EltIdx * EltBitSize));
3484 EltIdx++;
3485 }
3486
3487 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3488 // achieve better constant materialization.
3489 if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
3490 SplatValue = SignExtend64<32>(SplatValue);
3491
3492 // Since we can't introduce illegal i64 types at this stage, we can only
3493 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3494 // way we can use RVV instructions to splat.
3495 assert((ViaIntVT.bitsLE(XLenVT) ||
3496 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3497 "Unexpected bitcast sequence");
3498 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3499 SDValue ViaVL =
3500 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3501 MVT ViaContainerVT =
3502 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3503 SDValue Splat =
3504 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3505 DAG.getUNDEF(ViaContainerVT),
3506 DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
3507 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3508 if (ViaVecLen != RequiredVL)
3509 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3510 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3511 DAG.getConstant(0, DL, XLenVT));
3512 return DAG.getBitcast(VT, Splat);
3513 }
3514 }
3515
3516 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3517 return Res;
3518
3519 // If the number of signbits allows, see if we can lower as a <N x i8>.
3520 // We restrict this to N <= 4 to ensure the resulting narrow vector is
3521 // 32 bits or smaller and can thus be materialized cheaply from scalar.
3522 // The main motivation for this is the constant index vector required
3523 // by vrgather.vv. This covers all index vectors up to size 4.
3524 // TODO: We really should be costing the smaller vector. There are
3525 // profitable cases this misses.
3526 const unsigned ScalarSize =
3527 Op.getSimpleValueType().getScalarSizeInBits();
3528 if (ScalarSize > 8 && NumElts <= 4) {
3529 unsigned SignBits = DAG.ComputeNumSignBits(Op);
3530 if (ScalarSize - SignBits < 8) {
3531 SDValue Source =
3532 DAG.getBuildVector(VT.changeVectorElementType(MVT::i8), DL, Op->ops());
3533 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3534 Source, DAG, Subtarget);
3535 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3536 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3537 }
3538 }
3539
3540 // For constant vectors, use generic constant pool lowering. Otherwise,
3541 // we'd have to materialize constants in GPRs just to move them into the
3542 // vector.
3543 return SDValue();
3544}
3545
3546static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
3547 const RISCVSubtarget &Subtarget) {
3548 MVT VT = Op.getSimpleValueType();
3549 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3550
3551 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
3552 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
3553 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
3554
3555 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3556
3557 SDLoc DL(Op);
3558 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3559
3560 MVT XLenVT = Subtarget.getXLenVT();
3561
3562 if (VT.getVectorElementType() == MVT::i1) {
3563 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
3564 // vector type, we have a legal equivalently-sized i8 type, so we can use
3565 // that.
3566 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
3567 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
3568
3569 SDValue WideVec;
3570 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3571 // For a splat, perform a scalar truncate before creating the wider
3572 // vector.
3573 assert(Splat.getValueType() == XLenVT &&
3574 "Unexpected type for i1 splat value");
3575 Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
3576 DAG.getConstant(1, DL, XLenVT));
3577 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
3578 } else {
3579 SmallVector<SDValue, 8> Ops(Op->op_values());
3580 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
3581 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
3582 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
3583 }
3584
3585 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
3586 }
3587
3588 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3589 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
3590 return Gather;
3591 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3593 Splat =
3594 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3595 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3596 }
3597
3598 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3599 return Res;
3600
3601 // Cap the cost at a value linear to the number of elements in the vector.
3602 // The default lowering is to use the stack. The vector store + scalar loads
3603 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
3604 // being (at least) linear in LMUL. As a result, using the vslidedown
3605 // lowering for every element ends up being VL*LMUL.
3606 // TODO: Should we be directly costing the stack alternative? Doing so might
3607 // give us a more accurate upper bound.
3608 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
3609
3610 // TODO: unify with TTI getSlideCost.
3611 InstructionCost PerSlideCost = 1;
3612 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
3613 default: break;
3614 case RISCVII::VLMUL::LMUL_2:
3615 PerSlideCost = 2;
3616 break;
3617 case RISCVII::VLMUL::LMUL_4:
3618 PerSlideCost = 4;
3619 break;
3620 case RISCVII::VLMUL::LMUL_8:
3621 PerSlideCost = 8;
3622 break;
3623 }
3624
3625 // TODO: Should we be using the build instseq then cost + evaluate scheme
3626 // we use for integer constants here?
3627 unsigned UndefCount = 0;
3628 for (const SDValue &V : Op->ops()) {
3629 if (V.isUndef()) {
3630 UndefCount++;
3631 continue;
3632 }
3633 if (UndefCount) {
3634 LinearBudget -= PerSlideCost;
3635 UndefCount = 0;
3636 }
3637 LinearBudget -= PerSlideCost;
3638 }
3639 if (UndefCount) {
3640 LinearBudget -= PerSlideCost;
3641 }
3642
3643 if (LinearBudget < 0)
3644 return SDValue();
3645
3646 assert((!VT.isFloatingPoint() ||
3647 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
3648 "Illegal type which will result in reserved encoding");
3649
3650 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
3651
3652 SDValue Vec = DAG.getUNDEF(ContainerVT);
3653 UndefCount = 0;
3654 for (const SDValue &V : Op->ops()) {
3655 if (V.isUndef()) {
3656 UndefCount++;
3657 continue;
3658 }
3659 if (UndefCount) {
3660 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3661 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3662 Vec, Offset, Mask, VL, Policy);
3663 UndefCount = 0;
3664 }
3665 auto OpCode =
3666 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
3667 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3668 V, Mask, VL);
3669 }
3670 if (UndefCount) {
3671 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
3672 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
3673 Vec, Offset, Mask, VL, Policy);
3674 }
3675 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
3676}
3677
3678static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3679 SDValue Lo, SDValue Hi, SDValue VL,
3680 SelectionDAG &DAG) {
3681 if (!Passthru)
3682 Passthru = DAG.getUNDEF(VT);
3683 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
3684 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
3685 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
3686 // If Hi constant is all the same sign bit as Lo, lower this as a custom
3687 // node in order to try and match RVV vector/scalar instructions.
3688 if ((LoC >> 31) == HiC)
3689 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3690
3691 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
3692 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
3693 // vlmax vsetvli or vsetivli to change the VL.
3694 // FIXME: Support larger constants?
3695 // FIXME: Support non-constant VLs by saturating?
3696 if (LoC == HiC) {
3697 SDValue NewVL;
3698 if (isAllOnesConstant(VL) ||
3699 (isa<RegisterSDNode>(VL) &&
3700 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
3701 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
3702 else if (isa<ConstantSDNode>(VL) &&
3703 isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))
3704 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
3705
3706 if (NewVL) {
3707 MVT InterVT =
3708 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
3709 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
3710 DAG.getUNDEF(InterVT), Lo,
3711 DAG.getRegister(RISCV::X0, MVT::i32));
3712 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
3713 }
3714 }
3715 }
3716
3717 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
3718 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
3719 isa<ConstantSDNode>(Hi.getOperand(1)) &&
3720 Hi.getConstantOperandVal(1) == 31)
3721 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3722
3723 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
3724 // even if it might be sign extended.
3725 if (Hi.isUndef())
3726 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
3727
3728 // Fall back to a stack store and stride x0 vector load.
3729 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
3730 Hi, VL);
3731}
3732
3733// Called by type legalization to handle splat of i64 on RV32.
3734// FIXME: We can optimize this when the type has sign or zero bits in one
3735// of the halves.
3736static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
3737 SDValue Scalar, SDValue VL,
3738 SelectionDAG &DAG) {
3739 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
3740 SDValue Lo, Hi;
3741 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
3742 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
3743}
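// A minimal standalone sketch of the Lo/Hi split and the sign-extension test in
// splatPartsI64WithVL above, assuming 32-bit int and 64-bit long long; the
// names are illustrative, not LLVM APIs. When the high word is just the sign
// extension of the low word, a single EEW=32 vmv.v.x of Lo reproduces the value.
namespace splat_split_sketch {
constexpr int loHalf(long long V) { return static_cast<int>(V); }
constexpr int hiHalf(long long V) { return static_cast<int>(V >> 32); }
// Mirrors the "(LoC >> 31) == HiC" check above.
constexpr bool hiIsSignExtOfLo(long long V) {
  return (loHalf(V) >> 31) == hiHalf(V);
}
static_assert(hiIsSignExtOfLo(-5), "negative values sign-extend from the low half");
static_assert(hiIsSignExtOfLo(42), "small positive values sign-extend as well");
static_assert(!hiIsSignExtOfLo(0x100000000LL), "distinct halves need the split path");
} // namespace splat_split_sketch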
3744
3745// This function lowers a splat of a scalar operand Splat with the vector
3746// length VL. It ensures the final sequence is type legal, which is useful when
3747// lowering a splat after type legalization.
3748static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
3749 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
3750 const RISCVSubtarget &Subtarget) {
3751 bool HasPassthru = Passthru && !Passthru.isUndef();
3752 if (!HasPassthru && !Passthru)
3753 Passthru = DAG.getUNDEF(VT);
3754 if (VT.isFloatingPoint())
3755 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
3756
3757 MVT XLenVT = Subtarget.getXLenVT();
3758
3759 // Simplest case is that the operand needs to be promoted to XLenVT.
3760 if (Scalar.getValueType().bitsLE(XLenVT)) {
3761 // If the operand is a constant, sign extend to increase our chances
3762 // of being able to use a .vi instruction. ANY_EXTEND would become a
3763     // zero extend and the simm5 check in isel would fail.
3764 // FIXME: Should we ignore the upper bits in isel instead?
3765 unsigned ExtOpc =
3766 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3767 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3768 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
3769 }
3770
3771 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
3772 "Unexpected scalar for splat lowering!");
3773
3774 if (isOneConstant(VL) && isNullConstant(Scalar))
3775 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
3776 DAG.getConstant(0, DL, XLenVT), VL);
3777
3778 // Otherwise use the more complicated splatting algorithm.
3779 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
3780}
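// A small sketch of the simm5 reasoning in the comment inside lowerScalarSplat
// above, with an illustrative helper (not an LLVM API): vmv.v.i only accepts a
// 5-bit signed immediate, so sign extending a constant keeps the .vi form
// available while zero extending can silently lose it.
namespace splat_simm5_sketch {
constexpr bool isSImm5(long long V) { return V >= -16 && V <= 15; }
// An i8 with all bits set sign-extends to -1 but zero-extends to 255.
static_assert(isSImm5(-1), "sign-extended constant still matches vmv.v.i");
static_assert(!isSImm5(255), "zero-extended constant would block the .vi form");
} // namespace splat_simm5_sketch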
3781
3782static MVT getLMUL1VT(MVT VT) {
3783   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
3784          "Unexpected vector MVT");
3785   return MVT::getScalableVectorVT(
3786       VT.getVectorElementType(),
3787       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
3788 }
3789
3790// This function lowers an insert of a scalar operand Scalar into lane
3791// 0 of the vector regardless of the value of VL. The contents of the
3792// remaining lanes of the result vector are unspecified. VL is assumed
3793// to be non-zero.
3794 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
3795                                  const SDLoc &DL, SelectionDAG &DAG,
3796 const RISCVSubtarget &Subtarget) {
3797 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
3798
3799 const MVT XLenVT = Subtarget.getXLenVT();
3800 SDValue Passthru = DAG.getUNDEF(VT);
3801
3802 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3803 isNullConstant(Scalar.getOperand(1))) {
3804 SDValue ExtractedVal = Scalar.getOperand(0);
3805 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
3806 MVT ExtractedContainerVT = ExtractedVT;
3807 if (ExtractedContainerVT.isFixedLengthVector()) {
3808 ExtractedContainerVT = getContainerForFixedLengthVector(
3809 DAG, ExtractedContainerVT, Subtarget);
3810 ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
3811 DAG, Subtarget);
3812 }
3813 if (ExtractedContainerVT.bitsLE(VT))
3814 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
3815 DAG.getConstant(0, DL, XLenVT));
3816 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
3817 DAG.getConstant(0, DL, XLenVT));
3818 }
3819
3820
3821 if (VT.isFloatingPoint())
3822 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
3823 DAG.getUNDEF(VT), Scalar, VL);
3824
3825 // Avoid the tricky legalization cases by falling back to using the
3826 // splat code which already handles it gracefully.
3827 if (!Scalar.getValueType().bitsLE(XLenVT))
3828 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
3829 DAG.getConstant(1, DL, XLenVT),
3830 VT, DL, DAG, Subtarget);
3831
3832 // If the operand is a constant, sign extend to increase our chances
3833 // of being able to use a .vi instruction. ANY_EXTEND would become a
3834   // zero extend and the simm5 check in isel would fail.
3835 // FIXME: Should we ignore the upper bits in isel instead?
3836 unsigned ExtOpc =
3837 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
3838 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
3839 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
3840 DAG.getUNDEF(VT), Scalar, VL);
3841}
3842
3843 // Is this a shuffle that extracts either the even or odd elements of a vector?
3844// That is, specifically, either (a) or (b) below.
3845// t34: v8i8 = extract_subvector t11, Constant:i64<0>
3846// t33: v8i8 = extract_subvector t11, Constant:i64<8>
3847// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
3848// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
3849 // Returns true on success; whether the even or odd elements are taken is implied by Mask[0].
3850static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
3851 SDValue V2, ArrayRef<int> Mask,
3852 const RISCVSubtarget &Subtarget) {
3853 // Need to be able to widen the vector.
3854 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
3855 return false;
3856
3857   // Both inputs must be extracts.
3858 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
3859 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3860 return false;
3861
3862 // Extracting from the same source.
3863 SDValue Src = V1.getOperand(0);
3864 if (Src != V2.getOperand(0))
3865 return false;
3866
3867 // Src needs to have twice the number of elements.
3868 if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
3869 return false;
3870
3871 // The extracts must extract the two halves of the source.
3872 if (V1.getConstantOperandVal(1) != 0 ||
3873 V2.getConstantOperandVal(1) != Mask.size())
3874 return false;
3875
3876 // First index must be the first even or odd element from V1.
3877 if (Mask[0] != 0 && Mask[0] != 1)
3878 return false;
3879
3880 // The others must increase by 2 each time.
3881 // TODO: Support undef elements?
3882 for (unsigned i = 1; i != Mask.size(); ++i)
3883 if (Mask[i] != Mask[i - 1] + 2)
3884 return false;
3885
3886 return true;
3887}
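// A minimal standalone sketch of the mask shape isDeinterleaveShuffle accepts,
// checked on plain arrays with illustrative names (not LLVM APIs): index 0
// selects even (0) or odd (1) elements, and every later index grows by 2.
namespace deinterleave_mask_sketch {
constexpr bool isEvenOddMask(const int *Mask, int Size) {
  if (Mask[0] != 0 && Mask[0] != 1)
    return false;
  for (int i = 1; i < Size; ++i)
    if (Mask[i] != Mask[i - 1] + 2)
      return false;
  return true;
}
constexpr int EvenMask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
constexpr int OddMask[8] = {1, 3, 5, 7, 9, 11, 13, 15};
constexpr int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
static_assert(isEvenOddMask(EvenMask, 8), "case (a) in the comment above");
static_assert(isEvenOddMask(OddMask, 8), "case (b) in the comment above");
static_assert(!isEvenOddMask(Identity, 8), "a plain copy is not a deinterleave");
} // namespace deinterleave_mask_sketch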
3888
3889/// Is this shuffle interleaving contiguous elements from one vector into the
3890/// even elements and contiguous elements from another vector into the odd
3891/// elements. \p EvenSrc will contain the element that should be in the first
3892/// even element. \p OddSrc will contain the element that should be in the first
3893/// odd element. These can be the first element in a source or the element half
3894/// way through the source.
3895static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
3896 int &OddSrc, const RISCVSubtarget &Subtarget) {
3897 // We need to be able to widen elements to the next larger integer type.
3898 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
3899 return false;
3900
3901 int Size = Mask.size();
3902 int NumElts = VT.getVectorNumElements();
3903 assert(Size == (int)NumElts && "Unexpected mask size");
3904
3905 SmallVector<unsigned, 2> StartIndexes;
3906 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
3907 return false;
3908
3909 EvenSrc = StartIndexes[0];
3910 OddSrc = StartIndexes[1];
3911
3912 // One source should be low half of first vector.
3913 if (EvenSrc != 0 && OddSrc != 0)
3914 return false;
3915
3916   // Subvectors will be extracted from either the start of the two input
3917   // vectors, or from the start and middle of the first vector if it's a
3918   // unary interleave.
3919 // In both cases, HalfNumElts will be extracted.
3920 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
3921 // we'll create an illegal extract_subvector.
3922 // FIXME: We could support other values using a slidedown first.
3923 int HalfNumElts = NumElts / 2;
3924 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
3925}
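// A minimal standalone sketch of the interleave pattern matched above, using
// illustrative names rather than LLVM APIs: even result lanes read
// EvenSrc, EvenSrc+1, ... and odd result lanes read OddSrc, OddSrc+1, ... from
// the concatenated inputs.
namespace interleave_mask_sketch {
constexpr bool isInterleave(const int *Mask, int Size, int EvenSrc, int OddSrc) {
  for (int i = 0; i < Size; ++i) {
    const int Expected = (i % 2 == 0) ? EvenSrc + i / 2 : OddSrc + i / 2;
    if (Mask[i] != Expected)
      return false;
  }
  return true;
}
// Unary interleave of the low and high halves of one v8 source.
constexpr int Mask[8] = {0, 4, 1, 5, 2, 6, 3, 7};
static_assert(isInterleave(Mask, 8, /*EvenSrc=*/0, /*OddSrc=*/4),
              "low half feeds the even lanes, high half feeds the odd lanes");
} // namespace interleave_mask_sketch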
3926
3927/// Match shuffles that concatenate two vectors, rotate the concatenation,
3928/// and then extract the original number of elements from the rotated result.
3929/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
3930/// returned rotation amount is for a rotate right, where elements move from
3931/// higher elements to lower elements. \p LoSrc indicates the first source
3932/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
3933/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
3934/// 0 or 1 if a rotation is found.
3935///
3936/// NOTE: We talk about rotate to the right which matches how bit shift and
3937/// rotate instructions are described where LSBs are on the right, but LLVM IR
3938/// and the table below write vectors with the lowest elements on the left.
3939static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
3940 int Size = Mask.size();
3941
3942 // We need to detect various ways of spelling a rotation:
3943 // [11, 12, 13, 14, 15, 0, 1, 2]
3944 // [-1, 12, 13, 14, -1, -1, 1, -1]
3945 // [-1, -1, -1, -1, -1, -1, 1, 2]
3946 // [ 3, 4, 5, 6, 7, 8, 9, 10]
3947 // [-1, 4, 5, 6, -1, -1, 9, -1]
3948 // [-1, 4, 5, 6, -1, -1, -1, -1]
3949 int Rotation = 0;
3950 LoSrc = -1;
3951 HiSrc = -1;
3952 for (int i = 0; i != Size; ++i) {
3953 int M = Mask[i];
3954 if (M < 0)
3955 continue;
3956
3957 // Determine where a rotate vector would have started.
3958 int StartIdx = i - (M % Size);
3959 // The identity rotation isn't interesting, stop.
3960 if (StartIdx == 0)
3961 return -1;
3962
3963 // If we found the tail of a vector the rotation must be the missing
3964 // front. If we found the head of a vector, it must be how much of the
3965 // head.
3966 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
3967
3968 if (Rotation == 0)
3969 Rotation = CandidateRotation;
3970 else if (Rotation != CandidateRotation)
3971 // The rotations don't match, so we can't match this mask.
3972 return -1;
3973
3974 // Compute which value this mask is pointing at.
3975 int MaskSrc = M < Size ? 0 : 1;
3976
3977 // Compute which of the two target values this index should be assigned to.
3978     // This reflects whether the high elements are remaining or the low elements
3979 // are remaining.
3980 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
3981
3982 // Either set up this value if we've not encountered it before, or check
3983 // that it remains consistent.
3984 if (TargetSrc < 0)
3985 TargetSrc = MaskSrc;
3986 else if (TargetSrc != MaskSrc)
3987 // This may be a rotation, but it pulls from the inputs in some
3988 // unsupported interleaving.
3989 return -1;
3990 }
3991
3992 // Check that we successfully analyzed the mask, and normalize the results.
3993 assert(Rotation != 0 && "Failed to locate a viable rotation!");
3994 assert((LoSrc >= 0 || HiSrc >= 0) &&
3995 "Failed to find a rotated input vector!");
3996
3997 return Rotation;
3998}
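// A minimal standalone sketch of the rotation-amount tracking in isElementRotate
// above, with illustrative names (not LLVM APIs). It shows that the dense and
// undef-sparse spellings from the comment block describe the same rotation, and
// that the identity mask is rejected.
namespace rotate_mask_sketch {
constexpr int rotationAmount(const int *Mask, int Size) {
  int Rotation = 0;
  for (int i = 0; i < Size; ++i) {
    const int M = Mask[i];
    if (M < 0)
      continue;
    const int StartIdx = i - (M % Size);
    if (StartIdx == 0)
      return -1; // the identity rotation is not interesting
    const int Candidate = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = Candidate;
    else if (Rotation != Candidate)
      return -1; // inconsistent amounts: not a rotation
  }
  return Rotation;
}
constexpr int Dense[8] = {11, 12, 13, 14, 15, 0, 1, 2};
constexpr int Sparse[8] = {-1, 12, 13, 14, -1, -1, 1, -1};
constexpr int Identity[8] = {0, 1, 2, 3, 4, 5, 6, 7};
static_assert(rotationAmount(Dense, 8) == rotationAmount(Sparse, 8),
              "undef lanes do not change the recovered amount");
static_assert(rotationAmount(Dense, 8) == 3, "the detector reports 3 for both");
static_assert(rotationAmount(Identity, 8) == -1, "identity is rejected");
} // namespace rotate_mask_sketch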
3999
4000// Lower a deinterleave shuffle to vnsrl.
4001// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
4002// -> [p, q, r, s] (EvenElts == false)
4003// VT is the type of the vector to return, <[vscale x ]n x ty>
4004// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
4005 static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
4006                                        bool EvenElts,
4007 const RISCVSubtarget &Subtarget,
4008 SelectionDAG &DAG) {
4009 // The result is a vector of type <m x n x ty>
4010 MVT ContainerVT = VT;
4011 // Convert fixed vectors to scalable if needed
4012 if (ContainerVT.isFixedLengthVector()) {
4013 assert(Src.getSimpleValueType().isFixedLengthVector());
4014 ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
4015
4016 // The source is a vector of type <m x n*2 x ty>
4017 MVT SrcContainerVT =
4018         MVT::getVectorVT(ContainerVT.getVectorElementType(),
4019                          ContainerVT.getVectorElementCount() * 2);
4020 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
4021 }
4022
4023 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4024
4025 // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
4026 // This also converts FP to int.
4027 unsigned EltBits = ContainerVT.getScalarSizeInBits();
4028 MVT WideSrcContainerVT = MVT::getVectorVT(
4029 MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
4030 Src = DAG.getBitcast(WideSrcContainerVT, Src);
4031
4032 // The integer version of the container type.
4033 MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
4034
4035 // If we want even elements, then the shift amount is 0. Otherwise, shift by
4036 // the original element size.
4037 unsigned Shift = EvenElts ? 0 : EltBits;
4038 SDValue SplatShift = DAG.getNode(
4039 RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
4040 DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
4041 SDValue Res =
4042 DAG.getNode(RISCVISD::VNSRL_VL, DL, IntContainerVT, Src, SplatShift,
4043 DAG.getUNDEF(IntContainerVT), TrueMask, VL);
4044 // Cast back to FP if needed.
4045 Res = DAG.getBitcast(ContainerVT, Res);
4046
4047 if (VT.isFixedLengthVector())
4048 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
4049 return Res;
4050}
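// A minimal standalone sketch of the vnsrl trick above on scalar values, with
// illustrative names (not LLVM APIs): two adjacent 8-bit elements, viewed as
// one little-endian 16-bit value, keep the even element in the low byte and the
// odd element in the high byte, so a narrowing right shift by 0 or by the
// element width selects one or the other.
namespace vnsrl_sketch {
constexpr unsigned pairAsWide(unsigned Even, unsigned Odd) {
  return (Odd << 8) | Even; // even element lives in the low half
}
constexpr unsigned narrowShiftRight(unsigned Wide, unsigned Shift) {
  return (Wide >> Shift) & 0xFF; // shift right, keep only the narrow low part
}
static_assert(narrowShiftRight(pairAsWide(0xA1, 0xB2), 0) == 0xA1,
              "shift by 0 keeps the even element");
static_assert(narrowShiftRight(pairAsWide(0xA1, 0xB2), 8) == 0xB2,
              "shift by EltBits keeps the odd element");
} // namespace vnsrl_sketch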
4051
4052// Lower the following shuffle to vslidedown.
4053// a)
4054// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4055// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4056// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4057// b)
4058// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4059// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4060// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4061// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4062// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4063// t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4064 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4065                                                SDValue V1, SDValue V2,
4066 ArrayRef<int> Mask,
4067 const RISCVSubtarget &Subtarget,
4068 SelectionDAG &DAG) {
4069 auto findNonEXTRACT_SUBVECTORParent =
4070 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4071 uint64_t Offset = 0;
4072 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4073 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4074 // a scalable vector. But we don't want to match the case.
4075 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4076 Offset += Parent.getConstantOperandVal(1);
4077 Parent = Parent.getOperand(0);
4078 }
4079 return std::make_pair(Parent, Offset);
4080 };
4081
4082 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4083 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4084
4085 // Extracting from the same source.
4086 SDValue Src = V1Src;
4087 if (Src != V2Src)
4088 return SDValue();
4089
4090 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4091 SmallVector<int, 16> NewMask(Mask);
4092 for (size_t i = 0; i != NewMask.size(); ++i) {
4093 if (NewMask[i] == -1)
4094 continue;
4095
4096 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4097 NewMask[i] = NewMask[i] + V1IndexOffset;
4098 } else {
4099 // Minus NewMask.size() is needed. Otherwise, the b case would be
4100 // <5,6,7,12> instead of <5,6,7,8>.
4101 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4102 }
4103 }
4104
4105 // First index must be known and non-zero. It will be used as the slidedown
4106 // amount.
4107 if (NewMask[0] <= 0)
4108 return SDValue();
4109
4110   // The rest of NewMask must be contiguous (each index one more than the previous).
4111 for (unsigned i = 1; i != NewMask.size(); ++i)
4112 if (NewMask[i - 1] + 1 != NewMask[i])
4113 return SDValue();
4114
4115 MVT XLenVT = Subtarget.getXLenVT();
4116 MVT SrcVT = Src.getSimpleValueType();
4117 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4118 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4119 SDValue Slidedown =
4120 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4121 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4122 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4123 return DAG.getNode(
4124       ISD::EXTRACT_SUBVECTOR, DL, VT,
4125       convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4126 DAG.getConstant(0, DL, XLenVT));
4127}
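// A minimal standalone sketch of the rebuilt-mask test above, with illustrative
// names (not LLVM APIs): once the extract_subvector offsets are folded into the
// mask, the shuffle is a single vslidedown exactly when the indices form one
// contiguous run, and the first index is the slide amount.
namespace slidedown_mask_sketch {
constexpr int slideAmount(const int *Mask, int Size) {
  if (Mask[0] <= 0)
    return -1; // first index must be known and non-zero
  for (int i = 1; i < Size; ++i)
    if (Mask[i] != Mask[i - 1] + 1)
      return -1; // indices must be contiguous
  return Mask[0];
}
constexpr int CaseA[8] = {1, 2, 3, 4, 5, 6, 7, 8};
constexpr int Gapped[8] = {1, 2, 3, 4, 6, 7, 8, 9};
static_assert(slideAmount(CaseA, 8) == 1, "case (a) above slides down by 1");
static_assert(slideAmount(Gapped, 8) == -1, "a gap cannot be a single slide");
} // namespace slidedown_mask_sketch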
4128
4129// Because vslideup leaves the destination elements at the start intact, we can
4130// use it to perform shuffles that insert subvectors:
4131//
4132// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4133// ->
4134// vsetvli zero, 8, e8, mf2, ta, ma
4135// vslideup.vi v8, v9, 4
4136//
4137// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4138// ->
4139// vsetvli zero, 5, e8, mf2, tu, ma
4140 // vslideup.vi v8, v9, 2
4141 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4142                                              SDValue V1, SDValue V2,
4143 ArrayRef<int> Mask,
4144 const RISCVSubtarget &Subtarget,
4145 SelectionDAG &DAG) {
4146 unsigned NumElts = VT.getVectorNumElements();
4147 int NumSubElts, Index;
4148 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4149 Index))
4150 return SDValue();
4151
4152 bool OpsSwapped = Mask[Index] < (int)NumElts;
4153 SDValue InPlace = OpsSwapped ? V2 : V1;
4154 SDValue ToInsert = OpsSwapped ? V1 : V2;
4155
4156 MVT XLenVT = Subtarget.getXLenVT();
4157 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4158 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4159 // We slide up by the index that the subvector is being inserted at, and set
4160 // VL to the index + the number of elements being inserted.
4161   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4162   // If we're adding a suffix to the in place vector, i.e. inserting right
4163 // up to the very end of it, then we don't actually care about the tail.
4164 if (NumSubElts + Index >= (int)NumElts)
4165 Policy |= RISCVII::TAIL_AGNOSTIC;
4166
4167 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4168 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4169 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4170
4171 SDValue Res;
4172 // If we're inserting into the lowest elements, use a tail undisturbed
4173 // vmv.v.v.
4174 if (Index == 0)
4175 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4176 VL);
4177 else
4178 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4179 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4180 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4181}
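// A small sketch of the VL and tail-policy choice described in the comments
// above, with illustrative helpers (not LLVM APIs): VL must cover the in-place
// prefix plus the inserted elements, and the tail can only be agnostic when the
// insert runs all the way to the end of the vector.
namespace slideup_insert_sketch {
constexpr int vlForInsert(int Index, int NumSubElts) { return Index + NumSubElts; }
constexpr bool tailAgnosticOK(int Index, int NumSubElts, int NumElts) {
  return Index + NumSubElts >= NumElts;
}
static_assert(vlForInsert(4, 4) == 8 && tailAgnosticOK(4, 4, 8),
              "first example above: VL=8 with a dead tail");
static_assert(vlForInsert(2, 3) == 5 && !tailAgnosticOK(2, 3, 8),
              "second example above: VL=5 and elements 5..7 must be preserved");
} // namespace slideup_insert_sketch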
4182
4183/// Match v(f)slide1up/down idioms. These operations involve sliding
4184/// N-1 elements to make room for an inserted scalar at one end.
4185 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4186                                             SDValue V1, SDValue V2,
4187 ArrayRef<int> Mask,
4188 const RISCVSubtarget &Subtarget,
4189 SelectionDAG &DAG) {
4190 bool OpsSwapped = false;
4191 if (!isa<BuildVectorSDNode>(V1)) {
4192 if (!isa<BuildVectorSDNode>(V2))
4193 return SDValue();
4194 std::swap(V1, V2);
4195 OpsSwapped = true;
4196 }
4197 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4198 if (!Splat)
4199 return SDValue();
4200
4201 // Return true if the mask could describe a slide of Mask.size() - 1
4202 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4203 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4204 const unsigned S = (Offset > 0) ? 0 : -Offset;
4205 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4206 for (unsigned i = S; i != E; ++i)
4207 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4208 return false;
4209 return true;
4210 };
4211
4212 const unsigned NumElts = VT.getVectorNumElements();
4213 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4214 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4215 return SDValue();
4216
4217 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4218 // Inserted lane must come from splat, undef scalar is legal but not profitable.
4219 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4220 return SDValue();
4221
4222 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4223 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4224   auto OpCode = IsVSlidedown ?
4225     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4226     (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4227 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4228 DAG.getUNDEF(ContainerVT),
4229 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4230 Splat, TrueMask, VL);
4231 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4232}
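// A minimal standalone sketch of the isSlideMask lambda above on a concrete v8
// case, with illustrative names (not LLVM APIs). With V1 the splat operand
// (mask indices 0..7) and V2 the vector operand (indices 8..15), the mask
// <0,8,9,...,14> shifts V2 up by one lane and drops the splatted scalar into
// lane 0, i.e. a v(f)slide1up.
namespace slide1_mask_sketch {
constexpr bool isSlideMask(const int *Mask, int Size, int Base, int Offset) {
  const int S = (Offset > 0) ? 0 : -Offset;
  const int E = Size - ((Offset > 0) ? Offset : 0);
  for (int i = S; i != E; ++i)
    if (Mask[i] >= 0 && Mask[i] != Base + i + Offset)
      return false;
  return true;
}
constexpr int Slide1Up[8] = {0, 8, 9, 10, 11, 12, 13, 14};
static_assert(isSlideMask(Slide1Up, 8, /*Base=*/8, /*Offset=*/-1),
              "V2's elements all move up by one lane");
static_assert(!isSlideMask(Slide1Up, 8, /*Base=*/8, /*Offset=*/1),
              "the same mask is not a slide1down of V2");
} // namespace slide1_mask_sketch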
4233
4234// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4235// to create an interleaved vector of <[vscale x] n*2 x ty>.
4236// This requires that the size of ty is less than the subtarget's maximum ELEN.
4237 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4238                                      const SDLoc &DL, SelectionDAG &DAG,
4239 const RISCVSubtarget &Subtarget) {
4240 MVT VecVT = EvenV.getSimpleValueType();
4241 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4242 // Convert fixed vectors to scalable if needed
4243 if (VecContainerVT.isFixedLengthVector()) {
4244 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4245 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4246 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4247 }
4248
4249 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4250
4251 // We're working with a vector of the same size as the resulting
4252 // interleaved vector, but with half the number of elements and
4253 // twice the SEW (Hence the restriction on not using the maximum
4254 // ELEN)
4255 MVT WideVT =
4256       MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4257                        VecVT.getVectorElementCount());
4258 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4259 if (WideContainerVT.isFixedLengthVector())
4260 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4261
4262 // Bitcast the input vectors to integers in case they are FP
4263 VecContainerVT = VecContainerVT.changeTypeToInteger();
4264 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4265 OddV = DAG.getBitcast(VecContainerVT, OddV);
4266
4267 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4268 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4269
4270 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4271 // vwaddu.vv
4272 SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
4273 EvenV, OddV, Passthru, Mask, VL);
4274
4275   // Then multiply OddV by 2^VecVT.getScalarSizeInBits() - 1 (an all-ones value); added to the sum above this yields EvenV + OddV * 2^SEW.
4276 SDValue AllOnesVec = DAG.getSplatVector(
4277 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4278 SDValue OddsMul = DAG.getNode(