1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
76 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
81RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
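      // Worked example (with RVVBitsPerBlock == 64): if ELEN is 32 (Zve32*),
      // MinElts is 2, so the smallest fractional types such as nxv1i8 are
      // skipped here and never become legal.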
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
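    // For example, nxv16i32 has a known-min size of 512 bits, i.e.
    // 8 * RVVBitsPerBlock, so it gets the VRM8 (LMUL=8) class, while nxv2i32
    // (64 bits) lands in the plain VR class.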
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
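  // For example, a sign_extend_inreg from i8 selects to a single sext.b when
  // Zbb is present; the Expand action above instead produces a
  // shift-left/shift-right-arithmetic pair.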
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
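  // For example, on RV64 with Zbb an i64 bswap is a single rev8; with Expand
  // it is instead built from shifts, ANDs and ORs.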
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
 396 // for the 32-bit case is efficient on 64-bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
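  // For example, with Zbb on RV64 an i32 ctlz can select to clzw directly;
  // without the custom handling it would be widened to a 64-bit clz plus a
  // correction for the extra width.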
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
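  // On RV32 these counters are 64 bits wide, so the custom lowering typically
  // emits the classic re-read loop: read the high CSR, read the low CSR, then
  // re-read the high CSR and retry if it changed in between.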
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
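      // For example, nxv2i8 -> nxv2f64 spans an 8x width gap, so it is staged:
      // the i8 source is first extended to i32 and then converted to f64 with
      // a widening convert, rather than converted in one step.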
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
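      // For example, truncating nxv2i64 to nxv2i8 emits three such nodes,
      // narrowing i64 -> i32 -> i16 -> i8 (each step is a vnsrl with a zero
      // shift amount).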
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
924 // range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
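  // For example, SETOGT is expanded to SETOLT with swapped operands, which
  // matches vmflt.vv; after the second swap described above the vector-scalar
  // form can still select to vmfgt.vf.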
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
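    // For example, an fpext from nxv2f16 to nxv2f64 is emitted as two widening
    // converts, f16 -> f32 and then f32 -> f64.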
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
 1070 // Sets common actions for f16 and bf16 when only zvfhmin/zvfbfmin is
 1071 // available and we need to promote to f32 for most operations.
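  // For instance, a plain f16 vector fadd is promoted under this scheme: both
  // operands are widened to f32 (vfwcvt.f.f.v), the add is done at f32, and
  // the result is narrowed back with vfncvt.f.f.w.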
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
 1109 // Custom split nxv32[b]f16 since nxv32f32 is not legal.
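    // nxv32f16 is already LMUL=8, so promoting it would need nxv32f32
    // (LMUL=16), which does not exist; splitting into two LMUL=4 halves first
    // keeps the f32 promotion legal.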
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
 1231 // Operations below are different between masks and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1309 // range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
 1400 // Don't promote bf16 vector operations to f32 if the f32 vector type
 1401 // is not legal.
 1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
1531 ISD::EXPERIMENTAL_VP_REVERSE, ISD::MUL,
1534 if (Subtarget.hasVendorXTHeadMemPair())
1536 if (Subtarget.useRVVForFixedLengthVectors())
1538
1539 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1540 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1541
1542 // Disable strict node mutation.
1543 IsStrictFPEnabled = true;
1544 EnableExtLdPromotion = true;
1545
1546 // Let the subtarget decide if a predictable select is more expensive than the
1547 // corresponding branch. This information is used in CGP/SelectOpt to decide
1548 // when to convert selects into branches.
1549 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1550
1551 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1552 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1553
1555 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1556 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1557
1559 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1560 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1561
1562 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1563 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1564}
1565
1567 LLVMContext &Context,
1568 EVT VT) const {
1569 if (!VT.isVector())
1570 return getPointerTy(DL);
1571 if (Subtarget.hasVInstructions() &&
1572 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1573 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1575}
1576
1577MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1578 return Subtarget.getXLenVT();
1579}
1580
1581// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1582bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1583 unsigned VF,
1584 bool IsScalable) const {
1585 if (!Subtarget.hasVInstructions())
1586 return true;
1587
1588 if (!IsScalable)
1589 return true;
1590
1591 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1592 return true;
1593
 1594 // Don't allow VF=1 if those types aren't legal.
1595 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1596 return true;
1597
1598 // VLEN=32 support is incomplete.
1599 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1600 return true;
1601
1602 // The maximum VF is for the smallest element width with LMUL=8.
1603 // VF must be a power of 2.
1604 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
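  // With RVVBitsPerBlock == 64 this evaluates to (64 / 8) * 8 == 64, the
  // element count of an e8/m8 vector in a single vscale block.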
1605 return VF > MaxVF || !isPowerOf2_32(VF);
1606}
1607
1609 return !Subtarget.hasVInstructions() ||
1610 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1611}
1612
1614 const CallInst &I,
1615 MachineFunction &MF,
1616 unsigned Intrinsic) const {
1617 auto &DL = I.getDataLayout();
1618
1619 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1620 bool IsUnitStrided, bool UsePtrVal = false) {
1622 // We can't use ptrVal if the intrinsic can access memory before the
1623 // pointer. This means we can't use it for strided or indexed intrinsics.
1624 if (UsePtrVal)
1625 Info.ptrVal = I.getArgOperand(PtrOp);
1626 else
1627 Info.fallbackAddressSpace =
1628 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1629 Type *MemTy;
1630 if (IsStore) {
1631 // Store value is the first operand.
1632 MemTy = I.getArgOperand(0)->getType();
1633 } else {
 1634 // Use the return type. If it's a segment load, the return type is a struct.
1635 MemTy = I.getType();
1636 if (MemTy->isStructTy())
1637 MemTy = MemTy->getStructElementType(0);
1638 }
1639 if (!IsUnitStrided)
1640 MemTy = MemTy->getScalarType();
1641
1642 Info.memVT = getValueType(DL, MemTy);
1643 if (MemTy->isTargetExtTy()) {
1644 // RISC-V vector tuple type's alignment type should be its element type.
1645 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1646 MemTy = Type::getIntNTy(
1647 MemTy->getContext(),
1648 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1649 ->getZExtValue());
1650 Info.align = DL.getABITypeAlign(MemTy);
1651 } else {
1652 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1653 }
1655 Info.flags |=
1657 return true;
1658 };
1659
1660 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1662
1664 switch (Intrinsic) {
1665 default:
1666 return false;
1667 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1668 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1669 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1670 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1671 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1672 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1675 case Intrinsic::riscv_masked_cmpxchg_i32:
1677 Info.memVT = MVT::i32;
1678 Info.ptrVal = I.getArgOperand(0);
1679 Info.offset = 0;
1680 Info.align = Align(4);
1683 return true;
1684 case Intrinsic::riscv_seg2_load:
1685 case Intrinsic::riscv_seg3_load:
1686 case Intrinsic::riscv_seg4_load:
1687 case Intrinsic::riscv_seg5_load:
1688 case Intrinsic::riscv_seg6_load:
1689 case Intrinsic::riscv_seg7_load:
1690 case Intrinsic::riscv_seg8_load:
1691 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1692 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1693 case Intrinsic::riscv_seg2_store:
1694 case Intrinsic::riscv_seg3_store:
1695 case Intrinsic::riscv_seg4_store:
1696 case Intrinsic::riscv_seg5_store:
1697 case Intrinsic::riscv_seg6_store:
1698 case Intrinsic::riscv_seg7_store:
1699 case Intrinsic::riscv_seg8_store:
1700 // Operands are (vec, ..., vec, ptr, vl)
1701 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1702 /*IsStore*/ true,
1703 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1704 case Intrinsic::riscv_vle:
1705 case Intrinsic::riscv_vle_mask:
1706 case Intrinsic::riscv_vleff:
1707 case Intrinsic::riscv_vleff_mask:
1708 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1709 /*IsStore*/ false,
1710 /*IsUnitStrided*/ true,
1711 /*UsePtrVal*/ true);
1712 case Intrinsic::riscv_vse:
1713 case Intrinsic::riscv_vse_mask:
1714 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1715 /*IsStore*/ true,
1716 /*IsUnitStrided*/ true,
1717 /*UsePtrVal*/ true);
1718 case Intrinsic::riscv_vlse:
1719 case Intrinsic::riscv_vlse_mask:
1720 case Intrinsic::riscv_vloxei:
1721 case Intrinsic::riscv_vloxei_mask:
1722 case Intrinsic::riscv_vluxei:
1723 case Intrinsic::riscv_vluxei_mask:
1724 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1725 /*IsStore*/ false,
1726 /*IsUnitStrided*/ false);
1727 case Intrinsic::riscv_vsse:
1728 case Intrinsic::riscv_vsse_mask:
1729 case Intrinsic::riscv_vsoxei:
1730 case Intrinsic::riscv_vsoxei_mask:
1731 case Intrinsic::riscv_vsuxei:
1732 case Intrinsic::riscv_vsuxei_mask:
1733 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1734 /*IsStore*/ true,
1735 /*IsUnitStrided*/ false);
1736 case Intrinsic::riscv_vlseg2:
1737 case Intrinsic::riscv_vlseg3:
1738 case Intrinsic::riscv_vlseg4:
1739 case Intrinsic::riscv_vlseg5:
1740 case Intrinsic::riscv_vlseg6:
1741 case Intrinsic::riscv_vlseg7:
1742 case Intrinsic::riscv_vlseg8:
1743 case Intrinsic::riscv_vlseg2ff:
1744 case Intrinsic::riscv_vlseg3ff:
1745 case Intrinsic::riscv_vlseg4ff:
1746 case Intrinsic::riscv_vlseg5ff:
1747 case Intrinsic::riscv_vlseg6ff:
1748 case Intrinsic::riscv_vlseg7ff:
1749 case Intrinsic::riscv_vlseg8ff:
1750 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1751 /*IsStore*/ false,
1752 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1753 case Intrinsic::riscv_vlseg2_mask:
1754 case Intrinsic::riscv_vlseg3_mask:
1755 case Intrinsic::riscv_vlseg4_mask:
1756 case Intrinsic::riscv_vlseg5_mask:
1757 case Intrinsic::riscv_vlseg6_mask:
1758 case Intrinsic::riscv_vlseg7_mask:
1759 case Intrinsic::riscv_vlseg8_mask:
1760 case Intrinsic::riscv_vlseg2ff_mask:
1761 case Intrinsic::riscv_vlseg3ff_mask:
1762 case Intrinsic::riscv_vlseg4ff_mask:
1763 case Intrinsic::riscv_vlseg5ff_mask:
1764 case Intrinsic::riscv_vlseg6ff_mask:
1765 case Intrinsic::riscv_vlseg7ff_mask:
1766 case Intrinsic::riscv_vlseg8ff_mask:
1767 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1768 /*IsStore*/ false,
1769 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1770 case Intrinsic::riscv_vlsseg2:
1771 case Intrinsic::riscv_vlsseg3:
1772 case Intrinsic::riscv_vlsseg4:
1773 case Intrinsic::riscv_vlsseg5:
1774 case Intrinsic::riscv_vlsseg6:
1775 case Intrinsic::riscv_vlsseg7:
1776 case Intrinsic::riscv_vlsseg8:
1777 case Intrinsic::riscv_vloxseg2:
1778 case Intrinsic::riscv_vloxseg3:
1779 case Intrinsic::riscv_vloxseg4:
1780 case Intrinsic::riscv_vloxseg5:
1781 case Intrinsic::riscv_vloxseg6:
1782 case Intrinsic::riscv_vloxseg7:
1783 case Intrinsic::riscv_vloxseg8:
1784 case Intrinsic::riscv_vluxseg2:
1785 case Intrinsic::riscv_vluxseg3:
1786 case Intrinsic::riscv_vluxseg4:
1787 case Intrinsic::riscv_vluxseg5:
1788 case Intrinsic::riscv_vluxseg6:
1789 case Intrinsic::riscv_vluxseg7:
1790 case Intrinsic::riscv_vluxseg8:
1791 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1792 /*IsStore*/ false,
1793 /*IsUnitStrided*/ false);
1794 case Intrinsic::riscv_vlsseg2_mask:
1795 case Intrinsic::riscv_vlsseg3_mask:
1796 case Intrinsic::riscv_vlsseg4_mask:
1797 case Intrinsic::riscv_vlsseg5_mask:
1798 case Intrinsic::riscv_vlsseg6_mask:
1799 case Intrinsic::riscv_vlsseg7_mask:
1800 case Intrinsic::riscv_vlsseg8_mask:
1801 case Intrinsic::riscv_vloxseg2_mask:
1802 case Intrinsic::riscv_vloxseg3_mask:
1803 case Intrinsic::riscv_vloxseg4_mask:
1804 case Intrinsic::riscv_vloxseg5_mask:
1805 case Intrinsic::riscv_vloxseg6_mask:
1806 case Intrinsic::riscv_vloxseg7_mask:
1807 case Intrinsic::riscv_vloxseg8_mask:
1808 case Intrinsic::riscv_vluxseg2_mask:
1809 case Intrinsic::riscv_vluxseg3_mask:
1810 case Intrinsic::riscv_vluxseg4_mask:
1811 case Intrinsic::riscv_vluxseg5_mask:
1812 case Intrinsic::riscv_vluxseg6_mask:
1813 case Intrinsic::riscv_vluxseg7_mask:
1814 case Intrinsic::riscv_vluxseg8_mask:
1815 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1816 /*IsStore*/ false,
1817 /*IsUnitStrided*/ false);
1818 case Intrinsic::riscv_vsseg2:
1819 case Intrinsic::riscv_vsseg3:
1820 case Intrinsic::riscv_vsseg4:
1821 case Intrinsic::riscv_vsseg5:
1822 case Intrinsic::riscv_vsseg6:
1823 case Intrinsic::riscv_vsseg7:
1824 case Intrinsic::riscv_vsseg8:
1825 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1826 /*IsStore*/ true,
1827 /*IsUnitStrided*/ false);
1828 case Intrinsic::riscv_vsseg2_mask:
1829 case Intrinsic::riscv_vsseg3_mask:
1830 case Intrinsic::riscv_vsseg4_mask:
1831 case Intrinsic::riscv_vsseg5_mask:
1832 case Intrinsic::riscv_vsseg6_mask:
1833 case Intrinsic::riscv_vsseg7_mask:
1834 case Intrinsic::riscv_vsseg8_mask:
1835 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1836 /*IsStore*/ true,
1837 /*IsUnitStrided*/ false);
1838 case Intrinsic::riscv_vssseg2:
1839 case Intrinsic::riscv_vssseg3:
1840 case Intrinsic::riscv_vssseg4:
1841 case Intrinsic::riscv_vssseg5:
1842 case Intrinsic::riscv_vssseg6:
1843 case Intrinsic::riscv_vssseg7:
1844 case Intrinsic::riscv_vssseg8:
1845 case Intrinsic::riscv_vsoxseg2:
1846 case Intrinsic::riscv_vsoxseg3:
1847 case Intrinsic::riscv_vsoxseg4:
1848 case Intrinsic::riscv_vsoxseg5:
1849 case Intrinsic::riscv_vsoxseg6:
1850 case Intrinsic::riscv_vsoxseg7:
1851 case Intrinsic::riscv_vsoxseg8:
1852 case Intrinsic::riscv_vsuxseg2:
1853 case Intrinsic::riscv_vsuxseg3:
1854 case Intrinsic::riscv_vsuxseg4:
1855 case Intrinsic::riscv_vsuxseg5:
1856 case Intrinsic::riscv_vsuxseg6:
1857 case Intrinsic::riscv_vsuxseg7:
1858 case Intrinsic::riscv_vsuxseg8:
1859 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1860 /*IsStore*/ true,
1861 /*IsUnitStrided*/ false);
1862 case Intrinsic::riscv_vssseg2_mask:
1863 case Intrinsic::riscv_vssseg3_mask:
1864 case Intrinsic::riscv_vssseg4_mask:
1865 case Intrinsic::riscv_vssseg5_mask:
1866 case Intrinsic::riscv_vssseg6_mask:
1867 case Intrinsic::riscv_vssseg7_mask:
1868 case Intrinsic::riscv_vssseg8_mask:
1869 case Intrinsic::riscv_vsoxseg2_mask:
1870 case Intrinsic::riscv_vsoxseg3_mask:
1871 case Intrinsic::riscv_vsoxseg4_mask:
1872 case Intrinsic::riscv_vsoxseg5_mask:
1873 case Intrinsic::riscv_vsoxseg6_mask:
1874 case Intrinsic::riscv_vsoxseg7_mask:
1875 case Intrinsic::riscv_vsoxseg8_mask:
1876 case Intrinsic::riscv_vsuxseg2_mask:
1877 case Intrinsic::riscv_vsuxseg3_mask:
1878 case Intrinsic::riscv_vsuxseg4_mask:
1879 case Intrinsic::riscv_vsuxseg5_mask:
1880 case Intrinsic::riscv_vsuxseg6_mask:
1881 case Intrinsic::riscv_vsuxseg7_mask:
1882 case Intrinsic::riscv_vsuxseg8_mask:
1883 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1884 /*IsStore*/ true,
1885 /*IsUnitStrided*/ false);
1886 }
1887}
1888
1890 const AddrMode &AM, Type *Ty,
1891 unsigned AS,
1892 Instruction *I) const {
1893 // No global is ever allowed as a base.
1894 if (AM.BaseGV)
1895 return false;
1896
1897 // None of our addressing modes allows a scalable offset
1898 if (AM.ScalableOffset)
1899 return false;
1900
1901 // RVV instructions only support register addressing.
1902 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1903 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1904
1905 // Require a 12-bit signed offset.
1906 if (!isInt<12>(AM.BaseOffs))
1907 return false;
1908
1909 switch (AM.Scale) {
1910 case 0: // "r+i" or just "i", depending on HasBaseReg.
1911 break;
1912 case 1:
1913 if (!AM.HasBaseReg) // allow "r+i".
1914 break;
1915 return false; // disallow "r+r" or "r+r+i".
1916 default:
1917 return false;
1918 }
1919
1920 return true;
1921}
1922
1924 return isInt<12>(Imm);
1925}
1926
1928 return isInt<12>(Imm);
1929}
1930
1931// On RV32, 64-bit integers are split into their high and low parts and held
1932// in two different registers, so the trunc is free since the low register can
1933// just be used.
1934// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1935// isTruncateFree?
1937 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1938 return false;
1939 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1940 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1941 return (SrcBits == 64 && DestBits == 32);
1942}
1943
1945 // We consider i64->i32 free on RV64 since we have good selection of W
1946 // instructions that make promoting operations back to i64 free in many cases.
1947 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1948 !DstVT.isInteger())
1949 return false;
1950 unsigned SrcBits = SrcVT.getSizeInBits();
1951 unsigned DestBits = DstVT.getSizeInBits();
1952 return (SrcBits == 64 && DestBits == 32);
1953}
1954
1956 EVT SrcVT = Val.getValueType();
 1957 // Truncates of SRL/SRA results are free since they fold into vnsrl/vnsra.
1958 if (Subtarget.hasVInstructions() &&
1959 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1960 SrcVT.isVector() && VT2.isVector()) {
1961 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1962 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1963 if (SrcBits == DestBits * 2) {
1964 return true;
1965 }
1966 }
1967 return TargetLowering::isTruncateFree(Val, VT2);
1968}
1969
1971 // Zexts are free if they can be combined with a load.
1972 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
 1973 // poorly with type legalization of compares, which prefers sext.
1974 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1975 EVT MemVT = LD->getMemoryVT();
1976 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1977 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1978 LD->getExtensionType() == ISD::ZEXTLOAD))
1979 return true;
1980 }
1981
1982 return TargetLowering::isZExtFree(Val, VT2);
1983}
1984
1986 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1987}
1988
1990 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1991}
1992
1994 return Subtarget.hasStdExtZbb() ||
1995 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
1996}
1997
1999 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 const Instruction &AndI) const {
2005 // We expect to be able to match a bit extraction instruction if the Zbs
2006 // extension is supported and the mask is a power of two. However, we
2007 // conservatively return false if the mask would fit in an ANDI instruction,
2008 // on the basis that it's possible the sinking+duplication of the AND in
2009 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2010 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2011 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2012 return false;
2013 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2014 if (!Mask)
2015 return false;
2016 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2017}
2018
2020 EVT VT = Y.getValueType();
2021
2022 // FIXME: Support vectors once we have tests.
2023 if (VT.isVector())
2024 return false;
2025
2026 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2027 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2028}
2029
2031 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2032 if (Subtarget.hasStdExtZbs())
2033 return X.getValueType().isScalarInteger();
2034 auto *C = dyn_cast<ConstantSDNode>(Y);
2035 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2036 if (Subtarget.hasVendorXTHeadBs())
2037 return C != nullptr;
2038 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
2039 return C && C->getAPIntValue().ule(10);
2040}
2041
2043 EVT VT) const {
2044 // Only enable for rvv.
2045 if (!VT.isVector() || !Subtarget.hasVInstructions())
2046 return false;
2047
2048 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2049 return false;
2050
2051 return true;
2052}
2053
2055 Type *Ty) const {
2056 assert(Ty->isIntegerTy());
2057
2058 unsigned BitSize = Ty->getIntegerBitWidth();
2059 if (BitSize > Subtarget.getXLen())
2060 return false;
2061
2062 // Fast path, assume 32-bit immediates are cheap.
2063 int64_t Val = Imm.getSExtValue();
2064 if (isInt<32>(Val))
2065 return true;
2066
 2067 // A constant pool entry may be more aligned than the load we're trying to
2068 // replace. If we don't support unaligned scalar mem, prefer the constant
2069 // pool.
2070 // TODO: Can the caller pass down the alignment?
2071 if (!Subtarget.enableUnalignedScalarMem())
2072 return true;
2073
2074 // Prefer to keep the load if it would require many instructions.
2075 // This uses the same threshold we use for constant pools but doesn't
2076 // check useConstantPoolForLargeInts.
2077 // TODO: Should we keep the load only when we're definitely going to emit a
2078 // constant pool?
2079
2081 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2082}
2083
2087 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2088 SelectionDAG &DAG) const {
2089 // One interesting pattern that we'd want to form is 'bit extract':
2090 // ((1 >> Y) & 1) ==/!= 0
2091 // But we also need to be careful not to try to reverse that fold.
2092
2093 // Is this '((1 >> Y) & 1)'?
2094 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2095 return false; // Keep the 'bit extract' pattern.
2096
2097 // Will this be '((1 >> Y) & 1)' after the transform?
2098 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2099 return true; // Do form the 'bit extract' pattern.
2100
2101 // If 'X' is a constant, and we transform, then we will immediately
2102 // try to undo the fold, thus causing endless combine loop.
2103 // So only do the transform if X is not a constant. This matches the default
2104 // implementation of this function.
2105 return !XC;
2106}
2107
2109 unsigned Opc = VecOp.getOpcode();
2110
2111 // Assume target opcodes can't be scalarized.
2112 // TODO - do we have any exceptions?
2113 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2114 return false;
2115
2116 // If the vector op is not supported, try to convert to scalar.
2117 EVT VecVT = VecOp.getValueType();
2118 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2119 return true;
2120
2121 // If the vector op is supported, but the scalar op is not, the transform may
2122 // not be worthwhile.
 2123 // Permit converting a vector binary operation to a scalar binary
 2124 // operation that is custom lowered with an illegal type.
2125 EVT ScalarVT = VecVT.getScalarType();
2126 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2127 isOperationCustom(Opc, ScalarVT);
2128}
2129
2131 const GlobalAddressSDNode *GA) const {
2132 // In order to maximise the opportunity for common subexpression elimination,
2133 // keep a separate ADD node for the global address offset instead of folding
2134 // it in the global address node. Later peephole optimisations may choose to
2135 // fold it back in when profitable.
2136 return false;
2137}
2138
2139// Returns 0-31 if the fli instruction is available for the type and this is
 2140// a legal FP immediate for the type. Returns -1 otherwise.
2142 if (!Subtarget.hasStdExtZfa())
2143 return -1;
2144
2145 bool IsSupportedVT = false;
2146 if (VT == MVT::f16) {
2147 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2148 } else if (VT == MVT::f32) {
2149 IsSupportedVT = true;
2150 } else if (VT == MVT::f64) {
2151 assert(Subtarget.hasStdExtD() && "Expect D extension");
2152 IsSupportedVT = true;
2153 }
2154
2155 if (!IsSupportedVT)
2156 return -1;
2157
2158 return RISCVLoadFPImm::getLoadFPImm(Imm);
2159}
2160
2162 bool ForCodeSize) const {
2163 bool IsLegalVT = false;
2164 if (VT == MVT::f16)
2165 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2166 else if (VT == MVT::f32)
2167 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2168 else if (VT == MVT::f64)
2169 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2170 else if (VT == MVT::bf16)
2171 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2172
2173 if (!IsLegalVT)
2174 return false;
2175
2176 if (getLegalZfaFPImm(Imm, VT) >= 0)
2177 return true;
2178
2179 // Cannot create a 64 bit floating-point immediate value for rv32.
2180 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2181 // td can handle +0.0 or -0.0 already.
2182 // -0.0 can be created by fmv + fneg.
2183 return Imm.isZero();
2184 }
2185
2186 // Special case: fmv + fneg
2187 if (Imm.isNegZero())
2188 return true;
2189
2190 // Building an integer and then converting requires a fmv at the end of
2191 // the integer sequence. The fmv is not required for Zfinx.
2192 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2193 const int Cost =
2194 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2195 Subtarget.getXLen(), Subtarget);
2196 return Cost <= FPImmCost;
2197}
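
// Illustrative sketch (not part of the upstream source): the shape of the
// cost comparison above, with the integer-materialization cost abstracted
// into a parameter. "IntMatCost" stands in for RISCVMatInt::getIntMatCost on
// the immediate's bit pattern and "FPImmCostLimit" for the FPImmCost
// threshold; both parameter names are invented for this example.
static bool fpImmCheaperThanLoadExample(int IntMatCost, bool HasZfinx,
                                        int FPImmCostLimit) {
  // Zfinx keeps FP values in GPRs, so no fmv is needed at the end of the
  // integer materialization sequence.
  const int FmvCost = HasZfinx ? 0 : 1;
  return FmvCost + IntMatCost <= FPImmCostLimit;
}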
2198
2199// TODO: This is very conservative.
2201 unsigned Index) const {
2203 return false;
2204
2205 // Only support extracting a fixed from a fixed vector for now.
2206 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2207 return false;
2208
2209 EVT EltVT = ResVT.getVectorElementType();
2210 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2211
2212 // The smallest type we can slide is i8.
2213 // TODO: We can extract index 0 from a mask vector without a slide.
2214 if (EltVT == MVT::i1)
2215 return false;
2216
2217 unsigned ResElts = ResVT.getVectorNumElements();
2218 unsigned SrcElts = SrcVT.getVectorNumElements();
2219
2220 unsigned MinVLen = Subtarget.getRealMinVLen();
2221 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2222
2223 // If we're extracting only data from the first VLEN bits of the source
2224 // then we can always do this with an m1 vslidedown.vx. Restricting the
2225 // Index ensures we can use a vslidedown.vi.
2226 // TODO: We can generalize this when the exact VLEN is known.
2227 if (Index + ResElts <= MinVLMAX && Index < 31)
2228 return true;
2229
2230 // Conservatively only handle extracting half of a vector.
2231 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2232 // the upper half of a vector until we have more test coverage.
2233 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2234 // a cheap extract. However, this case is important in practice for
2235 // shuffled extracts of longer vectors. How should this be resolved?
2236 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2237}
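
// Illustrative sketch (not part of the upstream source): the two "cheap
// extract" rules above restated over plain integers, with MinVLMAX standing
// for RealMinVLen / EltSizeInBits. For example, with i32 elements and
// RealMinVLen = 128 (MinVLMAX = 4), extracting v2i32 at index 2 from v8i32
// satisfies the first rule, and extracting the upper v4i32 half (index 4)
// satisfies the second.
static bool cheapExtractExample(unsigned ResElts, unsigned SrcElts,
                                unsigned Index, unsigned MinVLMAX) {
  // Data entirely within the first VLEN bits: an m1 vslidedown.vi suffices.
  if (Index + ResElts <= MinVLMAX && Index < 31)
    return true;
  // Otherwise only accept extracting exactly half, from either end.
  return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
}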
2238
2241 EVT VT) const {
2242 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2243 // We might still end up using a GPR but that will be decided based on ABI.
2244 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2245 !Subtarget.hasStdExtZfhminOrZhinxmin())
2246 return MVT::f32;
2247
2249
2250 return PartVT;
2251}
2252
2253unsigned
2255 std::optional<MVT> RegisterVT) const {
2256 // Pair inline assembly operand
2257 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2258 *RegisterVT == MVT::Untyped)
2259 return 1;
2260
2261 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2262}
2263
2266 EVT VT) const {
2267 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2268 // We might still end up using a GPR but that will be decided based on ABI.
2269 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2270 !Subtarget.hasStdExtZfhminOrZhinxmin())
2271 return 1;
2272
2274}
2275
2277 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2278 unsigned &NumIntermediates, MVT &RegisterVT) const {
2280 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2281
2282 return NumRegs;
2283}
2284
2285// Changes the condition code and swaps operands if necessary, so the SetCC
2286// operation matches one of the comparisons supported directly by branches
2287// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2288// with 1/-1.
2289static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2290 ISD::CondCode &CC, SelectionDAG &DAG) {
2291 // If this is a single bit test that can't be handled by ANDI, shift the
2292 // bit to be tested to the MSB and perform a signed compare with 0.
2293 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2294 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2295 isa<ConstantSDNode>(LHS.getOperand(1))) {
2296 uint64_t Mask = LHS.getConstantOperandVal(1);
2297 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2298 unsigned ShAmt = 0;
2299 if (isPowerOf2_64(Mask)) {
2301 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2302 } else {
2303 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2304 }
2305
2306 LHS = LHS.getOperand(0);
2307 if (ShAmt != 0)
2308 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2309 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2310 return;
2311 }
2312 }
2313
2314 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2315 int64_t C = RHSC->getSExtValue();
2316 switch (CC) {
2317 default: break;
2318 case ISD::SETGT:
2319 // Convert X > -1 to X >= 0.
2320 if (C == -1) {
2321 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2322 CC = ISD::SETGE;
2323 return;
2324 }
2325 break;
2326 case ISD::SETLT:
2327 // Convert X < 1 to 0 >= X.
2328 if (C == 1) {
2329 RHS = LHS;
2330 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2331 CC = ISD::SETGE;
2332 return;
2333 }
2334 break;
2335 }
2336 }
2337
2338 switch (CC) {
2339 default:
2340 break;
2341 case ISD::SETGT:
2342 case ISD::SETLE:
2343 case ISD::SETUGT:
2344 case ISD::SETULE:
2346 std::swap(LHS, RHS);
2347 break;
2348 }
2349}
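
// Illustrative sketch (not part of the upstream source): the bit identity the
// single-bit-test rewrite above relies on for a power-of-2 mask (1 << K) on a
// 64-bit value. Shifting bit K up into the sign position lets the branch test
// the sign of the shifted value against zero instead of materializing a mask
// constant that does not fit in ANDI's 12-bit immediate.
static bool bitTestViaMsbExample(uint64_t X, unsigned K) {
  assert(K < 64 && "bit index out of range");
  uint64_t Shifted = X << (63 - K); // bit K is now the MSB
  return (Shifted >> 63) != 0;      // same predicate as (X & (1ull << K)) != 0
}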
2350
2352 if (VT.isRISCVVectorTuple()) {
2353 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2354 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2355 return RISCVII::LMUL_F8;
2356 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2357 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2358 return RISCVII::LMUL_F4;
2359 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2360 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2361 return RISCVII::LMUL_F2;
2362 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2363 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2364 return RISCVII::LMUL_1;
2365 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2366 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2367 return RISCVII::LMUL_2;
2368 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2369 return RISCVII::LMUL_4;
2370 llvm_unreachable("Invalid vector tuple type LMUL.");
2371 }
2372
2373 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2374 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2375 if (VT.getVectorElementType() == MVT::i1)
2376 KnownSize *= 8;
2377
2378 switch (KnownSize) {
2379 default:
2380 llvm_unreachable("Invalid LMUL.");
2381 case 8:
2383 case 16:
2385 case 32:
2387 case 64:
2389 case 128:
2391 case 256:
2393 case 512:
2395 }
2396}
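
// Illustrative sketch (not part of the upstream source): one way to read the
// known-min-size to LMUL mapping above, assuming RVVBitsPerBlock == 64. An i1
// vector's size is scaled by 8 first so that, e.g., nxv8i1 (known-min 8 bits)
// groups with nxv8i8. The string names below are just the usual RVV spellings
// of the corresponding LMUL values.
static const char *lmulNameForKnownMinBits(unsigned KnownMinBits) {
  switch (KnownMinBits) {
  case 8:   return "mf8";
  case 16:  return "mf4";
  case 32:  return "mf2";
  case 64:  return "m1";
  case 128: return "m2";
  case 256: return "m4";
  case 512: return "m8";
  default:  return "invalid";
  }
}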
2397
2399 switch (LMul) {
2400 default:
2401 llvm_unreachable("Invalid LMUL.");
2406 return RISCV::VRRegClassID;
2408 return RISCV::VRM2RegClassID;
2410 return RISCV::VRM4RegClassID;
2412 return RISCV::VRM8RegClassID;
2413 }
2414}
2415
2416unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2417 RISCVII::VLMUL LMUL = getLMUL(VT);
2418 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2419 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2420 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2421 LMUL == RISCVII::VLMUL::LMUL_1) {
2422 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2423 "Unexpected subreg numbering");
2424 return RISCV::sub_vrm1_0 + Index;
2425 }
2426 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2427 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm2_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2432 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm4_0 + Index;
2435 }
2436 llvm_unreachable("Invalid vector type.");
2437}
2438
2440 if (VT.isRISCVVectorTuple()) {
2441 unsigned NF = VT.getRISCVVectorTupleNumFields();
2442 unsigned RegsPerField =
2443 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2444 (NF * RISCV::RVVBitsPerBlock));
2445 switch (RegsPerField) {
2446 case 1:
2447 if (NF == 2)
2448 return RISCV::VRN2M1RegClassID;
2449 if (NF == 3)
2450 return RISCV::VRN3M1RegClassID;
2451 if (NF == 4)
2452 return RISCV::VRN4M1RegClassID;
2453 if (NF == 5)
2454 return RISCV::VRN5M1RegClassID;
2455 if (NF == 6)
2456 return RISCV::VRN6M1RegClassID;
2457 if (NF == 7)
2458 return RISCV::VRN7M1RegClassID;
2459 if (NF == 8)
2460 return RISCV::VRN8M1RegClassID;
2461 break;
2462 case 2:
2463 if (NF == 2)
2464 return RISCV::VRN2M2RegClassID;
2465 if (NF == 3)
2466 return RISCV::VRN3M2RegClassID;
2467 if (NF == 4)
2468 return RISCV::VRN4M2RegClassID;
2469 break;
2470 case 4:
2471 assert(NF == 2);
2472 return RISCV::VRN2M4RegClassID;
2473 default:
2474 break;
2475 }
2476 llvm_unreachable("Invalid vector tuple type RegClass.");
2477 }
2478
2479 if (VT.getVectorElementType() == MVT::i1)
2480 return RISCV::VRRegClassID;
2481 return getRegClassIDForLMUL(getLMUL(VT));
2482}
2483
2484// Attempt to decompose a subvector insert/extract between VecVT and
2485// SubVecVT via subregister indices. Returns the subregister index that
2486// can perform the subvector insert/extract with the given element index, as
2487// well as the index corresponding to any leftover subvectors that must be
2488// further inserted/extracted within the register class for SubVecVT.
2489std::pair<unsigned, unsigned>
2491 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2492 const RISCVRegisterInfo *TRI) {
2493 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2494 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2495 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2496 "Register classes not ordered");
2497 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2498 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2499
2500 // If VecVT is a vector tuple type, either it's a tuple type with the same
2501 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2502 if (VecVT.isRISCVVectorTuple()) {
2503 if (VecRegClassID == SubRegClassID)
2504 return {RISCV::NoSubRegister, 0};
2505
2506 assert(SubVecVT.isScalableVector() &&
2507 "Only allow scalable vector subvector.");
2508 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2509 "Invalid vector tuple insert/extract for vector and subvector with "
2510 "different LMUL.");
2511 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2512 }
2513
2514 // Try to compose a subregister index that takes us from the incoming
2515 // LMUL>1 register class down to the outgoing one. At each step we halve
2516 // the LMUL:
2517 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2518 // Note that this is not guaranteed to find a subregister index, such as
2519 // when we are extracting from one VR type to another.
2520 unsigned SubRegIdx = RISCV::NoSubRegister;
2521 for (const unsigned RCID :
2522 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2523 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2524 VecVT = VecVT.getHalfNumVectorElementsVT();
2525 bool IsHi =
2526 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2527 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2528 getSubregIndexByMVT(VecVT, IsHi));
2529 if (IsHi)
2530 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2531 }
2532 return {SubRegIdx, InsertExtractIdx};
2533}
2534
2535// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2536// stores for those types.
2537bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2538 return !Subtarget.useRVVForFixedLengthVectors() ||
2539 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2540}
2541
2543 if (!ScalarTy.isSimple())
2544 return false;
2545 switch (ScalarTy.getSimpleVT().SimpleTy) {
2546 case MVT::iPTR:
2547 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2548 case MVT::i8:
2549 case MVT::i16:
2550 case MVT::i32:
2551 return true;
2552 case MVT::i64:
2553 return Subtarget.hasVInstructionsI64();
2554 case MVT::f16:
2555 return Subtarget.hasVInstructionsF16Minimal();
2556 case MVT::bf16:
2557 return Subtarget.hasVInstructionsBF16Minimal();
2558 case MVT::f32:
2559 return Subtarget.hasVInstructionsF32();
2560 case MVT::f64:
2561 return Subtarget.hasVInstructionsF64();
2562 default:
2563 return false;
2564 }
2565}
2566
2567
2568unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2569 return NumRepeatedDivisors;
2570}
2571
2573 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2574 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2575 "Unexpected opcode");
2576 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2577 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2579 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2580 if (!II)
2581 return SDValue();
2582 return Op.getOperand(II->VLOperand + 1 + HasChain);
2583}
2584
2586 const RISCVSubtarget &Subtarget) {
2587 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2588 if (!Subtarget.useRVVForFixedLengthVectors())
2589 return false;
2590
2591 // We only support a set of vector types with a consistent maximum fixed size
2592 // across all supported vector element types to avoid legalization issues.
2593 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2594 // fixed-length vector type we support is 1024 bytes.
2595 if (VT.getFixedSizeInBits() > 1024 * 8)
2596 return false;
2597
2598 unsigned MinVLen = Subtarget.getRealMinVLen();
2599
2600 MVT EltVT = VT.getVectorElementType();
2601
2602 // Don't use RVV for vectors we cannot scalarize if required.
2603 switch (EltVT.SimpleTy) {
2604 // i1 is supported but has different rules.
2605 default:
2606 return false;
2607 case MVT::i1:
2608 // Masks can only use a single register.
2609 if (VT.getVectorNumElements() > MinVLen)
2610 return false;
2611 MinVLen /= 8;
2612 break;
2613 case MVT::i8:
2614 case MVT::i16:
2615 case MVT::i32:
2616 break;
2617 case MVT::i64:
2618 if (!Subtarget.hasVInstructionsI64())
2619 return false;
2620 break;
2621 case MVT::f16:
2622 if (!Subtarget.hasVInstructionsF16Minimal())
2623 return false;
2624 break;
2625 case MVT::bf16:
2626 if (!Subtarget.hasVInstructionsBF16Minimal())
2627 return false;
2628 break;
2629 case MVT::f32:
2630 if (!Subtarget.hasVInstructionsF32())
2631 return false;
2632 break;
2633 case MVT::f64:
2634 if (!Subtarget.hasVInstructionsF64())
2635 return false;
2636 break;
2637 }
2638
2639 // Reject elements larger than ELEN.
2640 if (EltVT.getSizeInBits() > Subtarget.getELen())
2641 return false;
2642
2643 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2644 // Don't use RVV for types that don't fit.
2645 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2646 return false;
2647
2648 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2649 // the base fixed length RVV support in place.
2650 if (!VT.isPow2VectorType())
2651 return false;
2652
2653 return true;
2654}
2655
2656bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2657 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2658}
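
// Illustrative sketch (not part of the upstream source): the LMUL feasibility
// check above, over plain integers. With RealMinVLen = 128 and a maximum
// LMUL of 8, a 1024-bit fixed-length vector such as v32i32 needs
// divideCeil(1024, 128) = 8 registers' worth of bits and is accepted, while a
// 2048-bit one is rejected.
static bool fitsWithinMaxLMULExample(unsigned FixedSizeInBits,
                                     unsigned MinVLen, unsigned MaxLMUL) {
  unsigned LMul = (FixedSizeInBits + MinVLen - 1) / MinVLen; // divideCeil
  return LMul <= MaxLMUL;
}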
2659
2660// Return the largest legal scalable vector type that matches VT's element type.
2662 const RISCVSubtarget &Subtarget) {
2663 // This may be called before legal types are setup.
2664 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2665 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2666 "Expected legal fixed length vector!");
2667
2668 unsigned MinVLen = Subtarget.getRealMinVLen();
2669 unsigned MaxELen = Subtarget.getELen();
2670
2671 MVT EltVT = VT.getVectorElementType();
2672 switch (EltVT.SimpleTy) {
2673 default:
2674 llvm_unreachable("unexpected element type for RVV container");
2675 case MVT::i1:
2676 case MVT::i8:
2677 case MVT::i16:
2678 case MVT::i32:
2679 case MVT::i64:
2680 case MVT::bf16:
2681 case MVT::f16:
2682 case MVT::f32:
2683 case MVT::f64: {
2684 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2685 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2686 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2687 unsigned NumElts =
2689 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2690 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2691 return MVT::getScalableVectorVT(EltVT, NumElts);
2692 }
2693 }
2694}
2695
2697 const RISCVSubtarget &Subtarget) {
2699 Subtarget);
2700}
2701
2703 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2704}
2705
2706// Grow V to consume an entire RVV register.
2708 const RISCVSubtarget &Subtarget) {
2709 assert(VT.isScalableVector() &&
2710 "Expected to convert into a scalable vector!");
2711 assert(V.getValueType().isFixedLengthVector() &&
2712 "Expected a fixed length vector operand!");
2713 SDLoc DL(V);
2714 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2715 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2716}
2717
2718// Shrink V so it's just big enough to maintain a VT's worth of data.
2720 const RISCVSubtarget &Subtarget) {
2722 "Expected to convert into a fixed length vector!");
2723 assert(V.getValueType().isScalableVector() &&
2724 "Expected a scalable vector operand!");
2725 SDLoc DL(V);
2726 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2727 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2728}
2729
2730/// Return the type of the mask type suitable for masking the provided
2731/// vector type. This is simply an i1 element type vector of the same
2732/// (possibly scalable) length.
2733static MVT getMaskTypeFor(MVT VecVT) {
2734 assert(VecVT.isVector());
2736 return MVT::getVectorVT(MVT::i1, EC);
2737}
2738
2739/// Creates an all ones mask suitable for masking a vector of type VecTy with
2740/// vector length VL.
2741static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2742 SelectionDAG &DAG) {
2743 MVT MaskVT = getMaskTypeFor(VecVT);
2744 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2745}
2746
2747static std::pair<SDValue, SDValue>
2749 const RISCVSubtarget &Subtarget) {
2750 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2751 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2752 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2753 return {Mask, VL};
2754}
2755
2756static std::pair<SDValue, SDValue>
2757getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2758 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2759 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2760 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2761 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2762 return {Mask, VL};
2763}
2764
2765// Gets the two common "VL" operands: an all-ones mask and the vector length.
2766// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2767// fixed-length, ContainerVT is the scalable vector type it is contained in;
2768// otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
2769static std::pair<SDValue, SDValue>
2770getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2771 const RISCVSubtarget &Subtarget) {
2772 if (VecVT.isFixedLengthVector())
2773 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2774 Subtarget);
2775 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2776 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2777}
2778
2780 SelectionDAG &DAG) const {
2781 assert(VecVT.isScalableVector() && "Expected scalable vector");
2782 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2783 VecVT.getVectorElementCount());
2784}
2785
2786std::pair<unsigned, unsigned>
2788 const RISCVSubtarget &Subtarget) {
2789 assert(VecVT.isScalableVector() && "Expected scalable vector");
2790
2791 unsigned EltSize = VecVT.getScalarSizeInBits();
2792 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2793
2794 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2795 unsigned MaxVLMAX =
2796 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2797
2798 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2799 unsigned MinVLMAX =
2800 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2801
2802 return std::make_pair(MinVLMAX, MaxVLMAX);
2803}
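
// Illustrative sketch (not part of the upstream source): the textbook VLMAX
// relationship these bounds are built from, VLMAX = LMUL * VLEN / SEW, where
// LMUL is derived from the type's known-minimum size and RVVBitsPerBlock (64
// is assumed here). For example nxv4i32 (known-min 128 bits, i.e. LMUL 2) on
// a VLEN=128 implementation gives VLMAX = 2 * 128 / 32 = 8 elements.
static unsigned vlmaxExample(unsigned VLen, unsigned SewBits,
                             unsigned KnownMinBits) {
  const unsigned RVVBitsPerBlockAssumed = 64;
  // Written so that fractional LMUL (known-min size below 64 bits) also works.
  return (KnownMinBits * VLen) / (RVVBitsPerBlockAssumed * SewBits);
}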
2804
2805// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2806// of either is (currently) supported. This can get us into an infinite loop
2807// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2808// as a ..., etc.
2809// Until either (or both) of these can reliably lower any node, reporting that
2810// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2811// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2812// which is not desirable.
2814 EVT VT, unsigned DefinedValues) const {
2815 return false;
2816}
2817
2819 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
2820 // implementation-defined.
2821 if (!VT.isVector())
2823 unsigned DLenFactor = Subtarget.getDLenFactor();
2824 unsigned Cost;
2825 if (VT.isScalableVector()) {
2826 unsigned LMul;
2827 bool Fractional;
2828 std::tie(LMul, Fractional) =
2830 if (Fractional)
2831 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2832 else
2833 Cost = (LMul * DLenFactor);
2834 } else {
2835 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2836 }
2837 return Cost;
2838}
2839
2840
2841/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2842/// is generally quadratic in the number of vreg implied by LMUL. Note that
2843/// operand (index and possibly mask) are handled separately.
2845 return getLMULCost(VT) * getLMULCost(VT);
2846}
2847
2848/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2849/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2850/// or may track the vrgather.vv cost. It is implementation-dependent.
2852 return getLMULCost(VT);
2853}
2854
2855/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2856/// for the type VT. (This does not cover the vslide1up or vslide1down
2857/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2858/// or may track the vrgather.vv cost. It is implementation-dependent.
2860 return getLMULCost(VT);
2861}
2862
2863/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2864/// for the type VT. (This does not cover the vslide1up or vslide1down
2865/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2866/// or may track the vrgather.vv cost. It is implementation-dependent.
2868 return getLMULCost(VT);
2869}
2870
2872 const RISCVSubtarget &Subtarget) {
2873 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2874 // bf16 conversions are always promoted to f32.
2875 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2876 Op.getValueType() == MVT::bf16) {
2877 bool IsStrict = Op->isStrictFPOpcode();
2878
2879 SDLoc DL(Op);
2880 if (IsStrict) {
2881 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2882 {Op.getOperand(0), Op.getOperand(1)});
2883 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2884 {Op.getValueType(), MVT::Other},
2885 {Val.getValue(1), Val.getValue(0),
2886 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2887 }
2888 return DAG.getNode(
2889 ISD::FP_ROUND, DL, Op.getValueType(),
2890 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2892 }
2893
2894 // Other operations are legal.
2895 return Op;
2896}
2897
2899 const RISCVSubtarget &Subtarget) {
2900 // RISC-V FP-to-int conversions saturate to the destination register size, but
2901 // don't produce 0 for nan. We can use a conversion instruction and fix the
2902 // nan case with a compare and a select.
2903 SDValue Src = Op.getOperand(0);
2904
2905 MVT DstVT = Op.getSimpleValueType();
2906 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2907
2908 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2909
2910 if (!DstVT.isVector()) {
2911 // For bf16 or for f16 in absence of Zfh, promote to f32, then saturate
2912 // the result.
2913 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2914 Src.getValueType() == MVT::bf16) {
2915 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2916 }
2917
2918 unsigned Opc;
2919 if (SatVT == DstVT)
2920 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2921 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2923 else
2924 return SDValue();
2925 // FIXME: Support other SatVTs by clamping before or after the conversion.
2926
2927 SDLoc DL(Op);
2928 SDValue FpToInt = DAG.getNode(
2929 Opc, DL, DstVT, Src,
2931
2932 if (Opc == RISCVISD::FCVT_WU_RV64)
2933 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2934
2935 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2936 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2938 }
2939
2940 // Vectors.
2941
2942 MVT DstEltVT = DstVT.getVectorElementType();
2943 MVT SrcVT = Src.getSimpleValueType();
2944 MVT SrcEltVT = SrcVT.getVectorElementType();
2945 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2946 unsigned DstEltSize = DstEltVT.getSizeInBits();
2947
2948 // Only handle saturating to the destination type.
2949 if (SatVT != DstEltVT)
2950 return SDValue();
2951
2952 MVT DstContainerVT = DstVT;
2953 MVT SrcContainerVT = SrcVT;
2954 if (DstVT.isFixedLengthVector()) {
2955 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2956 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2957 assert(DstContainerVT.getVectorElementCount() ==
2958 SrcContainerVT.getVectorElementCount() &&
2959 "Expected same element count");
2960 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2961 }
2962
2963 SDLoc DL(Op);
2964
2965 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2966
2967 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2968 {Src, Src, DAG.getCondCode(ISD::SETNE),
2969 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2970
2971 // If we need to widen by more than 1 step, promote the FP type, then do a
2972 // widening convert.
2973 if (DstEltSize > (2 * SrcEltSize)) {
2974 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2975 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2976 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2977 }
2978
2979 MVT CvtContainerVT = DstContainerVT;
2980 MVT CvtEltVT = DstEltVT;
2981 if (SrcEltSize > (2 * DstEltSize)) {
2982 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2983 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2984 }
2985
2986 unsigned RVVOpc =
2988 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2989
2990 while (CvtContainerVT != DstContainerVT) {
2991 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2992 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2993 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2994 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
2996 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
2997 }
2998
2999 SDValue SplatZero = DAG.getNode(
3000 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3001 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3002 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3003 Res, DAG.getUNDEF(DstContainerVT), VL);
3004
3005 if (DstVT.isFixedLengthVector())
3006 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3007
3008 return Res;
3009}
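
// Illustrative sketch (not part of the upstream source): the scalar behaviour
// the lowering above implements for a non-vector FP_TO_SINT_SAT to i32.
// The RISC-V conversion already saturates out-of-range inputs, so the only
// extra work is the compare-and-select that forces NaN to 0 (assumes
// <cstdint> for INT32_MIN/INT32_MAX).
static int32_t fpToSint32SatExample(double X) {
  if (X != X)                  // NaN: the select on (Src == Src) picks 0
    return 0;
  if (X <= (double)INT32_MIN)  // saturate the low end
    return INT32_MIN;
  if (X >= (double)INT32_MAX)  // saturate the high end
    return INT32_MAX;
  return (int32_t)X;           // in range: truncate toward zero
}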
3010
3012 const RISCVSubtarget &Subtarget) {
3013 bool IsStrict = Op->isStrictFPOpcode();
3014 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3015
3016 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3017 // bf16 conversions are always promoted to f32.
3018 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3019 SrcVal.getValueType() == MVT::bf16) {
3020 SDLoc DL(Op);
3021 if (IsStrict) {
3022 SDValue Ext =
3023 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3024 {Op.getOperand(0), SrcVal});
3025 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3026 {Ext.getValue(1), Ext.getValue(0)});
3027 }
3028 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3029 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3030 }
3031
3032 // Other operations are legal.
3033 return Op;
3034}
3035
3037 switch (Opc) {
3038 case ISD::FROUNDEVEN:
3040 case ISD::VP_FROUNDEVEN:
3041 return RISCVFPRndMode::RNE;
3042 case ISD::FTRUNC:
3043 case ISD::STRICT_FTRUNC:
3044 case ISD::VP_FROUNDTOZERO:
3045 return RISCVFPRndMode::RTZ;
3046 case ISD::FFLOOR:
3047 case ISD::STRICT_FFLOOR:
3048 case ISD::VP_FFLOOR:
3049 return RISCVFPRndMode::RDN;
3050 case ISD::FCEIL:
3051 case ISD::STRICT_FCEIL:
3052 case ISD::VP_FCEIL:
3053 return RISCVFPRndMode::RUP;
3054 case ISD::FROUND:
3055 case ISD::STRICT_FROUND:
3056 case ISD::VP_FROUND:
3057 return RISCVFPRndMode::RMM;
3058 case ISD::FRINT:
3059 case ISD::VP_FRINT:
3060 return RISCVFPRndMode::DYN;
3061 }
3062
3064}
3065
3066// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3067// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3068// the integer domain and back, taking care to avoid converting values that are
3069// nan or already correct.
3070static SDValue
3072 const RISCVSubtarget &Subtarget) {
3073 MVT VT = Op.getSimpleValueType();
3074 assert(VT.isVector() && "Unexpected type");
3075
3076 SDLoc DL(Op);
3077
3078 SDValue Src = Op.getOperand(0);
3079
3080 MVT ContainerVT = VT;
3081 if (VT.isFixedLengthVector()) {
3082 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3083 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3084 }
3085
3086 SDValue Mask, VL;
3087 if (Op->isVPOpcode()) {
3088 Mask = Op.getOperand(1);
3089 if (VT.isFixedLengthVector())
3090 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3091 Subtarget);
3092 VL = Op.getOperand(2);
3093 } else {
3094 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3095 }
3096
3097 // Freeze the source since we are increasing the number of uses.
3098 Src = DAG.getFreeze(Src);
3099
3100 // We do the conversion on the absolute value and fix the sign at the end.
3101 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3102
3103 // Determine the largest integer that can be represented exactly. This and
3104 // values larger than it don't have any fractional bits so don't need to
3105 // be converted.
3106 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3107 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3108 APFloat MaxVal = APFloat(FltSem);
3109 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3110 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3111 SDValue MaxValNode =
3112 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3113 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3114 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3115
3116 // If abs(Src) was larger than MaxVal or nan, keep it.
3117 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3118 Mask =
3119 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3120 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3121 Mask, Mask, VL});
3122
3123 // Truncate to integer and convert back to FP.
3124 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3125 MVT XLenVT = Subtarget.getXLenVT();
3126 SDValue Truncated;
3127
3128 switch (Op.getOpcode()) {
3129 default:
3130 llvm_unreachable("Unexpected opcode");
3131 case ISD::FRINT:
3132 case ISD::VP_FRINT:
3133 case ISD::FCEIL:
3134 case ISD::VP_FCEIL:
3135 case ISD::FFLOOR:
3136 case ISD::VP_FFLOOR:
3137 case ISD::FROUND:
3138 case ISD::FROUNDEVEN:
3139 case ISD::VP_FROUND:
3140 case ISD::VP_FROUNDEVEN:
3141 case ISD::VP_FROUNDTOZERO: {
3144 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3145 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3146 break;
3147 }
3148 case ISD::FTRUNC:
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3150 Mask, VL);
3151 break;
3152 case ISD::FNEARBYINT:
3153 case ISD::VP_FNEARBYINT:
3154 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3155 Mask, VL);
3156 break;
3157 }
3158
3159 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3160 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3161 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3162 Mask, VL);
3163
3164 // Restore the original sign so that -0.0 is preserved.
3165 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3166 Src, Src, Mask, VL);
3167
3168 if (!VT.isFixedLengthVector())
3169 return Truncated;
3170
3171 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3172}
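
// Illustrative sketch (not part of the upstream source): the scalar idea
// behind the lowering above, shown for the FTRUNC (RTZ) case on double and
// assuming <cmath> is available. Lanes that are NaN or whose magnitude is at
// least 2^52 (2^(precision-1)) already have no fractional bits and are left
// untouched by the mask; everything else round-trips through the integer
// domain, and the final copysign keeps -0.0 intact.
static double truncViaIntDomainExample(double X) {
  const double MaxExactDouble = 4503599627370496.0; // 2^52
  if (!(std::fabs(X) < MaxExactDouble))       // NaN or |X| >= 2^52: keep as-is
    return X;
  double RoundTripped = (double)(long long)X; // fcvt with RTZ, then back to FP
  return std::copysign(RoundTripped, X);      // restores the sign of -0.0
}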
3173
3174// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3175// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaNs in the source to
3176// qNaNs and converting the new source to integer and back to FP.
3177static SDValue
3179 const RISCVSubtarget &Subtarget) {
3180 SDLoc DL(Op);
3181 MVT VT = Op.getSimpleValueType();
3182 SDValue Chain = Op.getOperand(0);
3183 SDValue Src = Op.getOperand(1);
3184
3185 MVT ContainerVT = VT;
3186 if (VT.isFixedLengthVector()) {
3187 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3188 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3189 }
3190
3191 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3192
3193 // Freeze the source since we are increasing the number of uses.
3194 Src = DAG.getFreeze(Src);
3195
3196 // Convert sNaN to qNaN by computing x + x for each unordered element x in Src.
3197 MVT MaskVT = Mask.getSimpleValueType();
3199 DAG.getVTList(MaskVT, MVT::Other),
3200 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3201 DAG.getUNDEF(MaskVT), Mask, VL});
3202 Chain = Unorder.getValue(1);
3204 DAG.getVTList(ContainerVT, MVT::Other),
3205 {Chain, Src, Src, Src, Unorder, VL});
3206 Chain = Src.getValue(1);
3207
3208 // We do the conversion on the absolute value and fix the sign at the end.
3209 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3210
3211 // Determine the largest integer that can be represented exactly. This and
3212 // values larger than it don't have any fractional bits so don't need to
3213 // be converted.
3214 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3215 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3216 APFloat MaxVal = APFloat(FltSem);
3217 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3218 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3219 SDValue MaxValNode =
3220 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3221 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3222 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3223
3224 // If abs(Src) was larger than MaxVal or nan, keep it.
3225 Mask = DAG.getNode(
3226 RISCVISD::SETCC_VL, DL, MaskVT,
3227 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3228
3229 // Truncate to integer and convert back to FP.
3230 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3231 MVT XLenVT = Subtarget.getXLenVT();
3232 SDValue Truncated;
3233
3234 switch (Op.getOpcode()) {
3235 default:
3236 llvm_unreachable("Unexpected opcode");
3237 case ISD::STRICT_FCEIL:
3238 case ISD::STRICT_FFLOOR:
3239 case ISD::STRICT_FROUND:
3243 Truncated = DAG.getNode(
3244 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3245 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3246 break;
3247 }
3248 case ISD::STRICT_FTRUNC:
3249 Truncated =
3251 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3252 break;
3255 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3256 Mask, VL);
3257 break;
3258 }
3259 Chain = Truncated.getValue(1);
3260
3261 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3262 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3263 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3264 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3265 Truncated, Mask, VL);
3266 Chain = Truncated.getValue(1);
3267 }
3268
3269 // Restore the original sign so that -0.0 is preserved.
3270 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3271 Src, Src, Mask, VL);
3272
3273 if (VT.isFixedLengthVector())
3274 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3275 return DAG.getMergeValues({Truncated, Chain}, DL);
3276}
3277
3278static SDValue
3280 const RISCVSubtarget &Subtarget) {
3281 MVT VT = Op.getSimpleValueType();
3282 if (VT.isVector())
3283 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3284
3285 if (DAG.shouldOptForSize())
3286 return SDValue();
3287
3288 SDLoc DL(Op);
3289 SDValue Src = Op.getOperand(0);
3290
3291 // Create an integer the size of the mantissa with the MSB set. This and all
3292 // values larger than it don't have any fractional bits so don't need to be
3293 // converted.
3294 const fltSemantics &FltSem = VT.getFltSemantics();
3295 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3296 APFloat MaxVal = APFloat(FltSem);
3297 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3298 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3299 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3300
3302 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3303 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3304}
3305
3306// Expand vector LRINT and LLRINT by converting to the integer domain.
3308 const RISCVSubtarget &Subtarget) {
3309 MVT VT = Op.getSimpleValueType();
3310 assert(VT.isVector() && "Unexpected type");
3311
3312 SDLoc DL(Op);
3313 SDValue Src = Op.getOperand(0);
3314 MVT ContainerVT = VT;
3315
3316 if (VT.isFixedLengthVector()) {
3317 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3318 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3319 }
3320
3321 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3322 SDValue Truncated = DAG.getNode(
3323 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3325 VL);
3326
3327 if (!VT.isFixedLengthVector())
3328 return Truncated;
3329
3330 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3331}
3332
3333static SDValue
3335 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3336 SDValue Offset, SDValue Mask, SDValue VL,
3338 if (Passthru.isUndef())
3340 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3341 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3342 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3343}
3344
3345static SDValue
3346getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3347 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3348 SDValue VL,
3350 if (Passthru.isUndef())
3352 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3353 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3354 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3355}
3356
3357static MVT getLMUL1VT(MVT VT) {
3359 "Unexpected vector MVT");
3363}
3364
3365struct VIDSequence {
3366 int64_t StepNumerator;
3367 unsigned StepDenominator;
3368 int64_t Addend;
3369};
3370
3371static std::optional<APInt> getExactInteger(const APFloat &APF,
3373 // We will use a SINT_TO_FP to materialize this constant so we should use a
3374 // signed APSInt here.
3375 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3376 // We use an arbitrary rounding mode here. If a floating-point is an exact
3377 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3378 // the rounding mode changes the output value, then it is not an exact
3379 // integer.
3381 bool IsExact;
3382 // If it is out of signed integer range, it will return an invalid operation.
3383 // If it is not an exact integer, IsExact is false.
3384 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3386 !IsExact)
3387 return std::nullopt;
3388 return ValInt.extractBits(BitWidth, 0);
3389}
3390
3391// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3392// to the (non-zero) step S and start value X. This can then be lowered as the
3393// RVV sequence (VID * S) + X, for example.
3394// The step S is represented as an integer numerator divided by a positive
3395// denominator. Note that the implementation currently only identifies
3396// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3397// cannot detect 2/3, for example.
3398// Note that this method will also match potentially unappealing index
3399// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3400// determine whether this is worth generating code for.
3401//
3402// EltSizeInBits is the size of the type that the sequence will be calculated
3403// in, i.e. SEW for build_vectors or XLEN for address calculations.
3404static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3405 unsigned EltSizeInBits) {
3406 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3407 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3408 return std::nullopt;
3409 bool IsInteger = Op.getValueType().isInteger();
3410
3411 std::optional<unsigned> SeqStepDenom;
3412 std::optional<APInt> SeqStepNum;
3413 std::optional<APInt> SeqAddend;
3414 std::optional<std::pair<APInt, unsigned>> PrevElt;
3415 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3416
3417 // First extract the ops into a list of constant integer values. This may not
3418 // be possible for floats if they're not all representable as integers.
3420 const unsigned OpSize = Op.getScalarValueSizeInBits();
3421 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3422 if (Elt.isUndef()) {
3423 Elts[Idx] = std::nullopt;
3424 continue;
3425 }
3426 if (IsInteger) {
3427 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3428 } else {
3429 auto ExactInteger =
3430 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3431 if (!ExactInteger)
3432 return std::nullopt;
3433 Elts[Idx] = *ExactInteger;
3434 }
3435 }
3436
3437 for (auto [Idx, Elt] : enumerate(Elts)) {
3438 // Assume undef elements match the sequence; we just have to be careful
3439 // when interpolating across them.
3440 if (!Elt)
3441 continue;
3442
3443 if (PrevElt) {
3444 // Calculate the step since the last non-undef element, and ensure
3445 // it's consistent across the entire sequence.
3446 unsigned IdxDiff = Idx - PrevElt->second;
3447 APInt ValDiff = *Elt - PrevElt->first;
3448
3449 // A zero value difference means that we're somewhere in the middle
3450 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3451 // step change before evaluating the sequence.
3452 if (ValDiff == 0)
3453 continue;
3454
3455 int64_t Remainder = ValDiff.srem(IdxDiff);
3456 // Normalize the step if it's greater than 1.
3457 if (Remainder != ValDiff.getSExtValue()) {
3458 // The difference must cleanly divide the element span.
3459 if (Remainder != 0)
3460 return std::nullopt;
3461 ValDiff = ValDiff.sdiv(IdxDiff);
3462 IdxDiff = 1;
3463 }
3464
3465 if (!SeqStepNum)
3466 SeqStepNum = ValDiff;
3467 else if (ValDiff != SeqStepNum)
3468 return std::nullopt;
3469
3470 if (!SeqStepDenom)
3471 SeqStepDenom = IdxDiff;
3472 else if (IdxDiff != *SeqStepDenom)
3473 return std::nullopt;
3474 }
3475
3476 // Record this non-undef element for later.
3477 if (!PrevElt || PrevElt->first != *Elt)
3478 PrevElt = std::make_pair(*Elt, Idx);
3479 }
3480
3481 // We need to have logged a step for this to count as a legal index sequence.
3482 if (!SeqStepNum || !SeqStepDenom)
3483 return std::nullopt;
3484
3485 // Loop back through the sequence and validate elements we might have skipped
3486 // while waiting for a valid step. While doing this, log any sequence addend.
3487 for (auto [Idx, Elt] : enumerate(Elts)) {
3488 if (!Elt)
3489 continue;
3490 APInt ExpectedVal =
3491 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3492 *SeqStepNum)
3493 .sdiv(*SeqStepDenom);
3494
3495 APInt Addend = *Elt - ExpectedVal;
3496 if (!SeqAddend)
3497 SeqAddend = Addend;
3498 else if (Addend != SeqAddend)
3499 return std::nullopt;
3500 }
3501
3502 assert(SeqAddend && "Must have an addend if we have a step");
3503
3504 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3505 SeqAddend->getSExtValue()};
3506}
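
// Illustrative sketch (not part of the upstream source): the simplest case
// the matcher above accepts -- a fully-defined sequence with step denominator
// 1 and no undefs. For [3, 5, 7, 9] this reports step 2 and addend 3, so the
// vector could be built as (vid.v * 2) + 3.
static bool matchUnitStepVIDExample(const int64_t *Elts, unsigned NumElts,
                                    int64_t &Step, int64_t &Addend) {
  if (NumElts < 2)
    return false;
  Step = Elts[1] - Elts[0];
  if (Step == 0)
    return false; // a splat, not a strided sequence
  for (unsigned I = 2; I < NumElts; ++I)
    if (Elts[I] - Elts[I - 1] != Step)
      return false;
  Addend = Elts[0]; // element i is Addend + i * Step
  return true;
}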
3507
3508// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3509// and lower it as a VRGATHER_VX_VL from the source vector.
3510static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3511 SelectionDAG &DAG,
3512 const RISCVSubtarget &Subtarget) {
3513 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3514 return SDValue();
3515 SDValue Vec = SplatVal.getOperand(0);
3516 // Don't perform this optimization for i1 vectors, or if the element types are
3517 // different
3518 // FIXME: Support i1 vectors, maybe by promoting to i8?
3519 MVT EltTy = VT.getVectorElementType();
3520 if (EltTy == MVT::i1 ||
3522 return SDValue();
3523 SDValue Idx = SplatVal.getOperand(1);
3524 // The index must be a legal type.
3525 if (Idx.getValueType() != Subtarget.getXLenVT())
3526 return SDValue();
3527
3528 // Check that Index lies within VT
3529 // TODO: Can we check if the Index is constant and known in-bounds?
3531 return SDValue();
3532
3533 MVT ContainerVT = VT;
3534 if (VT.isFixedLengthVector())
3535 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3536
3537 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3538 DAG.getUNDEF(ContainerVT), Vec,
3539 DAG.getVectorIdxConstant(0, DL));
3540
3541 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3542
3543 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
3544 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3545
3546 if (!VT.isFixedLengthVector())
3547 return Gather;
3548
3549 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
3550}
3551
3552/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3553/// which constitute a large proportion of the elements. In such cases we can
3554/// splat a vector with the dominant element and make up the shortfall with
3555/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3556/// Note that this includes vectors of 2 elements by association. The
3557/// upper-most element is the "dominant" one, allowing us to use a splat to
3558/// "insert" the upper element, and an insert of the lower element at position
3559/// 0, which improves codegen.
3561 const RISCVSubtarget &Subtarget) {
3562 MVT VT = Op.getSimpleValueType();
3563 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3564
3565 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3566
3567 SDLoc DL(Op);
3568 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3569
3570 MVT XLenVT = Subtarget.getXLenVT();
3571 unsigned NumElts = Op.getNumOperands();
3572
3573 SDValue DominantValue;
3574 unsigned MostCommonCount = 0;
3575 DenseMap<SDValue, unsigned> ValueCounts;
3576 unsigned NumUndefElts =
3577 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3578
3579 // Track the number of scalar loads we know we'd be inserting, estimated as
3580 // one per non-zero floating-point constant. Other kinds of elements are
3581 // either already in registers or are materialized on demand. The threshold
3582 // at which a vector load is more desirable than several scalar
3583 // materializations and vector-insertion instructions is not known.
3584 unsigned NumScalarLoads = 0;
3585
3586 for (SDValue V : Op->op_values()) {
3587 if (V.isUndef())
3588 continue;
3589
3590 unsigned &Count = ValueCounts[V];
3591 if (0 == Count)
3592 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3593 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3594
3595 // Is this value dominant? In case of a tie, prefer the highest element as
3596 // it's cheaper to insert near the beginning of a vector than it is at the
3597 // end.
3598 if (++Count >= MostCommonCount) {
3599 DominantValue = V;
3600 MostCommonCount = Count;
3601 }
3602 }
3603
3604 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3605 unsigned NumDefElts = NumElts - NumUndefElts;
3606 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3607
3608 // Don't perform this optimization when optimizing for size, since
3609 // materializing elements and inserting them tends to cause code bloat.
3610 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3611 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3612 ((MostCommonCount > DominantValueCountThreshold) ||
3613 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3614 // Start by splatting the most common element.
3615 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3616
3617 DenseSet<SDValue> Processed{DominantValue};
3618
3619 // We can handle an insert into the last element (of a splat) via
3620 // v(f)slide1down. This is slightly better than the vslideup insert
3621 // lowering as it avoids the need for a vector group temporary. It
3622 // is also better than using vmerge.vx as it avoids the need to
3623 // materialize the mask in a vector register.
3624 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3625 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3626 LastOp != DominantValue) {
3627 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3628 auto OpCode =
3630 if (!VT.isFloatingPoint())
3631 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3632 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3633 LastOp, Mask, VL);
3634 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3635 Processed.insert(LastOp);
3636 }
3637
3638 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3639 for (const auto &OpIdx : enumerate(Op->ops())) {
3640 const SDValue &V = OpIdx.value();
3641 if (V.isUndef() || !Processed.insert(V).second)
3642 continue;
3643 if (ValueCounts[V] == 1) {
3644 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3645 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3646 } else {
3647 // Blend in all instances of this value using a VSELECT, using a
3648 // mask where each bit signals whether that element is the one
3649 // we're after.
3651 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3652 return DAG.getConstant(V == V1, DL, XLenVT);
3653 });
3654 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3655 DAG.getBuildVector(SelMaskTy, DL, Ops),
3656 DAG.getSplatBuildVector(VT, DL, V), Vec);
3657 }
3658 }
3659
3660 return Vec;
3661 }
3662
3663 return SDValue();
3664}
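
// Illustrative sketch (not part of the upstream source): the core dominance
// threshold used above, over plain integers and ignoring undefs and the
// secondary ValueCounts.size() condition. For <2, 2, 2, 5> the value 2
// occurs 3 times and 3 > (4 - 2), so the splat-then-patch strategy applies;
// for <0, 1, 2, 3> no value clears the threshold.
static bool hasDominantValueExample(const int64_t *Elts, unsigned NumElts) {
  unsigned MostCommonCount = 0;
  for (unsigned I = 0; I < NumElts; ++I) {
    unsigned Count = 0;
    for (unsigned J = 0; J < NumElts; ++J)
      Count += (Elts[J] == Elts[I]);
    if (Count > MostCommonCount)
      MostCommonCount = Count;
  }
  unsigned Threshold = NumElts <= 2 ? 0 : NumElts - 2;
  return MostCommonCount > Threshold;
}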
3665
3667 const RISCVSubtarget &Subtarget) {
3668 MVT VT = Op.getSimpleValueType();
3669 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3670
3671 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3672
3673 SDLoc DL(Op);
3674 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3675
3676 MVT XLenVT = Subtarget.getXLenVT();
3677 unsigned NumElts = Op.getNumOperands();
3678
3679 if (VT.getVectorElementType() == MVT::i1) {
3680 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3681 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3682 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3683 }
3684
3685 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3686 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3687 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3688 }
3689
3690 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3691 // scalar integer chunks whose bit-width depends on the number of mask
3692 // bits and XLEN.
3693 // First, determine the most appropriate scalar integer type to use. This
3694 // is at most XLenVT, but may be shrunk to a smaller vector element type
3695 // according to the size of the final vector - use i8 chunks rather than
3696 // XLenVT if we're producing a v8i1. This results in more consistent
3697 // codegen across RV32 and RV64.
3698 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3699 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3700 // If we have to use more than one INSERT_VECTOR_ELT then this
3701 // optimization is likely to increase code size; avoid performing it in
3702 // such a case. We can use a load from a constant pool in this case.
3703 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3704 return SDValue();
3705 // Now we can create our integer vector type. Note that it may be larger
3706 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3707 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3708 MVT IntegerViaVecVT =
3709 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3710 IntegerViaVecElts);
3711
3712 uint64_t Bits = 0;
3713 unsigned BitPos = 0, IntegerEltIdx = 0;
3714 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3715
3716 for (unsigned I = 0; I < NumElts;) {
3717 SDValue V = Op.getOperand(I);
3718 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3719 Bits |= ((uint64_t)BitValue << BitPos);
3720 ++BitPos;
3721 ++I;
3722
3723 // Once we accumulate enough bits to fill our scalar type or process the
3724 // last element, insert into our vector and clear our accumulated data.
3725 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3726 if (NumViaIntegerBits <= 32)
3727 Bits = SignExtend64<32>(Bits);
3728 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3729 Elts[IntegerEltIdx] = Elt;
3730 Bits = 0;
3731 BitPos = 0;
3732 IntegerEltIdx++;
3733 }
3734 }
3735
3736 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3737
3738 if (NumElts < NumViaIntegerBits) {
3739 // If we're producing a smaller vector than our minimum legal integer
3740 // type, bitcast to the equivalent (known-legal) mask type, and extract
3741 // our final mask.
3742 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3743 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3744 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3745 DAG.getConstant(0, DL, XLenVT));
3746 } else {
3747 // Else we must have produced an integer type with the same size as the
3748 // mask type; bitcast for the final result.
3749 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3750 Vec = DAG.getBitcast(VT, Vec);
3751 }
3752
3753 return Vec;
3754 }
3755
3756 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3757 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3759 if (!VT.isFloatingPoint())
3760 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3761 Splat =
3762 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3763 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3764 }
3765
3766 // Try and match index sequences, which we can lower to the vid instruction
3767 // with optional modifications. An all-undef vector is matched by
3768 // getSplatValue, above.
3769 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3770 int64_t StepNumerator = SimpleVID->StepNumerator;
3771 unsigned StepDenominator = SimpleVID->StepDenominator;
3772 int64_t Addend = SimpleVID->Addend;
3773
3774 assert(StepNumerator != 0 && "Invalid step");
3775 bool Negate = false;
3776 int64_t SplatStepVal = StepNumerator;
3777 unsigned StepOpcode = ISD::MUL;
3778 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3779 // anyway as the shift of 63 won't fit in uimm5.
3780 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3781 isPowerOf2_64(std::abs(StepNumerator))) {
3782 Negate = StepNumerator < 0;
3783 StepOpcode = ISD::SHL;
3784 SplatStepVal = Log2_64(std::abs(StepNumerator));
3785 }
3786
3787 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3788 // threshold since it's the immediate value many RVV instructions accept.
3789 // There is no vmul.vi instruction, so ensure the multiply constant can be
3790 // materialized with a single addi instruction.
3791 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3792 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3793 isPowerOf2_32(StepDenominator) &&
3794 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3795 MVT VIDVT =
3796 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3797 MVT VIDContainerVT =
3798 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3799 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3800 // Convert right out of the scalable type so we can use standard ISD
3801 // nodes for the rest of the computation. If we used scalable types with
3802 // these, we'd lose the fixed-length vector info and generate worse
3803 // vsetvli code.
3804 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3805 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3806 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3807 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3808 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3809 }
3810 if (StepDenominator != 1) {
3811 SDValue SplatStep =
3812 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3813 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3814 }
3815 if (Addend != 0 || Negate) {
3816 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3817 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3818 VID);
3819 }
3820 if (VT.isFloatingPoint()) {
3821 // TODO: Use vfwcvt to reduce register pressure.
3822 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3823 }
3824 return VID;
3825 }
3826 }
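// Illustrative sketch (editorial addition, not part of the original file):
// the matched sequence satisfies Elt[I] = (I * StepNumerator) /
// StepDenominator + Addend, and the block above rebuilds it from vid.v.
// vidElement is a hypothetical scalar model of the emitted dataflow:
//
//   int64_t vidElement(unsigned I, int64_t StepNumerator,
//                      unsigned StepDenominator, int64_t Addend) {
//     int64_t V = I;                  // vid.v
//     V *= StepNumerator;             // vmul.vx or vsll.vi (negation folded
//                                     // into the final subtract)
//     V /= (int64_t)StepDenominator;  // vsrl.vi by Log2(StepDenominator)
//     return V + Addend;              // vadd.vi/vadd.vx, or a reverse
//                                     // subtract for negated steps
//   }
//
// e.g. {5, 3, 1, -1} matches StepNumerator = -2, StepDenominator = 1 and
// Addend = 5: the lowering shifts vid.v left by one and subtracts the
// result from 5.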
3827
3828 // For very small build_vectors, use a single scalar insert of a constant.
3829 // TODO: Base this on constant rematerialization cost, not size.
3830 const unsigned EltBitSize = VT.getScalarSizeInBits();
3831 if (VT.getSizeInBits() <= 32 &&
3832 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3833 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3834 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3835 "Unexpected sequence type");
3836 // If we can use the original VL with the modified element type, this
3837 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3838 // be moved into InsertVSETVLI?
3839 unsigned ViaVecLen =
3840 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3841 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3842
3843 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3844 uint64_t SplatValue = 0;
3845 // Construct the amalgamated value at this larger vector type.
3846 for (const auto &OpIdx : enumerate(Op->op_values())) {
3847 const auto &SeqV = OpIdx.value();
3848 if (!SeqV.isUndef())
3849 SplatValue |=
3850 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3851 }
3852
3853 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3854 // achieve better constant materialization.
3855 // On RV32, we need to sign-extend to use getSignedConstant.
3856 if (ViaIntVT == MVT::i32)
3857 SplatValue = SignExtend64<32>(SplatValue);
3858
3859 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3860 DAG.getUNDEF(ViaVecVT),
3861 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3862 DAG.getVectorIdxConstant(0, DL));
3863 if (ViaVecLen != 1)
3864 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3865 MVT::getVectorVT(ViaIntVT, 1), Vec,
3866 DAG.getConstant(0, DL, XLenVT));
3867 return DAG.getBitcast(VT, Vec);
3868 }
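// Illustrative sketch (editorial addition, not part of the original file):
// the block above folds a small constant build_vector into one scalar of the
// same total width, e.g. v4i8 {1, 2, 3, 4} becomes the i32 value
//   1 | (2 << 8) | (3 << 16) | (4 << 24) == 0x04030201,
// which is inserted as a single element and bitcast back to v4i8.
// amalgamate is a hypothetical scalar model of the fold (little-endian
// element order, matching the loop above):
//
//   uint64_t amalgamate(const uint64_t *Elts, unsigned NumElts,
//                       unsigned EltBits) {
//     uint64_t Splat = 0, Mask = maskTrailingOnes<uint64_t>(EltBits);
//     for (unsigned I = 0; I < NumElts; ++I)
//       Splat |= (Elts[I] & Mask) << (I * EltBits);
//     return Splat;
//   }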
3869
3870
3871 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3872 // when re-interpreted as a vector with a larger element type. For example,
3873 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3874 // could instead be splat as
3875 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3876 // TODO: This optimization could also work on non-constant splats, but it
3877 // would require bit-manipulation instructions to construct the splat value.
3878 SmallVector<SDValue> Sequence;
3879 const auto *BV = cast<BuildVectorSDNode>(Op);
3880 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3881 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3882 BV->getRepeatedSequence(Sequence) &&
3883 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3884 unsigned SeqLen = Sequence.size();
3885 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3886 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3887 ViaIntVT == MVT::i64) &&
3888 "Unexpected sequence type");
3889
3890 // If we can use the original VL with the modified element type, this
3891 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3892 // be moved into InsertVSETVLI?
3893 const unsigned RequiredVL = NumElts / SeqLen;
3894 const unsigned ViaVecLen =
3895 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3896 NumElts : RequiredVL;
3897 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3898
3899 unsigned EltIdx = 0;
3900 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3901 uint64_t SplatValue = 0;
3902 // Construct the amalgamated value which can be splatted as this larger
3903 // vector type.
3904 for (const auto &SeqV : Sequence) {
3905 if (!SeqV.isUndef())
3906 SplatValue |=
3907 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3908 EltIdx++;
3909 }
3910
3911 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3912 // achieve better constant materialization.
3913 // On RV32, we need to sign-extend to use getSignedConstant.
3914 if (ViaIntVT == MVT::i32)
3915 SplatValue = SignExtend64<32>(SplatValue);
3916
3917 // Since we can't introduce illegal i64 types at this stage, we can only
3918 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3919 // way we can use RVV instructions to splat.
3920 assert((ViaIntVT.bitsLE(XLenVT) ||
3921 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3922 "Unexpected bitcast sequence");
3923 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3924 SDValue ViaVL =
3925 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3926 MVT ViaContainerVT =
3927 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3928 SDValue Splat =
3929 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3930 DAG.getUNDEF(ViaContainerVT),
3931 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3932 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3933 if (ViaVecLen != RequiredVL)
3934 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3935 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3936 DAG.getConstant(0, DL, XLenVT));
3937 return DAG.getBitcast(VT, Splat);
3938 }
3939 }
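// Illustrative sketch (editorial addition, not part of the original file):
// for the v4i16 example in the comment above, the repeated sequence is
// {0, 1} with EltBitSize == 16, so
//   SplatValue = (0 & 0xFFFF) | ((1 & 0xFFFF) << 16) == 0x00010000
// and the build_vector becomes a v2i32 vmv.v.x splat of that constant. The
// isInt<32>(SplatValue) guard only matters for ViaIntVT == i64 on RV32: a
// value such as 0x0001000000010000 is not its own sign-extended 32-bit
// value, so it cannot be splat from a single GPR there and the fold is
// skipped.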
3940
3941 // If the number of signbits allows, see if we can lower as a <N x i8>.
3942 // Our main goal here is to reduce LMUL (and thus work) required to
3943 // build the constant, but we will also narrow if the resulting
3944 // narrow vector is known to materialize cheaply.
3945 // TODO: We really should be costing the smaller vector. There are
3946 // profitable cases this misses.
3947 if (EltBitSize > 8 && VT.isInteger() &&
3948 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3949 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3950 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3951 DL, Op->ops());
3952 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3953 Source, DAG, Subtarget);
3954 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3955 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3956 }
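// Illustrative sketch (editorial addition, not part of the original file):
// e.g. a v4i16 constant {3, -2, 7, 0} needs at most 8 significant bits per
// element, so it is built as the narrower v4i8 constant {3, -2, 7, 0} (half
// the LMUL) and widened back with a single sign-extension (vsext.vf2 here;
// vf4/vf8 for wider element types).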
3957
3958 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3959 return Res;
3960
3961 // For constant vectors, use generic constant pool lowering. Otherwise,
3962 // we'd have to materialize constants in GPRs just to move them into the
3963 // vector.
3964 return SDValue();
3965}
3966
3967static unsigned getPACKOpcode(unsigned DestBW,
3968 const RISCVSubtarget &Subtarget) {
3969 switch (DestBW) {
3970 default:
3971 llvm_unreachable("Unsupported pack size");
3972 case 16:
3973 return RISCV::PACKH;
3974 case 32:
3975 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3976 case 64:
3977 assert(Subtarget.is64Bit());
3978 return RISCV::PACK;
3979 }
3980}
3981
3982/// Double the element size of the build vector to reduce the number
3983/// of vslide1down operations in the build vector chain. In the worst case,
3984/// this trades three scalar operations for one vector operation. Scalar
3985/// operations are generally lower latency, and for out-of-order cores
3986/// we also benefit from additional parallelism.
3987static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
3988 const RISCVSubtarget &Subtarget) {
3989 SDLoc DL(Op);
3990 MVT VT = Op.getSimpleValueType();
3991 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3992 MVT ElemVT = VT.getVectorElementType();
3993 if (!ElemVT.isInteger())
3994 return SDValue();
3995
3996 // TODO: Relax these architectural restrictions, possibly with costing
3997 // of the actual instructions required.
3998 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
3999 return SDValue();
4000
4001 unsigned NumElts = VT.getVectorNumElements();
4002 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4003 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4004 NumElts % 2 != 0)
4005 return SDValue();
4006
4007 // Produce [B,A] packed into a type twice as wide. Note that all
4008 // scalars are XLenVT, possibly masked (see below).
4009 MVT XLenVT = Subtarget.getXLenVT();
4010 SDValue Mask = DAG.getConstant(
4011 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4012 auto pack = [&](SDValue A, SDValue B) {
4013 // Bias the scheduling of the inserted operations to near the
4014 // definition of the element - this tends to reduce register
4015 // pressure overall.
4016 SDLoc ElemDL(B);
4017 if (Subtarget.hasStdExtZbkb())
4018 // Note that we're relying on the high bits of the result being
4019 // don't care. For PACKW, the result is *sign* extended.
4020 return SDValue(
4021 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4022 ElemDL, XLenVT, A, B),
4023 0);
4024
4025 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4026 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4027 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4028 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4029 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4030 SDNodeFlags::Disjoint);
4031 };
4032
4033 SmallVector<SDValue> NewOperands;
4034 NewOperands.reserve(NumElts / 2);
4035 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4036 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4037 assert(NumElts == NewOperands.size() * 2);
4038 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4039 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4040 return DAG.getNode(ISD::BITCAST, DL, VT,
4041 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4042}
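// Illustrative sketch (editorial addition, not part of the original file):
// without Zbkb, each pair (A, B) of ElemSizeInBits-wide scalars is combined
// by the mask/shift/or sequence in the lambda above; with Zbkb a single
// packh/packw/pack does the same job. packPair is a hypothetical scalar
// model of the combine:
//
//   uint64_t packPair(uint64_t A, uint64_t B, unsigned EltBits) {
//     uint64_t Mask = maskTrailingOnes<uint64_t>(EltBits);
//     return (A & Mask) | ((B & Mask) << EltBits);
//   }
//
// e.g. for v4i16 {a, b, c, d} this builds v2i32 {packPair(a, b, 16),
// packPair(c, d, 16)}, halving the number of vslide1down steps needed.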
4043
4044static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4045 const RISCVSubtarget &Subtarget) {
4046 MVT VT = Op.getSimpleValueType();
4047 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4048
4049 MVT EltVT = VT.getVectorElementType();
4050 MVT XLenVT = Subtarget.getXLenVT();
4051
4052 SDLoc DL(Op);
4053
4054 // Proper support for f16 requires Zvfh. bf16 always requires special
4055 // handling. We need to cast the scalar to integer and create an integer
4056 // build_vector.
4057 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4058 MVT IVT = VT.changeVectorElementType(MVT::i16);
4059 SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
4060 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4061 SDValue Elem = Op.getOperand(I);
4062 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4063 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4064 // Called by LegalizeDAG, we need to use XLenVT operations since we
4065 // can't create illegal types.
4066 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4067 // Manually constant fold so the integer build_vector can be lowered
4068 // better. Waiting for DAGCombine will be too late.
4069 APInt V =
4070 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4071 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4072 } else {
4073 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4074 }
4075 } else {
4076 // Called by scalar type legalizer, we can use i16.
4077 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4078 }
4079 }
4080 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4081 return DAG.getBitcast(VT, Res);
4082 }
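// Illustrative sketch (editorial addition, not part of the original file):
// e.g. a v4f16 constant splat of 1.0 (bit pattern 0x3C00) becomes a v4i16
// build_vector of the constant 0x3C00 and is bitcast back to v4f16, so no
// FP-to-vector moves are needed; non-constant f16/bf16 elements are moved
// to a GPR with RISCVISD::FMV_X_ANYEXTH (fmv.x.h) instead.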
4083
4084 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4085 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4086 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4087
4088 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4089
4090 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4091
4092 if (VT.getVectorElementType() == MVT::i1) {
4093 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4094 // vector type, we have a legal equivalently-sized i8 type, so we can use
4095 // that.
4096 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4097 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4098
4099 SDValue WideVec;
4100 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4101 // For a splat, perform a scalar truncate before creating the wider
4102 // vector.
4103 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4104 DAG.getConstant(1, DL, Splat.getValueType()));
4105 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4106 } else {
4107 SmallVector<SDValue, 8> Ops(Op->op_values());
4108 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4109 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4110 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4111 }
4112
4113 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4114 }
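// Illustrative sketch (editorial addition, not part of the original file):
// for a non-constant v4i1 {a, b, c, d}, the block above builds the v4i8
// vector {a & 1, b & 1, c & 1, d & 1} and produces the mask with a single
// not-equal-to-zero compare (vmsne), avoiding per-element i1 insertion.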
4115
4116 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4117 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4118 return Gather;
4119 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4120 : RISCVISD::VMV_V_X_VL;
4121 if (!VT.isFloatingPoint())
4122 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4123 Splat =
4124 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4125 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4126 }
4127
4128 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4129 return Res;
4130
4131 // If we're compiling for an exact VLEN value, we can split our work per
4132 // register in the register group.
4133 if (const auto VLen = Subtarget.getRealVLen();
4134 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4135 MVT ElemVT = VT.getVectorElementType();
4136 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4137 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4138 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4139 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4140 assert(M1VT == getLMUL1VT(M1VT));
4141
4142 // The following semantically builds up a fixed length concat_vector
4143 // of the component build_vectors. We eagerly lower to scalable and
4144 // insert_subvector here to avoid DAG combining it back to a large
4145 // build_vector.
4146 SmallVector<SDValue> BuildVectorOps(Op->ops());
4147 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4148 SDValue Vec = DAG.getUNDEF(ContainerVT);
4149 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4150 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4151 SDValue SubBV =
4152 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4153 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4154 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4155 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4156 DAG.getVectorIdxConstant(InsertIdx, DL));
4157 }
4158 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4159 }
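// Illustrative sketch (editorial addition, not part of the original file):
// with a known VLEN of 128, a v8i32 build_vector (an LMUL=2 value) is split
// into two v4i32 build_vectors; each half is lowered independently,
// converted to an LMUL=1 scalable vector, and inserted as the subvector
// covering its own vector register, so no single step has to run at LMUL=2.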
4160