1//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCVISelLowering.h"
16#include "RISCV.h"
19#include "RISCVRegisterInfo.h"
21#include "RISCVSubtarget.h"
22#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
43#include "llvm/Support/Debug.h"
49#include <optional>
50
51using namespace llvm;
52
53#define DEBUG_TYPE "riscv-lower"
54
55STATISTIC(NumTailCalls, "Number of tail calls");
56
57static cl::opt<unsigned> ExtensionMaxWebSize(
58 DEBUG_TYPE "-ext-max-web-size", cl::Hidden,
59 cl::desc("Give the maximum size (in number of nodes) of the web of "
60 "instructions that we will consider for VW expansion"),
61 cl::init(18));
62
63static cl::opt<bool>
64 AllowSplatInVW_W(DEBUG_TYPE "-form-vw-w-with-splat", cl::Hidden,
65 cl::desc("Allow the formation of VW_W operations (e.g., "
66 "VWADD_W) with splat constants"),
67 cl::init(false));
68
69static cl::opt<unsigned> NumRepeatedDivisors(
70 DEBUG_TYPE "-fp-repeated-divisors", cl::Hidden,
71 cl::desc("Set the minimum number of repetitions of a divisor to allow "
72 "transformation to multiplications by the reciprocal"),
73 cl::init(2));
74
75static cl::opt<int>
76 FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
77 cl::desc("Give the maximum number of instructions that we will "
78 "use for creating a floating-point immediate value"),
79 cl::init(2));
80
81RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
82 const RISCVSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84
85 RISCVABI::ABI ABI = Subtarget.getTargetABI();
86 assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
87
88 if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
89 !Subtarget.hasStdExtF()) {
90 errs() << "Hard-float 'f' ABI can't be used for a target that "
91 "doesn't support the F instruction set extension (ignoring "
92 "target-abi)\n";
94 } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
95 !Subtarget.hasStdExtD()) {
96 errs() << "Hard-float 'd' ABI can't be used for a target that "
97 "doesn't support the D instruction set extension (ignoring "
98 "target-abi)\n";
100 }
101
102 switch (ABI) {
103 default:
104 report_fatal_error("Don't know how to lower this ABI");
113 break;
114 }
115
116 MVT XLenVT = Subtarget.getXLenVT();
117
118 // Set up the register classes.
119 addRegisterClass(XLenVT, &RISCV::GPRRegClass);
120
121 if (Subtarget.hasStdExtZfhmin())
122 addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
123 if (Subtarget.hasStdExtZfbfmin())
124 addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
125 if (Subtarget.hasStdExtF())
126 addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
127 if (Subtarget.hasStdExtD())
128 addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
129 if (Subtarget.hasStdExtZhinxmin())
130 addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
131 if (Subtarget.hasStdExtZfinx())
132 addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
133 if (Subtarget.hasStdExtZdinx()) {
134 if (Subtarget.is64Bit())
135 addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
136 else
137 addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
138 }
139
140 static const MVT::SimpleValueType BoolVecVTs[] = {
141 MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
142 MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
143 static const MVT::SimpleValueType IntVecVTs[] = {
144 MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
145 MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
146 MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
147 MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
148 MVT::nxv4i64, MVT::nxv8i64};
149 static const MVT::SimpleValueType F16VecVTs[] = {
150 MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
151 MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
152 static const MVT::SimpleValueType BF16VecVTs[] = {
153 MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16,
154 MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};
155 static const MVT::SimpleValueType F32VecVTs[] = {
156 MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
157 static const MVT::SimpleValueType F64VecVTs[] = {
158 MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
159 static const MVT::SimpleValueType VecTupleVTs[] = {
160 MVT::riscv_nxv1i8x2, MVT::riscv_nxv1i8x3, MVT::riscv_nxv1i8x4,
161 MVT::riscv_nxv1i8x5, MVT::riscv_nxv1i8x6, MVT::riscv_nxv1i8x7,
162 MVT::riscv_nxv1i8x8, MVT::riscv_nxv2i8x2, MVT::riscv_nxv2i8x3,
163 MVT::riscv_nxv2i8x4, MVT::riscv_nxv2i8x5, MVT::riscv_nxv2i8x6,
164 MVT::riscv_nxv2i8x7, MVT::riscv_nxv2i8x8, MVT::riscv_nxv4i8x2,
165 MVT::riscv_nxv4i8x3, MVT::riscv_nxv4i8x4, MVT::riscv_nxv4i8x5,
166 MVT::riscv_nxv4i8x6, MVT::riscv_nxv4i8x7, MVT::riscv_nxv4i8x8,
167 MVT::riscv_nxv8i8x2, MVT::riscv_nxv8i8x3, MVT::riscv_nxv8i8x4,
168 MVT::riscv_nxv8i8x5, MVT::riscv_nxv8i8x6, MVT::riscv_nxv8i8x7,
169 MVT::riscv_nxv8i8x8, MVT::riscv_nxv16i8x2, MVT::riscv_nxv16i8x3,
170 MVT::riscv_nxv16i8x4, MVT::riscv_nxv32i8x2};
171
172 if (Subtarget.hasVInstructions()) {
173 auto addRegClassForRVV = [this](MVT VT) {
174 // Disable the smallest fractional LMUL types if ELEN is less than
175 // RVVBitsPerBlock.
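 // For example, with ELEN=32 (Zve32*) this disables the MF8 types such as
 // nxv1i8, since RVVBitsPerBlock is 64 and the minimum element count is 2.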
176 unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELen();
177 if (VT.getVectorMinNumElements() < MinElts)
178 return;
179
180 unsigned Size = VT.getSizeInBits().getKnownMinValue();
181 const TargetRegisterClass *RC;
182 if (Size <= RISCV::RVVBitsPerBlock)
183 RC = &RISCV::VRRegClass;
184 else if (Size == 2 * RISCV::RVVBitsPerBlock)
185 RC = &RISCV::VRM2RegClass;
186 else if (Size == 4 * RISCV::RVVBitsPerBlock)
187 RC = &RISCV::VRM4RegClass;
188 else if (Size == 8 * RISCV::RVVBitsPerBlock)
189 RC = &RISCV::VRM8RegClass;
190 else
191 llvm_unreachable("Unexpected size");
192
193 addRegisterClass(VT, RC);
194 };
195
196 for (MVT VT : BoolVecVTs)
197 addRegClassForRVV(VT);
198 for (MVT VT : IntVecVTs) {
199 if (VT.getVectorElementType() == MVT::i64 &&
200 !Subtarget.hasVInstructionsI64())
201 continue;
202 addRegClassForRVV(VT);
203 }
204
205 if (Subtarget.hasVInstructionsF16Minimal())
206 for (MVT VT : F16VecVTs)
207 addRegClassForRVV(VT);
208
209 if (Subtarget.hasVInstructionsBF16Minimal())
210 for (MVT VT : BF16VecVTs)
211 addRegClassForRVV(VT);
212
213 if (Subtarget.hasVInstructionsF32())
214 for (MVT VT : F32VecVTs)
215 addRegClassForRVV(VT);
216
217 if (Subtarget.hasVInstructionsF64())
218 for (MVT VT : F64VecVTs)
219 addRegClassForRVV(VT);
220
221 if (Subtarget.useRVVForFixedLengthVectors()) {
222 auto addRegClassForFixedVectors = [this](MVT VT) {
223 MVT ContainerVT = getContainerForFixedLengthVector(VT);
224 unsigned RCID = getRegClassIDForVecVT(ContainerVT);
225 const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
226 addRegisterClass(VT, TRI.getRegClass(RCID));
227 };
228 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
229 if (useRVVForFixedLengthVectorVT(VT))
230 addRegClassForFixedVectors(VT);
231
232 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
233 if (useRVVForFixedLengthVectorVT(VT))
234 addRegClassForFixedVectors(VT);
235 }
236
237 addRegisterClass(MVT::riscv_nxv1i8x2, &RISCV::VRN2M1RegClass);
238 addRegisterClass(MVT::riscv_nxv1i8x3, &RISCV::VRN3M1RegClass);
239 addRegisterClass(MVT::riscv_nxv1i8x4, &RISCV::VRN4M1RegClass);
240 addRegisterClass(MVT::riscv_nxv1i8x5, &RISCV::VRN5M1RegClass);
241 addRegisterClass(MVT::riscv_nxv1i8x6, &RISCV::VRN6M1RegClass);
242 addRegisterClass(MVT::riscv_nxv1i8x7, &RISCV::VRN7M1RegClass);
243 addRegisterClass(MVT::riscv_nxv1i8x8, &RISCV::VRN8M1RegClass);
244 addRegisterClass(MVT::riscv_nxv2i8x2, &RISCV::VRN2M1RegClass);
245 addRegisterClass(MVT::riscv_nxv2i8x3, &RISCV::VRN3M1RegClass);
246 addRegisterClass(MVT::riscv_nxv2i8x4, &RISCV::VRN4M1RegClass);
247 addRegisterClass(MVT::riscv_nxv2i8x5, &RISCV::VRN5M1RegClass);
248 addRegisterClass(MVT::riscv_nxv2i8x6, &RISCV::VRN6M1RegClass);
249 addRegisterClass(MVT::riscv_nxv2i8x7, &RISCV::VRN7M1RegClass);
250 addRegisterClass(MVT::riscv_nxv2i8x8, &RISCV::VRN8M1RegClass);
251 addRegisterClass(MVT::riscv_nxv4i8x2, &RISCV::VRN2M1RegClass);
252 addRegisterClass(MVT::riscv_nxv4i8x3, &RISCV::VRN3M1RegClass);
253 addRegisterClass(MVT::riscv_nxv4i8x4, &RISCV::VRN4M1RegClass);
254 addRegisterClass(MVT::riscv_nxv4i8x5, &RISCV::VRN5M1RegClass);
255 addRegisterClass(MVT::riscv_nxv4i8x6, &RISCV::VRN6M1RegClass);
256 addRegisterClass(MVT::riscv_nxv4i8x7, &RISCV::VRN7M1RegClass);
257 addRegisterClass(MVT::riscv_nxv4i8x8, &RISCV::VRN8M1RegClass);
258 addRegisterClass(MVT::riscv_nxv8i8x2, &RISCV::VRN2M1RegClass);
259 addRegisterClass(MVT::riscv_nxv8i8x3, &RISCV::VRN3M1RegClass);
260 addRegisterClass(MVT::riscv_nxv8i8x4, &RISCV::VRN4M1RegClass);
261 addRegisterClass(MVT::riscv_nxv8i8x5, &RISCV::VRN5M1RegClass);
262 addRegisterClass(MVT::riscv_nxv8i8x6, &RISCV::VRN6M1RegClass);
263 addRegisterClass(MVT::riscv_nxv8i8x7, &RISCV::VRN7M1RegClass);
264 addRegisterClass(MVT::riscv_nxv8i8x8, &RISCV::VRN8M1RegClass);
265 addRegisterClass(MVT::riscv_nxv16i8x2, &RISCV::VRN2M2RegClass);
266 addRegisterClass(MVT::riscv_nxv16i8x3, &RISCV::VRN3M2RegClass);
267 addRegisterClass(MVT::riscv_nxv16i8x4, &RISCV::VRN4M2RegClass);
268 addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
269 }
270
271 // Compute derived properties from the register classes.
273
275
277 MVT::i1, Promote);
278 // DAGCombiner can call isLoadExtLegal for types that aren't legal.
280 MVT::i1, Promote);
281
282 // TODO: add all necessary setOperationAction calls.
284
289
294 if (!(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
297 }
298
300
303
304 if (!Subtarget.hasVendorXTHeadBb())
306
308
309 if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb() &&
310 !(Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()))
311 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
312
313 if (Subtarget.is64Bit()) {
315
318 MVT::i32, Custom);
320 if (!Subtarget.hasStdExtZbb())
323 Custom);
325 }
326 if (!Subtarget.hasStdExtZmmul()) {
328 } else if (Subtarget.is64Bit()) {
331 } else {
333 }
334
335 if (!Subtarget.hasStdExtM()) {
337 Expand);
338 } else if (Subtarget.is64Bit()) {
340 {MVT::i8, MVT::i16, MVT::i32}, Custom);
341 }
342
345 Expand);
346
348 Custom);
349
350 if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
351 if (Subtarget.is64Bit())
353 } else if (Subtarget.hasVendorXTHeadBb()) {
354 if (Subtarget.is64Bit())
357 } else if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
359 } else {
361 }
362
363 // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
364 // pattern match it directly in isel.
366 (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
367 Subtarget.hasVendorXTHeadBb())
368 ? Legal
369 : Expand);
370
371 if (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit()) {
373 } else {
374 // Zbkb can use rev8+brev8 to implement bitreverse.
376 Subtarget.hasStdExtZbkb() ? Custom : Expand);
377 }
378
379 if (Subtarget.hasStdExtZbb() ||
380 (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) {
382 Legal);
383 }
384
385 if (Subtarget.hasStdExtZbb() ||
386 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
387 if (Subtarget.is64Bit())
389 } else {
391 }
392
393 if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
394 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
395 // We need the custom lowering to make sure that the resulting sequence
396 // for the 32bit case is efficient on 64bit targets.
397 if (Subtarget.is64Bit())
399 } else {
401 }
402
403 if (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit()) {
405 } else if (Subtarget.hasShortForwardBranchOpt()) {
406 // We can use PseudoCCSUB to implement ABS.
408 } else if (Subtarget.is64Bit()) {
410 }
411
412 if (!Subtarget.hasVendorXTHeadCondMov())
414
415 static const unsigned FPLegalNodeTypes[] = {
423
424 static const ISD::CondCode FPCCToExpand[] = {
428
429 static const unsigned FPOpToExpand[] = {
431 ISD::FREM};
432
433 static const unsigned FPRndMode[] = {
436
437 static const unsigned ZfhminZfbfminPromoteOps[] = {
447
448 if (Subtarget.hasStdExtZfbfmin()) {
454 setOperationAction(ZfhminZfbfminPromoteOps, MVT::bf16, Promote);
461 }
462
463 if (Subtarget.hasStdExtZfhminOrZhinxmin()) {
464 if (Subtarget.hasStdExtZfhOrZhinx()) {
465 setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
466 setOperationAction(FPRndMode, MVT::f16,
467 Subtarget.hasStdExtZfa() ? Legal : Custom);
470 Subtarget.hasStdExtZfa() ? Legal : Custom);
471 if (Subtarget.hasStdExtZfa())
473 } else {
474 setOperationAction(ZfhminZfbfminPromoteOps, MVT::f16, Promote);
479 setOperationAction(Op, MVT::f16, Custom);
485 }
486
488
491 setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
495
497 ISD::FNEARBYINT, MVT::f16,
498 Subtarget.hasStdExtZfh() && Subtarget.hasStdExtZfa() ? Legal : Promote);
503 MVT::f16, Promote);
504
505 // FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
506 // complete support for all operations in LegalizeDAG.
511 MVT::f16, Promote);
512
513 // We need to custom promote this.
514 if (Subtarget.is64Bit())
516 }
517
518 if (Subtarget.hasStdExtFOrZfinx()) {
519 setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
520 setOperationAction(FPRndMode, MVT::f32,
521 Subtarget.hasStdExtZfa() ? Legal : Custom);
522 setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
526 setOperationAction(FPOpToExpand, MVT::f32, Expand);
527 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
528 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
529 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
530 setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
534 Subtarget.isSoftFPABI() ? LibCall : Custom);
539
540 if (Subtarget.hasStdExtZfa()) {
544 } else {
546 }
547 }
548
549 if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
551
552 if (Subtarget.hasStdExtDOrZdinx()) {
553 setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
554
555 if (!Subtarget.is64Bit())
557
558 if (Subtarget.hasStdExtZfa()) {
560 setOperationAction(FPRndMode, MVT::f64, Legal);
563 } else {
564 if (Subtarget.is64Bit())
565 setOperationAction(FPRndMode, MVT::f64, Custom);
566
568 }
569
572 setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
576 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
577 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
578 setOperationAction(FPOpToExpand, MVT::f64, Expand);
579 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
580 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
581 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
582 setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
586 Subtarget.isSoftFPABI() ? LibCall : Custom);
591 }
592
593 if (Subtarget.is64Bit()) {
596 MVT::i32, Custom);
598 }
599
600 if (Subtarget.hasStdExtFOrZfinx()) {
602 Custom);
603
604 // f16/bf16 require custom handling.
606 Custom);
608 Custom);
609
612 }
613
616 XLenVT, Custom);
617
619
620 if (Subtarget.is64Bit())
622
623 // TODO: On M-mode only targets, the cycle[h]/time[h] CSR may not be present.
624 // Unfortunately this can't be determined just from the ISA naming string.
626 Subtarget.is64Bit() ? Legal : Custom);
628 Subtarget.is64Bit() ? Legal : Custom);
629
630 if (Subtarget.is64Bit()) {
633 }
634
637 if (Subtarget.is64Bit())
639
640 if (Subtarget.hasStdExtZicbop()) {
642 }
643
644 if (Subtarget.hasStdExtA()) {
646 if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
648 else
650 } else if (Subtarget.hasForcedAtomics()) {
652 } else {
654 }
655
657
659
660 if (getTargetMachine().getTargetTriple().isOSLinux()) {
661 // Custom lowering of llvm.clear_cache.
663 }
664
665 if (Subtarget.hasVInstructions()) {
667
669
670 // RVV intrinsics may have illegal operands.
671 // We also need to custom legalize vmv.x.s.
674 {MVT::i8, MVT::i16}, Custom);
675 if (Subtarget.is64Bit())
677 MVT::i32, Custom);
678 else
680 MVT::i64, Custom);
681
683 MVT::Other, Custom);
684
685 static const unsigned IntegerVPOps[] = {
686 ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
687 ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
688 ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
689 ISD::VP_XOR, ISD::VP_SRA, ISD::VP_SRL,
690 ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
691 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
692 ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
693 ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
694 ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
695 ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
696 ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,
697 ISD::VP_ABS, ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE,
698 ISD::VP_SADDSAT, ISD::VP_UADDSAT, ISD::VP_SSUBSAT,
699 ISD::VP_USUBSAT, ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF,
700 ISD::EXPERIMENTAL_VP_SPLAT};
701
702 static const unsigned FloatingPointVPOps[] = {
703 ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
704 ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
705 ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
706 ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
707 ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
708 ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
709 ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
710 ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
711 ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
712 ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
713 ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::VP_LRINT,
714 ISD::VP_LLRINT, ISD::EXPERIMENTAL_VP_REVERSE,
715 ISD::EXPERIMENTAL_VP_SPLICE, ISD::VP_REDUCE_FMINIMUM,
716 ISD::VP_REDUCE_FMAXIMUM, ISD::EXPERIMENTAL_VP_SPLAT};
717
718 static const unsigned IntegerVecReduceOps[] = {
722
723 static const unsigned FloatingPointVecReduceOps[] = {
726
727 static const unsigned FloatingPointLibCallOps[] = {
730
731 if (!Subtarget.is64Bit()) {
732 // We must custom-lower certain vXi64 operations on RV32 due to the vector
733 // element type being illegal.
735 MVT::i64, Custom);
736
737 setOperationAction(IntegerVecReduceOps, MVT::i64, Custom);
738
739 setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
740 ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
741 ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
742 ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
743 MVT::i64, Custom);
744 }
745
746 for (MVT VT : BoolVecVTs) {
747 if (!isTypeLegal(VT))
748 continue;
749
751
752 // Mask VTs are custom-expanded into a series of standard nodes
756 VT, Custom);
757
759 Custom);
760
762 setOperationAction({ISD::SELECT_CC, ISD::VSELECT, ISD::VP_SELECT}, VT,
763 Expand);
764 setOperationAction(ISD::VP_MERGE, VT, Custom);
765
766 setOperationAction({ISD::VP_CTTZ_ELTS, ISD::VP_CTTZ_ELTS_ZERO_UNDEF}, VT,
767 Custom);
768
769 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
770
773 Custom);
774
776 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
777 Custom);
778
779 // RVV has native int->float & float->int conversions where the
780 // element type sizes are within one power-of-two of each other. Any
781 // wider distances between type sizes have to be lowered as sequences
782 // which progressively narrow the gap in stages.
787 VT, Custom);
789 Custom);
790
791 // Expand all extending loads to types larger than this, and truncating
792 // stores from types larger than this.
794 setTruncStoreAction(VT, OtherVT, Expand);
796 OtherVT, Expand);
797 }
798
799 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
800 ISD::VP_TRUNCATE, ISD::VP_SETCC},
801 VT, Custom);
802
805
807
808 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
809 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
810
813 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount()));
814 }
815
816 for (MVT VT : IntVecVTs) {
817 if (!isTypeLegal(VT))
818 continue;
819
822
823 // Vectors implement MULHS/MULHU.
825
826 // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
827 if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
829
831 Legal);
832
834
835 // Custom-lower extensions and truncations from/to mask types.
837 VT, Custom);
838
839 // RVV has native int->float & float->int conversions where the
840 // element type sizes are within one power-of-two of each other. Any
841 // wider distances between type sizes have to be lowered as sequences
842 // which progressively narrow the gap in stages.
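 // For example, nxv2i8 -> nxv2f64 is lowered as an integer extend to
 // nxv2i32 followed by a widening convert to nxv2f64.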
847 VT, Custom);
849 Custom);
853 VT, Legal);
854
855 // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
856 // nodes which truncate by one power of two at a time.
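 // For example, an i32 -> i8 element truncate is emitted as two steps:
 // i32 -> i16 -> i8.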
859 Custom);
860
861 // Custom-lower insert/extract operations to simplify patterns.
863 Custom);
864
865 // Custom-lower reduction operations to set up the corresponding custom
866 // nodes' operands.
867 setOperationAction(IntegerVecReduceOps, VT, Custom);
868
869 setOperationAction(IntegerVPOps, VT, Custom);
870
872
874 VT, Custom);
875
877 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
878 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
879 VT, Custom);
880
883 VT, Custom);
884
887
889
891 setTruncStoreAction(VT, OtherVT, Expand);
893 OtherVT, Expand);
894 }
895
898
899 // Splice
901
902 if (Subtarget.hasStdExtZvkb()) {
904 setOperationAction(ISD::VP_BSWAP, VT, Custom);
905 } else {
906 setOperationAction({ISD::BSWAP, ISD::VP_BSWAP}, VT, Expand);
908 }
909
910 if (Subtarget.hasStdExtZvbb()) {
912 setOperationAction(ISD::VP_BITREVERSE, VT, Custom);
913 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
914 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
915 VT, Custom);
916 } else {
917 setOperationAction({ISD::BITREVERSE, ISD::VP_BITREVERSE}, VT, Expand);
919 setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
920 ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
921 VT, Expand);
922
923 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
924 // range of f32.
925 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
926 if (isTypeLegal(FloatVT)) {
928 ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
929 ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
930 VT, Custom);
931 }
932 }
933
935 }
936
937 for (MVT VT : VecTupleVTs) {
938 if (!isTypeLegal(VT))
939 continue;
940
942 }
943
944 // Expand various CCs to best match the RVV ISA, which natively supports UNE
945 // but no other unordered comparisons, and supports all ordered comparisons
946 // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
947 // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
948 // and we pattern-match those back to the "original", swapping operands once
949 // more. This way we catch both operations and both "vf" and "fv" forms with
950 // fewer patterns.
951 static const ISD::CondCode VFPCCToExpand[] = {
955 };
956
957 // TODO: support more ops.
958 static const unsigned ZvfhminZvfbfminPromoteOps[] = {
966
967 // TODO: support more vp ops.
968 static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
969 ISD::VP_FADD,
970 ISD::VP_FSUB,
971 ISD::VP_FMUL,
972 ISD::VP_FDIV,
973 ISD::VP_FMA,
974 ISD::VP_REDUCE_FMIN,
975 ISD::VP_REDUCE_FMAX,
976 ISD::VP_SQRT,
977 ISD::VP_FMINNUM,
978 ISD::VP_FMAXNUM,
979 ISD::VP_FCEIL,
980 ISD::VP_FFLOOR,
981 ISD::VP_FROUND,
982 ISD::VP_FROUNDEVEN,
983 ISD::VP_FROUNDTOZERO,
984 ISD::VP_FRINT,
985 ISD::VP_FNEARBYINT,
986 ISD::VP_SETCC,
987 ISD::VP_FMINIMUM,
988 ISD::VP_FMAXIMUM,
989 ISD::VP_REDUCE_FMINIMUM,
990 ISD::VP_REDUCE_FMAXIMUM};
991
992 // Sets common operation actions on RVV floating-point vector types.
993 const auto SetCommonVFPActions = [&](MVT VT) {
995 // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
996 // sizes are within one power-of-two of each other. Therefore conversions
997 // between vXf16 and vXf64 must be lowered as sequences which convert via
998 // vXf32.
1001 // Custom-lower insert/extract operations to simplify patterns.
1003 Custom);
1004 // Expand various condition codes (explained above).
1005 setCondCodeAction(VFPCCToExpand, VT, Expand);
1006
1009
1013 VT, Custom);
1014
1015 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1016
1017 // Expand FP operations that need libcalls.
1018 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1019
1021
1023
1025 VT, Custom);
1026
1028 {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1029 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
1030 VT, Custom);
1031
1034
1037 VT, Custom);
1038
1041
1043
1044 setOperationAction(FloatingPointVPOps, VT, Custom);
1045
1047 Custom);
1050 VT, Legal);
1055 VT, Custom);
1056
1058 };
1059
1060 // Sets common extload/truncstore actions on RVV floating-point vector
1061 // types.
1062 const auto SetCommonVFPExtLoadTruncStoreActions =
1063 [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
1064 for (auto SmallVT : SmallerVTs) {
1065 setTruncStoreAction(VT, SmallVT, Expand);
1066 setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
1067 }
1068 };
1069
1070 // Sets common actions for f16 and bf16 for when there's only
1071 // zvfhmin/zvfbfmin and we need to promote to f32 for most operations.
1072 const auto SetCommonPromoteToF32Actions = [&](MVT VT) {
1075 Custom);
1076 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1077 setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
1078 Custom);
1080 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
1086 VT, Custom);
1087 MVT EltVT = VT.getVectorElementType();
1088 if (isTypeLegal(EltVT))
1089 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
1091 VT, Custom);
1092 else
1093 setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
1094 EltVT, Custom);
1096 ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
1097 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1098 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1099 ISD::VP_SCATTER},
1100 VT, Custom);
1101
1105
1106 // Expand FP operations that need libcalls.
1107 setOperationAction(FloatingPointLibCallOps, VT, Expand);
1108
1109 // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
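 // (Promoting an LMUL=8 f16/bf16 vector would need an LMUL=16 f32 vector,
 // which does not exist, so such vectors are split and custom-lowered.)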
1110 if (getLMUL(VT) == RISCVII::VLMUL::LMUL_8) {
1111 setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
1112 setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
1113 } else {
1114 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1115 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1116 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1117 }
1118 };
1119
1120 if (Subtarget.hasVInstructionsF16()) {
1121 for (MVT VT : F16VecVTs) {
1122 if (!isTypeLegal(VT))
1123 continue;
1124 SetCommonVFPActions(VT);
1125 }
1126 } else if (Subtarget.hasVInstructionsF16Minimal()) {
1127 for (MVT VT : F16VecVTs) {
1128 if (!isTypeLegal(VT))
1129 continue;
1130 SetCommonPromoteToF32Actions(VT);
1131 }
1132 }
1133
1134 if (Subtarget.hasVInstructionsBF16Minimal()) {
1135 for (MVT VT : BF16VecVTs) {
1136 if (!isTypeLegal(VT))
1137 continue;
1138 SetCommonPromoteToF32Actions(VT);
1139 }
1140 }
1141
1142 if (Subtarget.hasVInstructionsF32()) {
1143 for (MVT VT : F32VecVTs) {
1144 if (!isTypeLegal(VT))
1145 continue;
1146 SetCommonVFPActions(VT);
1147 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1148 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1149 }
1150 }
1151
1152 if (Subtarget.hasVInstructionsF64()) {
1153 for (MVT VT : F64VecVTs) {
1154 if (!isTypeLegal(VT))
1155 continue;
1156 SetCommonVFPActions(VT);
1157 SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
1158 SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
1159 SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
1160 }
1161 }
1162
1163 if (Subtarget.useRVVForFixedLengthVectors()) {
1165 if (!useRVVForFixedLengthVectorVT(VT))
1166 continue;
1167
1168 // By default everything must be expanded.
1169 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1172 setTruncStoreAction(VT, OtherVT, Expand);
1174 OtherVT, Expand);
1175 }
1176
1177 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1178 // expansion to a build_vector of 0s.
1180
1181 // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
1183 Custom);
1184
1187 Custom);
1188
1190 VT, Custom);
1191
1193
1195
1197
1199
1202 Custom);
1203
1205
1208 Custom);
1209
1211 {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
1212 Custom);
1213
1215 {
1224 },
1225 VT, Custom);
1227 Custom);
1228
1230
1231 // Operations below are different for between masks and other vectors.
1232 if (VT.getVectorElementType() == MVT::i1) {
1233 setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
1234 ISD::OR, ISD::XOR},
1235 VT, Custom);
1236
1237 setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
1238 ISD::VP_SETCC, ISD::VP_TRUNCATE},
1239 VT, Custom);
1240
1241 setOperationAction(ISD::VP_MERGE, VT, Custom);
1242
1243 setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
1244 setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
1245 continue;
1246 }
1247
1248 // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
1249 // it before type legalization for i64 vectors on RV32. It will then be
1250 // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
1251 // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
1252 // improvements first.
1253 if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
1256 }
1257
1260
1261 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE,
1262 ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1263 ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
1264 ISD::VP_SCATTER},
1265 VT, Custom);
1266
1270 VT, Custom);
1271
1274
1276
1277 // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
1278 if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
1280
1284 VT, Custom);
1285
1287
1290
1291 // Custom-lower reduction operations to set up the corresponding custom
1292 // nodes' operands.
1296 VT, Custom);
1297
1298 setOperationAction(IntegerVPOps, VT, Custom);
1299
1300 if (Subtarget.hasStdExtZvkb())
1302
1303 if (Subtarget.hasStdExtZvbb()) {
1306 VT, Custom);
1307 } else {
1308 // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
1309 // range of f32.
1310 EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1311 if (isTypeLegal(FloatVT))
1314 Custom);
1315 }
1316
1318 }
1319
1321 // There are no extending loads or truncating stores.
1322 for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
1323 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1324 setTruncStoreAction(VT, InnerVT, Expand);
1325 }
1326
1327 if (!useRVVForFixedLengthVectorVT(VT))
1328 continue;
1329
1330 // By default everything must be expanded.
1331 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1333
1334 // Custom lower fixed vector undefs to scalable vector undefs to avoid
1335 // expansion to a build_vector of 0s.
1337
1342 VT, Custom);
1343
1346 VT, Custom);
1347 setOperationAction({ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER,
1348 ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
1349 ISD::EXPERIMENTAL_VP_STRIDED_STORE},
1350 VT, Custom);
1351
1354 Custom);
1355
1356 if (VT.getVectorElementType() == MVT::f16 &&
1357 !Subtarget.hasVInstructionsF16()) {
1359 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1361 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1362 Custom);
1363 setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT,
1364 Custom);
1365 if (Subtarget.hasStdExtZfhmin()) {
1367 } else {
1368 // We need to custom legalize f16 build vectors if Zfhmin isn't
1369 // available.
1371 }
1375 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1376 // Don't promote f16 vector operations to f32 if f32 vector type is
1377 // not legal.
1378 // TODO: could split the f16 vector into two vectors and do promotion.
1379 if (!isTypeLegal(F32VecVT))
1380 continue;
1381 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1382 setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
1383 continue;
1384 }
1385
1386 if (VT.getVectorElementType() == MVT::bf16) {
1388 setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
1389 if (Subtarget.hasStdExtZfbfmin()) {
1391 } else {
1392 // We need to custom legalize bf16 build vectors if Zfbfmin isn't
1393 // available.
1395 }
1397 {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
1398 Custom);
1399 MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
1400 // Don't promote bf16 vector operations to f32 if the f32 vector type is
1401 // not legal.
1402 // TODO: could split the bf16 vector into two vectors and do promotion.
1403 if (!isTypeLegal(F32VecVT))
1404 continue;
1405 setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1406 // TODO: Promote VP ops to fp32.
1407 continue;
1408 }
1409
1411 Custom);
1412
1417 VT, Custom);
1418
1421 VT, Custom);
1422
1423 setCondCodeAction(VFPCCToExpand, VT, Expand);
1424
1427
1429
1430 setOperationAction(FloatingPointVecReduceOps, VT, Custom);
1431
1432 setOperationAction(FloatingPointVPOps, VT, Custom);
1433
1440 VT, Custom);
1441 }
1442
1443 // Custom-legalize bitcasts from fixed-length vectors to scalar types.
1444 setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32}, Custom);
1445 if (Subtarget.is64Bit())
1447 if (Subtarget.hasStdExtZfhminOrZhinxmin())
1449 if (Subtarget.hasStdExtZfbfmin())
1451 if (Subtarget.hasStdExtFOrZfinx())
1453 if (Subtarget.hasStdExtDOrZdinx())
1455 }
1456 }
1457
1458 if (Subtarget.hasStdExtA())
1460
1461 if (Subtarget.hasForcedAtomics()) {
1462 // Force __sync libcalls to be emitted for atomic rmw/cas operations.
1468 XLenVT, LibCall);
1469 }
1470
1471 if (Subtarget.hasVendorXTHeadMemIdx()) {
1472 for (unsigned im : {ISD::PRE_INC, ISD::POST_INC}) {
1473 setIndexedLoadAction(im, MVT::i8, Legal);
1474 setIndexedStoreAction(im, MVT::i8, Legal);
1475 setIndexedLoadAction(im, MVT::i16, Legal);
1476 setIndexedStoreAction(im, MVT::i16, Legal);
1477 setIndexedLoadAction(im, MVT::i32, Legal);
1478 setIndexedStoreAction(im, MVT::i32, Legal);
1479
1480 if (Subtarget.is64Bit()) {
1481 setIndexedLoadAction(im, MVT::i64, Legal);
1482 setIndexedStoreAction(im, MVT::i64, Legal);
1483 }
1484 }
1485 }
1486
1487 if (Subtarget.hasVendorXCVmem() && !Subtarget.is64Bit()) {
1491
1495 }
1496
1497 // Function alignments.
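 // With the C or Zca extension instructions can be 2 bytes wide, so 2-byte
 // function alignment suffices; otherwise instructions are 4 bytes.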
1498 const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
1499 setMinFunctionAlignment(FunctionAlignment);
1500 // Set preferred alignments.
1503
1509
1510 if (Subtarget.hasStdExtFOrZfinx())
1512
1513 if (Subtarget.hasStdExtZbb())
1515
1516 if ((Subtarget.hasStdExtZbs() && Subtarget.is64Bit()) ||
1517 Subtarget.hasVInstructions())
1519
1520 if (Subtarget.hasStdExtZbkb())
1522
1523 if (Subtarget.hasStdExtFOrZfinx())
1526 if (Subtarget.hasVInstructions())
1528 ISD::MSCATTER, ISD::VP_GATHER,
1529 ISD::VP_SCATTER, ISD::SRA,
1533 ISD::VP_STORE, ISD::EXPERIMENTAL_VP_REVERSE,
1539 if (Subtarget.hasVendorXTHeadMemPair())
1541 if (Subtarget.useRVVForFixedLengthVectors())
1543
1544 setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1545 setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1546
1547 // Disable strict node mutation.
1548 IsStrictFPEnabled = true;
1549 EnableExtLdPromotion = true;
1550
1551 // Let the subtarget decide if a predictable select is more expensive than the
1552 // corresponding branch. This information is used in CGP/SelectOpt to decide
1553 // when to convert selects into branches.
1554 PredictableSelectIsExpensive = Subtarget.predictableSelectIsExpensive();
1555
1556 MaxStoresPerMemsetOptSize = Subtarget.getMaxStoresPerMemset(/*OptSize=*/true);
1557 MaxStoresPerMemset = Subtarget.getMaxStoresPerMemset(/*OptSize=*/false);
1558
1560 MaxStoresPerMemcpyOptSize = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/true);
1561 MaxStoresPerMemcpy = Subtarget.getMaxStoresPerMemcpy(/*OptSize=*/false);
1562
1564 Subtarget.getMaxStoresPerMemmove(/*OptSize=*/true);
1565 MaxStoresPerMemmove = Subtarget.getMaxStoresPerMemmove(/*OptSize=*/false);
1566
1567 MaxLoadsPerMemcmpOptSize = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/true);
1568 MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
1569}
1570
1572 LLVMContext &Context,
1573 EVT VT) const {
1574 if (!VT.isVector())
1575 return getPointerTy(DL);
1576 if (Subtarget.hasVInstructions() &&
1577 (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
1578 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
1580}
1581
1582MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
1583 return Subtarget.getXLenVT();
1584}
1585
1586// Return false if we can lower get_vector_length to a vsetvli intrinsic.
1587bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
1588 unsigned VF,
1589 bool IsScalable) const {
1590 if (!Subtarget.hasVInstructions())
1591 return true;
1592
1593 if (!IsScalable)
1594 return true;
1595
1596 if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
1597 return true;
1598
1599 // Don't allow VF=1 if those types aren't legal.
1600 if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELen())
1601 return true;
1602
1603 // VLEN=32 support is incomplete.
1604 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
1605 return true;
1606
1607 // The maximum VF is for the smallest element width with LMUL=8.
1608 // VF must be a power of 2.
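 // With RVVBitsPerBlock = 64 this evaluates to (64 / 8) * 8 = 64.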
1609 unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
1610 return VF > MaxVF || !isPowerOf2_32(VF);
1611}
1612
1614 return !Subtarget.hasVInstructions() ||
1615 VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT);
1616}
1617
1619 const CallInst &I,
1620 MachineFunction &MF,
1621 unsigned Intrinsic) const {
1622 auto &DL = I.getDataLayout();
1623
1624 auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
1625 bool IsUnitStrided, bool UsePtrVal = false) {
1627 // We can't use ptrVal if the intrinsic can access memory before the
1628 // pointer. This means we can't use it for strided or indexed intrinsics.
1629 if (UsePtrVal)
1630 Info.ptrVal = I.getArgOperand(PtrOp);
1631 else
1632 Info.fallbackAddressSpace =
1633 I.getArgOperand(PtrOp)->getType()->getPointerAddressSpace();
1634 Type *MemTy;
1635 if (IsStore) {
1636 // Store value is the first operand.
1637 MemTy = I.getArgOperand(0)->getType();
1638 } else {
1639 // Use return type. If it's segment load, return type is a struct.
1640 MemTy = I.getType();
1641 if (MemTy->isStructTy())
1642 MemTy = MemTy->getStructElementType(0);
1643 }
1644 if (!IsUnitStrided)
1645 MemTy = MemTy->getScalarType();
1646
1647 Info.memVT = getValueType(DL, MemTy);
1648 if (MemTy->isTargetExtTy()) {
1649 // RISC-V vector tuple type's alignment type should be its element type.
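 // The trailing operand of these intrinsics encodes log2(SEW), so shifting
 // 1 left by it recovers the element width in bits used for alignment.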
1650 if (cast<TargetExtType>(MemTy)->getName() == "riscv.vector.tuple")
1651 MemTy = Type::getIntNTy(
1652 MemTy->getContext(),
1653 1 << cast<ConstantInt>(I.getArgOperand(I.arg_size() - 1))
1654 ->getZExtValue());
1655 Info.align = DL.getABITypeAlign(MemTy);
1656 } else {
1657 Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
1658 }
1660 Info.flags |=
1662 return true;
1663 };
1664
1665 if (I.hasMetadata(LLVMContext::MD_nontemporal))
1667
1669 switch (Intrinsic) {
1670 default:
1671 return false;
1672 case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
1673 case Intrinsic::riscv_masked_atomicrmw_add_i32:
1674 case Intrinsic::riscv_masked_atomicrmw_sub_i32:
1675 case Intrinsic::riscv_masked_atomicrmw_nand_i32:
1676 case Intrinsic::riscv_masked_atomicrmw_max_i32:
1677 case Intrinsic::riscv_masked_atomicrmw_min_i32:
1678 case Intrinsic::riscv_masked_atomicrmw_umax_i32:
1679 case Intrinsic::riscv_masked_atomicrmw_umin_i32:
1680 case Intrinsic::riscv_masked_cmpxchg_i32:
1682 Info.memVT = MVT::i32;
1683 Info.ptrVal = I.getArgOperand(0);
1684 Info.offset = 0;
1685 Info.align = Align(4);
1688 return true;
1689 case Intrinsic::riscv_seg2_load:
1690 case Intrinsic::riscv_seg3_load:
1691 case Intrinsic::riscv_seg4_load:
1692 case Intrinsic::riscv_seg5_load:
1693 case Intrinsic::riscv_seg6_load:
1694 case Intrinsic::riscv_seg7_load:
1695 case Intrinsic::riscv_seg8_load:
1696 return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
1697 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1698 case Intrinsic::riscv_seg2_store:
1699 case Intrinsic::riscv_seg3_store:
1700 case Intrinsic::riscv_seg4_store:
1701 case Intrinsic::riscv_seg5_store:
1702 case Intrinsic::riscv_seg6_store:
1703 case Intrinsic::riscv_seg7_store:
1704 case Intrinsic::riscv_seg8_store:
1705 // Operands are (vec, ..., vec, ptr, vl)
1706 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
1707 /*IsStore*/ true,
1708 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1709 case Intrinsic::riscv_vle:
1710 case Intrinsic::riscv_vle_mask:
1711 case Intrinsic::riscv_vleff:
1712 case Intrinsic::riscv_vleff_mask:
1713 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1714 /*IsStore*/ false,
1715 /*IsUnitStrided*/ true,
1716 /*UsePtrVal*/ true);
1717 case Intrinsic::riscv_vse:
1718 case Intrinsic::riscv_vse_mask:
1719 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1720 /*IsStore*/ true,
1721 /*IsUnitStrided*/ true,
1722 /*UsePtrVal*/ true);
1723 case Intrinsic::riscv_vlse:
1724 case Intrinsic::riscv_vlse_mask:
1725 case Intrinsic::riscv_vloxei:
1726 case Intrinsic::riscv_vloxei_mask:
1727 case Intrinsic::riscv_vluxei:
1728 case Intrinsic::riscv_vluxei_mask:
1729 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1730 /*IsStore*/ false,
1731 /*IsUnitStrided*/ false);
1732 case Intrinsic::riscv_vsse:
1733 case Intrinsic::riscv_vsse_mask:
1734 case Intrinsic::riscv_vsoxei:
1735 case Intrinsic::riscv_vsoxei_mask:
1736 case Intrinsic::riscv_vsuxei:
1737 case Intrinsic::riscv_vsuxei_mask:
1738 return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
1739 /*IsStore*/ true,
1740 /*IsUnitStrided*/ false);
1741 case Intrinsic::riscv_vlseg2:
1742 case Intrinsic::riscv_vlseg3:
1743 case Intrinsic::riscv_vlseg4:
1744 case Intrinsic::riscv_vlseg5:
1745 case Intrinsic::riscv_vlseg6:
1746 case Intrinsic::riscv_vlseg7:
1747 case Intrinsic::riscv_vlseg8:
1748 case Intrinsic::riscv_vlseg2ff:
1749 case Intrinsic::riscv_vlseg3ff:
1750 case Intrinsic::riscv_vlseg4ff:
1751 case Intrinsic::riscv_vlseg5ff:
1752 case Intrinsic::riscv_vlseg6ff:
1753 case Intrinsic::riscv_vlseg7ff:
1754 case Intrinsic::riscv_vlseg8ff:
1755 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1756 /*IsStore*/ false,
1757 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1758 case Intrinsic::riscv_vlseg2_mask:
1759 case Intrinsic::riscv_vlseg3_mask:
1760 case Intrinsic::riscv_vlseg4_mask:
1761 case Intrinsic::riscv_vlseg5_mask:
1762 case Intrinsic::riscv_vlseg6_mask:
1763 case Intrinsic::riscv_vlseg7_mask:
1764 case Intrinsic::riscv_vlseg8_mask:
1765 case Intrinsic::riscv_vlseg2ff_mask:
1766 case Intrinsic::riscv_vlseg3ff_mask:
1767 case Intrinsic::riscv_vlseg4ff_mask:
1768 case Intrinsic::riscv_vlseg5ff_mask:
1769 case Intrinsic::riscv_vlseg6ff_mask:
1770 case Intrinsic::riscv_vlseg7ff_mask:
1771 case Intrinsic::riscv_vlseg8ff_mask:
1772 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1773 /*IsStore*/ false,
1774 /*IsUnitStrided*/ false, /*UsePtrVal*/ true);
1775 case Intrinsic::riscv_vlsseg2:
1776 case Intrinsic::riscv_vlsseg3:
1777 case Intrinsic::riscv_vlsseg4:
1778 case Intrinsic::riscv_vlsseg5:
1779 case Intrinsic::riscv_vlsseg6:
1780 case Intrinsic::riscv_vlsseg7:
1781 case Intrinsic::riscv_vlsseg8:
1782 case Intrinsic::riscv_vloxseg2:
1783 case Intrinsic::riscv_vloxseg3:
1784 case Intrinsic::riscv_vloxseg4:
1785 case Intrinsic::riscv_vloxseg5:
1786 case Intrinsic::riscv_vloxseg6:
1787 case Intrinsic::riscv_vloxseg7:
1788 case Intrinsic::riscv_vloxseg8:
1789 case Intrinsic::riscv_vluxseg2:
1790 case Intrinsic::riscv_vluxseg3:
1791 case Intrinsic::riscv_vluxseg4:
1792 case Intrinsic::riscv_vluxseg5:
1793 case Intrinsic::riscv_vluxseg6:
1794 case Intrinsic::riscv_vluxseg7:
1795 case Intrinsic::riscv_vluxseg8:
1796 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1797 /*IsStore*/ false,
1798 /*IsUnitStrided*/ false);
1799 case Intrinsic::riscv_vlsseg2_mask:
1800 case Intrinsic::riscv_vlsseg3_mask:
1801 case Intrinsic::riscv_vlsseg4_mask:
1802 case Intrinsic::riscv_vlsseg5_mask:
1803 case Intrinsic::riscv_vlsseg6_mask:
1804 case Intrinsic::riscv_vlsseg7_mask:
1805 case Intrinsic::riscv_vlsseg8_mask:
1806 case Intrinsic::riscv_vloxseg2_mask:
1807 case Intrinsic::riscv_vloxseg3_mask:
1808 case Intrinsic::riscv_vloxseg4_mask:
1809 case Intrinsic::riscv_vloxseg5_mask:
1810 case Intrinsic::riscv_vloxseg6_mask:
1811 case Intrinsic::riscv_vloxseg7_mask:
1812 case Intrinsic::riscv_vloxseg8_mask:
1813 case Intrinsic::riscv_vluxseg2_mask:
1814 case Intrinsic::riscv_vluxseg3_mask:
1815 case Intrinsic::riscv_vluxseg4_mask:
1816 case Intrinsic::riscv_vluxseg5_mask:
1817 case Intrinsic::riscv_vluxseg6_mask:
1818 case Intrinsic::riscv_vluxseg7_mask:
1819 case Intrinsic::riscv_vluxseg8_mask:
1820 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 6,
1821 /*IsStore*/ false,
1822 /*IsUnitStrided*/ false);
1823 case Intrinsic::riscv_vsseg2:
1824 case Intrinsic::riscv_vsseg3:
1825 case Intrinsic::riscv_vsseg4:
1826 case Intrinsic::riscv_vsseg5:
1827 case Intrinsic::riscv_vsseg6:
1828 case Intrinsic::riscv_vsseg7:
1829 case Intrinsic::riscv_vsseg8:
1830 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
1831 /*IsStore*/ true,
1832 /*IsUnitStrided*/ false);
1833 case Intrinsic::riscv_vsseg2_mask:
1834 case Intrinsic::riscv_vsseg3_mask:
1835 case Intrinsic::riscv_vsseg4_mask:
1836 case Intrinsic::riscv_vsseg5_mask:
1837 case Intrinsic::riscv_vsseg6_mask:
1838 case Intrinsic::riscv_vsseg7_mask:
1839 case Intrinsic::riscv_vsseg8_mask:
1840 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1841 /*IsStore*/ true,
1842 /*IsUnitStrided*/ false);
1843 case Intrinsic::riscv_vssseg2:
1844 case Intrinsic::riscv_vssseg3:
1845 case Intrinsic::riscv_vssseg4:
1846 case Intrinsic::riscv_vssseg5:
1847 case Intrinsic::riscv_vssseg6:
1848 case Intrinsic::riscv_vssseg7:
1849 case Intrinsic::riscv_vssseg8:
1850 case Intrinsic::riscv_vsoxseg2:
1851 case Intrinsic::riscv_vsoxseg3:
1852 case Intrinsic::riscv_vsoxseg4:
1853 case Intrinsic::riscv_vsoxseg5:
1854 case Intrinsic::riscv_vsoxseg6:
1855 case Intrinsic::riscv_vsoxseg7:
1856 case Intrinsic::riscv_vsoxseg8:
1857 case Intrinsic::riscv_vsuxseg2:
1858 case Intrinsic::riscv_vsuxseg3:
1859 case Intrinsic::riscv_vsuxseg4:
1860 case Intrinsic::riscv_vsuxseg5:
1861 case Intrinsic::riscv_vsuxseg6:
1862 case Intrinsic::riscv_vsuxseg7:
1863 case Intrinsic::riscv_vsuxseg8:
1864 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
1865 /*IsStore*/ true,
1866 /*IsUnitStrided*/ false);
1867 case Intrinsic::riscv_vssseg2_mask:
1868 case Intrinsic::riscv_vssseg3_mask:
1869 case Intrinsic::riscv_vssseg4_mask:
1870 case Intrinsic::riscv_vssseg5_mask:
1871 case Intrinsic::riscv_vssseg6_mask:
1872 case Intrinsic::riscv_vssseg7_mask:
1873 case Intrinsic::riscv_vssseg8_mask:
1874 case Intrinsic::riscv_vsoxseg2_mask:
1875 case Intrinsic::riscv_vsoxseg3_mask:
1876 case Intrinsic::riscv_vsoxseg4_mask:
1877 case Intrinsic::riscv_vsoxseg5_mask:
1878 case Intrinsic::riscv_vsoxseg6_mask:
1879 case Intrinsic::riscv_vsoxseg7_mask:
1880 case Intrinsic::riscv_vsoxseg8_mask:
1881 case Intrinsic::riscv_vsuxseg2_mask:
1882 case Intrinsic::riscv_vsuxseg3_mask:
1883 case Intrinsic::riscv_vsuxseg4_mask:
1884 case Intrinsic::riscv_vsuxseg5_mask:
1885 case Intrinsic::riscv_vsuxseg6_mask:
1886 case Intrinsic::riscv_vsuxseg7_mask:
1887 case Intrinsic::riscv_vsuxseg8_mask:
1888 return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
1889 /*IsStore*/ true,
1890 /*IsUnitStrided*/ false);
1891 }
1892}
1893
1895 const AddrMode &AM, Type *Ty,
1896 unsigned AS,
1897 Instruction *I) const {
1898 // No global is ever allowed as a base.
1899 if (AM.BaseGV)
1900 return false;
1901
1902 // None of our addressing modes allows a scalable offset
1903 if (AM.ScalableOffset)
1904 return false;
1905
1906 // RVV instructions only support register addressing.
1907 if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
1908 return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
1909
1910 // Require a 12-bit signed offset.
1911 if (!isInt<12>(AM.BaseOffs))
1912 return false;
1913
1914 switch (AM.Scale) {
1915 case 0: // "r+i" or just "i", depending on HasBaseReg.
1916 break;
1917 case 1:
1918 if (!AM.HasBaseReg) // allow "r+i".
1919 break;
1920 return false; // disallow "r+r" or "r+r+i".
1921 default:
1922 return false;
1923 }
1924
1925 return true;
1926}
1927
1929 return isInt<12>(Imm);
1930}
1931
1933 return isInt<12>(Imm);
1934}
1935
1936// On RV32, 64-bit integers are split into their high and low parts and held
1937// in two different registers, so the trunc is free since the low register can
1938// just be used.
1939// FIXME: Should we consider i64->i32 free on RV64 to match the EVT version of
1940// isTruncateFree?
1942 if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
1943 return false;
1944 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
1945 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
1946 return (SrcBits == 64 && DestBits == 32);
1947}
1948
1950 // We consider i64->i32 free on RV64 since we have good selection of W
1951 // instructions that make promoting operations back to i64 free in many cases.
1952 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
1953 !DstVT.isInteger())
1954 return false;
1955 unsigned SrcBits = SrcVT.getSizeInBits();
1956 unsigned DestBits = DstVT.getSizeInBits();
1957 return (SrcBits == 64 && DestBits == 32);
1958}
1959
1961 EVT SrcVT = Val.getValueType();
1962 // free truncate from vnsrl and vnsra
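 // For example, (trunc (srl X, 16)) from i32 to i16 elements maps onto a
 // single vnsrl.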
1963 if (Subtarget.hasVInstructions() &&
1964 (Val.getOpcode() == ISD::SRL || Val.getOpcode() == ISD::SRA) &&
1965 SrcVT.isVector() && VT2.isVector()) {
1966 unsigned SrcBits = SrcVT.getVectorElementType().getSizeInBits();
1967 unsigned DestBits = VT2.getVectorElementType().getSizeInBits();
1968 if (SrcBits == DestBits * 2) {
1969 return true;
1970 }
1971 }
1972 return TargetLowering::isTruncateFree(Val, VT2);
1973}
1974
1976 // Zexts are free if they can be combined with a load.
1977 // Don't advertise i32->i64 zextload as being free for RV64. It interacts
1978 // poorly with type legalization of compares preferring sext.
1979 if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
1980 EVT MemVT = LD->getMemoryVT();
1981 if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
1982 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
1983 LD->getExtensionType() == ISD::ZEXTLOAD))
1984 return true;
1985 }
1986
1987 return TargetLowering::isZExtFree(Val, VT2);
1988}
1989
1991 return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
1992}
1993
1995 return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
1996}
1997
1999 return Subtarget.hasStdExtZbb() ||
2000 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2001}
2002
2004 return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
2005 (Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit());
2006}
2007
2009 const Instruction &AndI) const {
2010 // We expect to be able to match a bit extraction instruction if the Zbs
2011 // extension is supported and the mask is a power of two. However, we
2012 // conservatively return false if the mask would fit in an ANDI instruction,
2013 // on the basis that it's possible the sinking+duplication of the AND in
2014 // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
2015 // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
2016 if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
2017 return false;
2018 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
2019 if (!Mask)
2020 return false;
2021 return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
2022}
2023
2025 EVT VT = Y.getValueType();
2026
2027 // FIXME: Support vectors once we have tests.
2028 if (VT.isVector())
2029 return false;
2030
2031 return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
2032 (!isa<ConstantSDNode>(Y) || cast<ConstantSDNode>(Y)->isOpaque());
2033}
2034
2036 // Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
2037 if (Subtarget.hasStdExtZbs())
2038 return X.getValueType().isScalarInteger();
2039 auto *C = dyn_cast<ConstantSDNode>(Y);
2040 // XTheadBs provides th.tst (similar to bexti), if Y is a constant
2041 if (Subtarget.hasVendorXTHeadBs())
2042 return C != nullptr;
2043 // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
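 // Bit positions 0-10 keep the mask (1 << Y) within ANDI's 12-bit signed
 // immediate range.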
2044 return C && C->getAPIntValue().ule(10);
2045}
2046
2048 EVT VT) const {
2049 // Only enable for rvv.
2050 if (!VT.isVector() || !Subtarget.hasVInstructions())
2051 return false;
2052
2053 if (VT.isFixedLengthVector() && !isTypeLegal(VT))
2054 return false;
2055
2056 return true;
2057}
2058
2060 Type *Ty) const {
2061 assert(Ty->isIntegerTy());
2062
2063 unsigned BitSize = Ty->getIntegerBitWidth();
2064 if (BitSize > Subtarget.getXLen())
2065 return false;
2066
2067 // Fast path, assume 32-bit immediates are cheap.
2068 int64_t Val = Imm.getSExtValue();
2069 if (isInt<32>(Val))
2070 return true;
2071
2072 // A constant pool entry may be more aligned than the load we're trying to
2073 // replace. If we don't support unaligned scalar mem, prefer the constant
2074 // pool.
2075 // TODO: Can the caller pass down the alignment?
2076 if (!Subtarget.enableUnalignedScalarMem())
2077 return true;
2078
2079 // Prefer to keep the load if it would require many instructions.
2080 // This uses the same threshold we use for constant pools but doesn't
2081 // check useConstantPoolForLargeInts.
2082 // TODO: Should we keep the load only when we're definitely going to emit a
2083 // constant pool?
2084
2086 return Seq.size() <= Subtarget.getMaxBuildIntsCost();
2087}
2088
2092 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
2093 SelectionDAG &DAG) const {
2094 // One interesting pattern that we'd want to form is 'bit extract':
2095 // ((1 >> Y) & 1) ==/!= 0
2096 // But we also need to be careful not to try to reverse that fold.
2097
2098 // Is this '((1 >> Y) & 1)'?
2099 if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
2100 return false; // Keep the 'bit extract' pattern.
2101
2102 // Will this be '((1 >> Y) & 1)' after the transform?
2103 if (NewShiftOpcode == ISD::SRL && CC->isOne())
2104 return true; // Do form the 'bit extract' pattern.
2105
2106 // If 'X' is a constant, and we transform, then we will immediately
2107 // try to undo the fold, thus causing endless combine loop.
2108 // So only do the transform if X is not a constant. This matches the default
2109 // implementation of this function.
2110 return !XC;
2111}
2112
2114 unsigned Opc = VecOp.getOpcode();
2115
2116 // Assume target opcodes can't be scalarized.
2117 // TODO - do we have any exceptions?
2118 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
2119 return false;
2120
2121 // If the vector op is not supported, try to convert to scalar.
2122 EVT VecVT = VecOp.getValueType();
2123 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
2124 return true;
2125
2126 // If the vector op is supported, but the scalar op is not, the transform may
2127 // not be worthwhile.
2128 // Permit the transform if the vector binary operation can be converted to a
2129 // scalar binary operation which is custom lowered with an illegal type.
2130 EVT ScalarVT = VecVT.getScalarType();
2131 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT) ||
2132 isOperationCustom(Opc, ScalarVT);
2133}
2134
2136 const GlobalAddressSDNode *GA) const {
2137 // In order to maximise the opportunity for common subexpression elimination,
2138 // keep a separate ADD node for the global address offset instead of folding
2139 // it in the global address node. Later peephole optimisations may choose to
2140 // fold it back in when profitable.
2141 return false;
2142}
2143
2144// Returns 0-31 if the fli instruction is available for the type and this is
2145// legal FP immediate for the type. Returns -1 otherwise.
2147 if (!Subtarget.hasStdExtZfa())
2148 return -1;
2149
2150 bool IsSupportedVT = false;
2151 if (VT == MVT::f16) {
2152 IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
2153 } else if (VT == MVT::f32) {
2154 IsSupportedVT = true;
2155 } else if (VT == MVT::f64) {
2156 assert(Subtarget.hasStdExtD() && "Expect D extension");
2157 IsSupportedVT = true;
2158 }
2159
2160 if (!IsSupportedVT)
2161 return -1;
2162
2163 return RISCVLoadFPImm::getLoadFPImm(Imm);
2164}
2165
2167 bool ForCodeSize) const {
2168 bool IsLegalVT = false;
2169 if (VT == MVT::f16)
2170 IsLegalVT = Subtarget.hasStdExtZfhminOrZhinxmin();
2171 else if (VT == MVT::f32)
2172 IsLegalVT = Subtarget.hasStdExtFOrZfinx();
2173 else if (VT == MVT::f64)
2174 IsLegalVT = Subtarget.hasStdExtDOrZdinx();
2175 else if (VT == MVT::bf16)
2176 IsLegalVT = Subtarget.hasStdExtZfbfmin();
2177
2178 if (!IsLegalVT)
2179 return false;
2180
2181 if (getLegalZfaFPImm(Imm, VT) >= 0)
2182 return true;
2183
2184 // Cannot create a 64-bit floating-point immediate value for rv32.
2185 if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
2186 // td can handle +0.0 or -0.0 already.
2187 // -0.0 can be created by fmv + fneg.
2188 return Imm.isZero();
2189 }
2190
2191 // Special case: fmv + fneg
2192 if (Imm.isNegZero())
2193 return true;
2194
2195 // Building an integer and then converting requires a fmv at the end of
2196 // the integer sequence. The fmv is not required for Zfinx.
2197 const int FmvCost = Subtarget.hasStdExtZfinx() ? 0 : 1;
2198 const int Cost =
2199 FmvCost + RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
2200 Subtarget.getXLen(), Subtarget);
2201 return Cost <= FPImmCost;
2202}
2203
2204// TODO: This is very conservative.
2206 unsigned Index) const {
2208 return false;
2209
2210 // Only support extracting a fixed from a fixed vector for now.
2211 if (ResVT.isScalableVector() || SrcVT.isScalableVector())
2212 return false;
2213
2214 EVT EltVT = ResVT.getVectorElementType();
2215 assert(EltVT == SrcVT.getVectorElementType() && "Should hold for node");
2216
2217 // The smallest type we can slide is i8.
2218 // TODO: We can extract index 0 from a mask vector without a slide.
2219 if (EltVT == MVT::i1)
2220 return false;
2221
2222 unsigned ResElts = ResVT.getVectorNumElements();
2223 unsigned SrcElts = SrcVT.getVectorNumElements();
2224
2225 unsigned MinVLen = Subtarget.getRealMinVLen();
2226 unsigned MinVLMAX = MinVLen / EltVT.getSizeInBits();
2227
2228 // If we're extracting only data from the first VLEN bits of the source
2229 // then we can always do this with an m1 vslidedown.vx. Restricting the
2230 // Index ensures we can use a vslidedown.vi.
2231 // TODO: We can generalize this when the exact VLEN is known.
2232 if (Index + ResElts <= MinVLMAX && Index < 31)
2233 return true;
2234
2235 // Conservatively only handle extracting half of a vector.
2236 // TODO: We can do arbitrary slidedowns, but for now only support extracting
2237 // the upper half of a vector until we have more test coverage.
2238 // TODO: For sizes which aren't multiples of VLEN sizes, this may not be
2239 // a cheap extract. However, this case is important in practice for
2240 // shuffled extracts of longer vectors. How do we resolve this?
2241 return (ResElts * 2) == SrcElts && (Index == 0 || Index == ResElts);
2242}
2243
2246 EVT VT) const {
2247 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2248 // We might still end up using a GPR but that will be decided based on ABI.
2249 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2250 !Subtarget.hasStdExtZfhminOrZhinxmin())
2251 return MVT::f32;
2252
2254
2255 return PartVT;
2256}
2257
2258unsigned
2260 std::optional<MVT> RegisterVT) const {
2261 // Pair inline assembly operand
2262 if (VT == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) && RegisterVT &&
2263 *RegisterVT == MVT::Untyped)
2264 return 1;
2265
2266 return TargetLowering::getNumRegisters(Context, VT, RegisterVT);
2267}
2268
2271 EVT VT) const {
2272 // Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
2273 // We might still end up using a GPR but that will be decided based on ABI.
2274 if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
2275 !Subtarget.hasStdExtZfhminOrZhinxmin())
2276 return 1;
2277
2279}
2280
2282 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2283 unsigned &NumIntermediates, MVT &RegisterVT) const {
2285 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
2286
2287 return NumRegs;
2288}
2289
2290// Changes the condition code and swaps operands if necessary, so the SetCC
2291// operation matches one of the comparisons supported directly by branches
2292// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
2293// with 1/-1.
2294static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
2295 ISD::CondCode &CC, SelectionDAG &DAG) {
2296 // If this is a single bit test that can't be handled by ANDI, shift the
2297 // bit to be tested to the MSB and perform a signed compare with 0.
2298 if (isIntEqualitySetCC(CC) && isNullConstant(RHS) &&
2299 LHS.getOpcode() == ISD::AND && LHS.hasOneUse() &&
2300 isa<ConstantSDNode>(LHS.getOperand(1))) {
2301 uint64_t Mask = LHS.getConstantOperandVal(1);
2302 if ((isPowerOf2_64(Mask) || isMask_64(Mask)) && !isInt<12>(Mask)) {
2303 unsigned ShAmt = 0;
2304 if (isPowerOf2_64(Mask)) {
2306 ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
2307 } else {
2308 ShAmt = LHS.getValueSizeInBits() - llvm::bit_width(Mask);
2309 }
2310
2311 LHS = LHS.getOperand(0);
2312 if (ShAmt != 0)
2313 LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS,
2314 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
2315 return;
2316 }
2317 }
2318
2319 if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
2320 int64_t C = RHSC->getSExtValue();
2321 switch (CC) {
2322 default: break;
2323 case ISD::SETGT:
2324 // Convert X > -1 to X >= 0.
2325 if (C == -1) {
2326 RHS = DAG.getConstant(0, DL, RHS.getValueType());
2327 CC = ISD::SETGE;
2328 return;
2329 }
2330 break;
2331 case ISD::SETLT:
2332 // Convert X < 1 to 0 >= X.
2333 if (C == 1) {
2334 RHS = LHS;
2335 LHS = DAG.getConstant(0, DL, RHS.getValueType());
2336 CC = ISD::SETGE;
2337 return;
2338 }
2339 break;
2340 }
2341 }
2342
2343 switch (CC) {
2344 default:
2345 break;
2346 case ISD::SETGT:
2347 case ISD::SETLE:
2348 case ISD::SETUGT:
2349 case ISD::SETULE:
2351 std::swap(LHS, RHS);
2352 break;
2353 }
2354}
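// For example, assuming XLEN == 64: (X & 0x800) == 0 cannot use ANDI because
// 0x800 does not fit in a signed 12-bit immediate, so bit 11 is shifted into
// the MSB and tested with a signed compare against 0, i.e. (X << 52) >= 0.
// Likewise X > -1 becomes X >= 0, which a branch can check against the zero
// register.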
2355
2357 if (VT.isRISCVVectorTuple()) {
2358 if (VT.SimpleTy >= MVT::riscv_nxv1i8x2 &&
2359 VT.SimpleTy <= MVT::riscv_nxv1i8x8)
2360 return RISCVII::LMUL_F8;
2361 if (VT.SimpleTy >= MVT::riscv_nxv2i8x2 &&
2362 VT.SimpleTy <= MVT::riscv_nxv2i8x8)
2363 return RISCVII::LMUL_F4;
2364 if (VT.SimpleTy >= MVT::riscv_nxv4i8x2 &&
2365 VT.SimpleTy <= MVT::riscv_nxv4i8x8)
2366 return RISCVII::LMUL_F2;
2367 if (VT.SimpleTy >= MVT::riscv_nxv8i8x2 &&
2368 VT.SimpleTy <= MVT::riscv_nxv8i8x8)
2369 return RISCVII::LMUL_1;
2370 if (VT.SimpleTy >= MVT::riscv_nxv16i8x2 &&
2371 VT.SimpleTy <= MVT::riscv_nxv16i8x4)
2372 return RISCVII::LMUL_2;
2373 if (VT.SimpleTy == MVT::riscv_nxv32i8x2)
2374 return RISCVII::LMUL_4;
2375 llvm_unreachable("Invalid vector tuple type LMUL.");
2376 }
2377
2378 assert(VT.isScalableVector() && "Expecting a scalable vector type");
2379 unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
2380 if (VT.getVectorElementType() == MVT::i1)
2381 KnownSize *= 8;
2382
2383 switch (KnownSize) {
2384 default:
2385 llvm_unreachable("Invalid LMUL.");
2386 case 8:
2388 case 16:
2390 case 32:
2392 case 64:
2394 case 128:
2396 case 256:
2398 case 512:
2400 }
2401}
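// For example, assuming RVVBitsPerBlock == 64: MVT::nxv2i32 has a known
// minimum size of 64 bits and maps to LMUL_1 (one vector register), while
// MVT::nxv8i32 (256 bits) maps to LMUL_4. An nxv8i1 mask is scaled by 8
// first, so it is also treated as LMUL_1 here.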
2402
2404 switch (LMul) {
2405 default:
2406 llvm_unreachable("Invalid LMUL.");
2411 return RISCV::VRRegClassID;
2413 return RISCV::VRM2RegClassID;
2415 return RISCV::VRM4RegClassID;
2417 return RISCV::VRM8RegClassID;
2418 }
2419}
2420
2421unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
2422 RISCVII::VLMUL LMUL = getLMUL(VT);
2423 if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
2424 LMUL == RISCVII::VLMUL::LMUL_F4 ||
2425 LMUL == RISCVII::VLMUL::LMUL_F2 ||
2426 LMUL == RISCVII::VLMUL::LMUL_1) {
2427 static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
2428 "Unexpected subreg numbering");
2429 return RISCV::sub_vrm1_0 + Index;
2430 }
2431 if (LMUL == RISCVII::VLMUL::LMUL_2) {
2432 static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
2433 "Unexpected subreg numbering");
2434 return RISCV::sub_vrm2_0 + Index;
2435 }
2436 if (LMUL == RISCVII::VLMUL::LMUL_4) {
2437 static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
2438 "Unexpected subreg numbering");
2439 return RISCV::sub_vrm4_0 + Index;
2440 }
2441 llvm_unreachable("Invalid vector type.");
2442}
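// Sketch of the mapping above: getSubregIndexByMVT(MVT::nxv4i32, 1) sees an
// LMUL_2 type and returns RISCV::sub_vrm2_0 + 1 (sub_vrm2_1), i.e. the second
// LMUL=2 slice of a larger VRM4/VRM8 register group, assuming the standard
// sub_vrm* subregister indices.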
2443
2445 if (VT.isRISCVVectorTuple()) {
2446 unsigned NF = VT.getRISCVVectorTupleNumFields();
2447 unsigned RegsPerField =
2448 std::max(1U, (unsigned)VT.getSizeInBits().getKnownMinValue() /
2449 (NF * RISCV::RVVBitsPerBlock));
2450 switch (RegsPerField) {
2451 case 1:
2452 if (NF == 2)
2453 return RISCV::VRN2M1RegClassID;
2454 if (NF == 3)
2455 return RISCV::VRN3M1RegClassID;
2456 if (NF == 4)
2457 return RISCV::VRN4M1RegClassID;
2458 if (NF == 5)
2459 return RISCV::VRN5M1RegClassID;
2460 if (NF == 6)
2461 return RISCV::VRN6M1RegClassID;
2462 if (NF == 7)
2463 return RISCV::VRN7M1RegClassID;
2464 if (NF == 8)
2465 return RISCV::VRN8M1RegClassID;
2466 break;
2467 case 2:
2468 if (NF == 2)
2469 return RISCV::VRN2M2RegClassID;
2470 if (NF == 3)
2471 return RISCV::VRN3M2RegClassID;
2472 if (NF == 4)
2473 return RISCV::VRN4M2RegClassID;
2474 break;
2475 case 4:
2476 assert(NF == 2);
2477 return RISCV::VRN2M4RegClassID;
2478 default:
2479 break;
2480 }
2481 llvm_unreachable("Invalid vector tuple type RegClass.");
2482 }
2483
2484 if (VT.getVectorElementType() == MVT::i1)
2485 return RISCV::VRRegClassID;
2486 return getRegClassIDForLMUL(getLMUL(VT));
2487}
2488
2489// Attempt to decompose a subvector insert/extract between VecVT and
2490// SubVecVT via subregister indices. Returns the subregister index that
2491// can perform the subvector insert/extract with the given element index, as
2492// well as the index corresponding to any leftover subvectors that must be
2493// further inserted/extracted within the register class for SubVecVT.
2494std::pair<unsigned, unsigned>
2496 MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
2497 const RISCVRegisterInfo *TRI) {
2498 static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
2499 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
2500 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
2501 "Register classes not ordered");
2502 unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
2503 unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
2504
2505 // If VecVT is a vector tuple type, either it is a tuple type with the same
2506 // RegClass as SubVecVT, or SubVecVT is actually a subvector of VecVT.
2507 if (VecVT.isRISCVVectorTuple()) {
2508 if (VecRegClassID == SubRegClassID)
2509 return {RISCV::NoSubRegister, 0};
2510
2511 assert(SubVecVT.isScalableVector() &&
2512 "Only allow scalable vector subvector.");
2513 assert(getLMUL(VecVT) == getLMUL(SubVecVT) &&
2514 "Invalid vector tuple insert/extract for vector and subvector with "
2515 "different LMUL.");
2516 return {getSubregIndexByMVT(VecVT, InsertExtractIdx), 0};
2517 }
2518
2519 // Try to compose a subregister index that takes us from the incoming
2520 // LMUL>1 register class down to the outgoing one. At each step we halve
2521 // the LMUL:
2522 // nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
2523 // Note that this is not guaranteed to find a subregister index, such as
2524 // when we are extracting from one VR type to another.
2525 unsigned SubRegIdx = RISCV::NoSubRegister;
2526 for (const unsigned RCID :
2527 {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
2528 if (VecRegClassID > RCID && SubRegClassID <= RCID) {
2529 VecVT = VecVT.getHalfNumVectorElementsVT();
2530 bool IsHi =
2531 InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
2532 SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
2533 getSubregIndexByMVT(VecVT, IsHi));
2534 if (IsHi)
2535 InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
2536 }
2537 return {SubRegIdx, InsertExtractIdx};
2538}
2539
2540// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
2541// stores for those types.
2542bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
2543 return !Subtarget.useRVVForFixedLengthVectors() ||
2544 (VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
2545}
2546
2548 if (!ScalarTy.isSimple())
2549 return false;
2550 switch (ScalarTy.getSimpleVT().SimpleTy) {
2551 case MVT::iPTR:
2552 return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
2553 case MVT::i8:
2554 case MVT::i16:
2555 case MVT::i32:
2556 return true;
2557 case MVT::i64:
2558 return Subtarget.hasVInstructionsI64();
2559 case MVT::f16:
2560 return Subtarget.hasVInstructionsF16Minimal();
2561 case MVT::bf16:
2562 return Subtarget.hasVInstructionsBF16Minimal();
2563 case MVT::f32:
2564 return Subtarget.hasVInstructionsF32();
2565 case MVT::f64:
2566 return Subtarget.hasVInstructionsF64();
2567 default:
2568 return false;
2569 }
2570}
2571
2572
2573unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
2574 return NumRepeatedDivisors;
2575}
2576
2578 assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
2579 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
2580 "Unexpected opcode");
2581 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
2582 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
2584 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
2585 if (!II)
2586 return SDValue();
2587 return Op.getOperand(II->VLOperand + 1 + HasChain);
2588}
2589
2591 const RISCVSubtarget &Subtarget) {
2592 assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
2593 if (!Subtarget.useRVVForFixedLengthVectors())
2594 return false;
2595
2596 // We only support a set of vector types with a consistent maximum fixed size
2597 // across all supported vector element types to avoid legalization issues.
2598 // Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
2599 // fixed-length vector type we support is 1024 bytes.
2600 if (VT.getFixedSizeInBits() > 1024 * 8)
2601 return false;
2602
2603 unsigned MinVLen = Subtarget.getRealMinVLen();
2604
2605 MVT EltVT = VT.getVectorElementType();
2606
2607 // Don't use RVV for vectors we cannot scalarize if required.
2608 switch (EltVT.SimpleTy) {
2609 // i1 is supported but has different rules.
2610 default:
2611 return false;
2612 case MVT::i1:
2613 // Masks can only use a single register.
2614 if (VT.getVectorNumElements() > MinVLen)
2615 return false;
2616 MinVLen /= 8;
2617 break;
2618 case MVT::i8:
2619 case MVT::i16:
2620 case MVT::i32:
2621 break;
2622 case MVT::i64:
2623 if (!Subtarget.hasVInstructionsI64())
2624 return false;
2625 break;
2626 case MVT::f16:
2627 if (!Subtarget.hasVInstructionsF16Minimal())
2628 return false;
2629 break;
2630 case MVT::bf16:
2631 if (!Subtarget.hasVInstructionsBF16Minimal())
2632 return false;
2633 break;
2634 case MVT::f32:
2635 if (!Subtarget.hasVInstructionsF32())
2636 return false;
2637 break;
2638 case MVT::f64:
2639 if (!Subtarget.hasVInstructionsF64())
2640 return false;
2641 break;
2642 }
2643
2644 // Reject elements larger than ELEN.
2645 if (EltVT.getSizeInBits() > Subtarget.getELen())
2646 return false;
2647
2648 unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
2649 // Don't use RVV for types that don't fit.
2650 if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
2651 return false;
2652
2653 // TODO: Perhaps an artificial restriction, but worth having whilst getting
2654 // the base fixed length RVV support in place.
2655 if (!VT.isPow2VectorType())
2656 return false;
2657
2658 return true;
2659}
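// For example, with MinVLen == 128: v32i8 occupies 256 bits, i.e. LMUL 2, and
// is accepted as long as 2 does not exceed the configured maximum LMUL for
// fixed-length vectors, while v4i64 is rejected on a Zve32x-only target since
// i64 vector elements are not available there.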
2660
2661bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
2662 return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
2663}
2664
2665// Return the largest legal scalable vector type that matches VT's element type.
2667 const RISCVSubtarget &Subtarget) {
2668 // This may be called before legal types are set up.
2669 assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
2670 useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
2671 "Expected legal fixed length vector!");
2672
2673 unsigned MinVLen = Subtarget.getRealMinVLen();
2674 unsigned MaxELen = Subtarget.getELen();
2675
2676 MVT EltVT = VT.getVectorElementType();
2677 switch (EltVT.SimpleTy) {
2678 default:
2679 llvm_unreachable("unexpected element type for RVV container");
2680 case MVT::i1:
2681 case MVT::i8:
2682 case MVT::i16:
2683 case MVT::i32:
2684 case MVT::i64:
2685 case MVT::bf16:
2686 case MVT::f16:
2687 case MVT::f32:
2688 case MVT::f64: {
2689 // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
2690 // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
2691 // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
2692 unsigned NumElts =
2694 NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
2695 assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
2696 return MVT::getScalableVectorVT(EltVT, NumElts);
2697 }
2698 }
2699}
2700
2702 const RISCVSubtarget &Subtarget) {
2704 Subtarget);
2705}
2706
2708 return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
2709}
2710
2711// Grow V to consume an entire RVV register.
2713 const RISCVSubtarget &Subtarget) {
2714 assert(VT.isScalableVector() &&
2715 "Expected to convert into a scalable vector!");
2716 assert(V.getValueType().isFixedLengthVector() &&
2717 "Expected a fixed length vector operand!");
2718 SDLoc DL(V);
2719 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
2720 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
2721}
2722
2723// Shrink V so it's just big enough to maintain a VT's worth of data.
2725 const RISCVSubtarget &Subtarget) {
2727 "Expected to convert into a fixed length vector!");
2728 assert(V.getValueType().isScalableVector() &&
2729 "Expected a scalable vector operand!");
2730 SDLoc DL(V);
2731 SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
2732 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
2733}
2734
2735/// Return the mask type suitable for masking the provided
2736/// vector type. This is simply an i1 element type vector of the same
2737/// (possibly scalable) length.
2738static MVT getMaskTypeFor(MVT VecVT) {
2739 assert(VecVT.isVector());
2741 return MVT::getVectorVT(MVT::i1, EC);
2742}
2743
2744/// Creates an all-ones mask suitable for masking a vector of type VecVT with
2745/// vector length VL.
2746static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
2747 SelectionDAG &DAG) {
2748 MVT MaskVT = getMaskTypeFor(VecVT);
2749 return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
2750}
2751
2752static std::pair<SDValue, SDValue>
2754 const RISCVSubtarget &Subtarget) {
2755 assert(VecVT.isScalableVector() && "Expecting a scalable vector");
2756 SDValue VL = DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
2757 SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
2758 return {Mask, VL};
2759}
2760
2761static std::pair<SDValue, SDValue>
2762getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
2763 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
2764 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2765 SDValue VL = DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
2766 SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
2767 return {Mask, VL};
2768}
2769
2770// Gets the two common "VL" operands: an all-ones mask and the vector length.
2771// VecVT is a vector type, either fixed-length or scalable. If VecVT is
2772// fixed-length, ContainerVT is the scalable vector type it is contained in;
2773// otherwise, if VecVT is scalable, ContainerVT should be the same as VecVT.
2774static std::pair<SDValue, SDValue>
2775getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
2776 const RISCVSubtarget &Subtarget) {
2777 if (VecVT.isFixedLengthVector())
2778 return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
2779 Subtarget);
2780 assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
2781 return getDefaultScalableVLOps(ContainerVT, DL, DAG, Subtarget);
2782}
2783
2785 SelectionDAG &DAG) const {
2786 assert(VecVT.isScalableVector() && "Expected scalable vector");
2787 return DAG.getElementCount(DL, Subtarget.getXLenVT(),
2788 VecVT.getVectorElementCount());
2789}
2790
2791std::pair<unsigned, unsigned>
2793 const RISCVSubtarget &Subtarget) {
2794 assert(VecVT.isScalableVector() && "Expected scalable vector");
2795
2796 unsigned EltSize = VecVT.getScalarSizeInBits();
2797 unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
2798
2799 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
2800 unsigned MaxVLMAX =
2801 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
2802
2803 unsigned VectorBitsMin = Subtarget.getRealMinVLen();
2804 unsigned MinVLMAX =
2805 RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
2806
2807 return std::make_pair(MinVLMAX, MaxVLMAX);
2808}
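// For example, for MVT::nxv4i32 (SEW = 32, known minimum size 128 bits, i.e.
// LMUL 2): with Zvl128b the minimum VLMAX is 128 / 32 * 2 = 8, and if the
// maximum VLEN is 512 the maximum VLMAX is 512 / 32 * 2 = 32.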
2809
2810// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
2811// of either are (currently) supported. This can get us into an infinite loop
2812// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
2813// as a ..., etc.
2814// Until either (or both) of these can reliably lower any node, reporting that
2815// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
2816// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
2817// which is not desirable.
2819 EVT VT, unsigned DefinedValues) const {
2820 return false;
2821}
2822
2824 // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1, but it is
2825 // implementation-defined.
2826 if (!VT.isVector())
2828 unsigned DLenFactor = Subtarget.getDLenFactor();
2829 unsigned Cost;
2830 if (VT.isScalableVector()) {
2831 unsigned LMul;
2832 bool Fractional;
2833 std::tie(LMul, Fractional) =
2835 if (Fractional)
2836 Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
2837 else
2838 Cost = (LMul * DLenFactor);
2839 } else {
2840 Cost = divideCeil(VT.getSizeInBits(), Subtarget.getRealMinVLen() / DLenFactor);
2841 }
2842 return Cost;
2843}
2844
2845
2846/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
2847/// is generally quadratic in the number of vregs implied by LMUL. Note that
2848/// the operands (index and possibly mask) are handled separately.
2850 return getLMULCost(VT) * getLMULCost(VT);
2851}
2852
2853/// Return the cost of a vrgather.vi (or vx) instruction for the type VT.
2854/// vrgather.vi/vx may be linear in the number of vregs implied by LMUL,
2855/// or may track the vrgather.vv cost. It is implementation-dependent.
2857 return getLMULCost(VT);
2858}
2859
2860/// Return the cost of a vslidedown.vx or vslideup.vx instruction
2861/// for the type VT. (This does not cover the vslide1up or vslide1down
2862/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2863/// or may track the vrgather.vv cost. It is implementation-dependent.
2865 return getLMULCost(VT);
2866}
2867
2868/// Return the cost of a vslidedown.vi or vslideup.vi instruction
2869/// for the type VT. (This does not cover the vslide1up or vslide1down
2870/// variants.) Slides may be linear in the number of vregs implied by LMUL,
2871/// or may track the vrgather.vv cost. It is implementation-dependent.
2873 return getLMULCost(VT);
2874}
2875
2877 const RISCVSubtarget &Subtarget) {
2878 // f16 conversions are promoted to f32 when Zfh/Zhinx are not supported.
2879 // bf16 conversions are always promoted to f32.
2880 if ((Op.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2881 Op.getValueType() == MVT::bf16) {
2882 bool IsStrict = Op->isStrictFPOpcode();
2883
2884 SDLoc DL(Op);
2885 if (IsStrict) {
2886 SDValue Val = DAG.getNode(Op.getOpcode(), DL, {MVT::f32, MVT::Other},
2887 {Op.getOperand(0), Op.getOperand(1)});
2888 return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
2889 {Op.getValueType(), MVT::Other},
2890 {Val.getValue(1), Val.getValue(0),
2891 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
2892 }
2893 return DAG.getNode(
2894 ISD::FP_ROUND, DL, Op.getValueType(),
2895 DAG.getNode(Op.getOpcode(), DL, MVT::f32, Op.getOperand(0)),
2896 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
2897 }
2898
2899 // Other operations are legal.
2900 return Op;
2901}
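// For example, an operation whose result type is bf16 is re-issued in f32 and
// rounded back, roughly fp_round(op(fp_extend operands), 0); the strict form
// routes the chain through STRICT_FP_ROUND instead. This is a sketch of the
// replacement DAG shape, not of the final instruction selection.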
2902
2904 const RISCVSubtarget &Subtarget) {
2905 // RISC-V FP-to-int conversions saturate to the destination register size, but
2906 // don't produce 0 for nan. We can use a conversion instruction and fix the
2907 // nan case with a compare and a select.
2908 SDValue Src = Op.getOperand(0);
2909
2910 MVT DstVT = Op.getSimpleValueType();
2911 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2912
2913 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
2914
2915 if (!DstVT.isVector()) {
2916 // For bf16, or for f16 in the absence of Zfh, promote to f32, then saturate
2917 // the result.
2918 if ((Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
2919 Src.getValueType() == MVT::bf16) {
2920 Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
2921 }
2922
2923 unsigned Opc;
2924 if (SatVT == DstVT)
2925 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
2926 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
2928 else
2929 return SDValue();
2930 // FIXME: Support other SatVTs by clamping before or after the conversion.
2931
2932 SDLoc DL(Op);
2933 SDValue FpToInt = DAG.getNode(
2934 Opc, DL, DstVT, Src,
2936
2937 if (Opc == RISCVISD::FCVT_WU_RV64)
2938 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
2939
2940 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
2941 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt,
2943 }
2944
2945 // Vectors.
2946
2947 MVT DstEltVT = DstVT.getVectorElementType();
2948 MVT SrcVT = Src.getSimpleValueType();
2949 MVT SrcEltVT = SrcVT.getVectorElementType();
2950 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
2951 unsigned DstEltSize = DstEltVT.getSizeInBits();
2952
2953 // Only handle saturating to the destination type.
2954 if (SatVT != DstEltVT)
2955 return SDValue();
2956
2957 MVT DstContainerVT = DstVT;
2958 MVT SrcContainerVT = SrcVT;
2959 if (DstVT.isFixedLengthVector()) {
2960 DstContainerVT = getContainerForFixedLengthVector(DAG, DstVT, Subtarget);
2961 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
2962 assert(DstContainerVT.getVectorElementCount() ==
2963 SrcContainerVT.getVectorElementCount() &&
2964 "Expected same element count");
2965 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
2966 }
2967
2968 SDLoc DL(Op);
2969
2970 auto [Mask, VL] = getDefaultVLOps(DstVT, DstContainerVT, DL, DAG, Subtarget);
2971
2972 SDValue IsNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
2973 {Src, Src, DAG.getCondCode(ISD::SETNE),
2974 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
2975
2976 // If we need to widen by more than 1 step, promote the FP type, then do a
2977 // widening convert.
2978 if (DstEltSize > (2 * SrcEltSize)) {
2979 assert(SrcContainerVT.getVectorElementType() == MVT::f16 && "Unexpected VT!");
2980 MVT InterVT = SrcContainerVT.changeVectorElementType(MVT::f32);
2981 Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterVT, Src, Mask, VL);
2982 }
2983
2984 MVT CvtContainerVT = DstContainerVT;
2985 MVT CvtEltVT = DstEltVT;
2986 if (SrcEltSize > (2 * DstEltSize)) {
2987 CvtEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
2988 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2989 }
2990
2991 unsigned RVVOpc =
2993 SDValue Res = DAG.getNode(RVVOpc, DL, CvtContainerVT, Src, Mask, VL);
2994
2995 while (CvtContainerVT != DstContainerVT) {
2996 CvtEltVT = MVT::getIntegerVT(CvtEltVT.getSizeInBits() / 2);
2997 CvtContainerVT = CvtContainerVT.changeVectorElementType(CvtEltVT);
2998 // Rounding mode here is arbitrary since we aren't shifting out any bits.
2999 unsigned ClipOpc = IsSigned ? RISCVISD::TRUNCATE_VECTOR_VL_SSAT
3001 Res = DAG.getNode(ClipOpc, DL, CvtContainerVT, Res, Mask, VL);
3002 }
3003
3004 SDValue SplatZero = DAG.getNode(
3005 RISCVISD::VMV_V_X_VL, DL, DstContainerVT, DAG.getUNDEF(DstContainerVT),
3006 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
3007 Res = DAG.getNode(RISCVISD::VMERGE_VL, DL, DstContainerVT, IsNan, SplatZero,
3008 Res, DAG.getUNDEF(DstContainerVT), VL);
3009
3010 if (DstVT.isFixedLengthVector())
3011 Res = convertFromScalableVector(DstVT, Res, DAG, Subtarget);
3012
3013 return Res;
3014}
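// Scalar example (a sketch, assuming RV64 with F): an i32 fptosi.sat of an f32
// value can be selected as an fcvt.w.s with the RTZ rounding mode, followed by
// a self-compare and a select that forces the result to 0 when the input is
// NaN, matching the comment at the top of this function.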
3015
3017 const RISCVSubtarget &Subtarget) {
3018 bool IsStrict = Op->isStrictFPOpcode();
3019 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3020
3021 // f16 conversions are promoted to f32 when Zfh/Zhinx is not enabled.
3022 // bf16 conversions are always promoted to f32.
3023 if ((SrcVal.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx()) ||
3024 SrcVal.getValueType() == MVT::bf16) {
3025 SDLoc DL(Op);
3026 if (IsStrict) {
3027 SDValue Ext =
3028 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
3029 {Op.getOperand(0), SrcVal});
3030 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
3031 {Ext.getValue(1), Ext.getValue(0)});
3032 }
3033 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
3034 DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
3035 }
3036
3037 // Other operations are legal.
3038 return Op;
3039}
3040
3042 switch (Opc) {
3043 case ISD::FROUNDEVEN:
3045 case ISD::VP_FROUNDEVEN:
3046 return RISCVFPRndMode::RNE;
3047 case ISD::FTRUNC:
3048 case ISD::STRICT_FTRUNC:
3049 case ISD::VP_FROUNDTOZERO:
3050 return RISCVFPRndMode::RTZ;
3051 case ISD::FFLOOR:
3052 case ISD::STRICT_FFLOOR:
3053 case ISD::VP_FFLOOR:
3054 return RISCVFPRndMode::RDN;
3055 case ISD::FCEIL:
3056 case ISD::STRICT_FCEIL:
3057 case ISD::VP_FCEIL:
3058 return RISCVFPRndMode::RUP;
3059 case ISD::FROUND:
3060 case ISD::STRICT_FROUND:
3061 case ISD::VP_FROUND:
3062 return RISCVFPRndMode::RMM;
3063 case ISD::FRINT:
3064 case ISD::VP_FRINT:
3065 return RISCVFPRndMode::DYN;
3066 }
3067
3069}
3070
3071// Expand vector FTRUNC, FCEIL, FFLOOR, FROUND, VP_FCEIL, VP_FFLOOR, VP_FROUND,
3072// VP_FROUNDEVEN, VP_FROUNDTOZERO, VP_FRINT and VP_FNEARBYINT by converting to
3073// the integer domain and back, taking care to avoid converting values that are
3074// NaN or already correct.
3075static SDValue
3077 const RISCVSubtarget &Subtarget) {
3078 MVT VT = Op.getSimpleValueType();
3079 assert(VT.isVector() && "Unexpected type");
3080
3081 SDLoc DL(Op);
3082
3083 SDValue Src = Op.getOperand(0);
3084
3085 MVT ContainerVT = VT;
3086 if (VT.isFixedLengthVector()) {
3087 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3088 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3089 }
3090
3091 SDValue Mask, VL;
3092 if (Op->isVPOpcode()) {
3093 Mask = Op.getOperand(1);
3094 if (VT.isFixedLengthVector())
3095 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
3096 Subtarget);
3097 VL = Op.getOperand(2);
3098 } else {
3099 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3100 }
3101
3102 // Freeze the source since we are increasing the number of uses.
3103 Src = DAG.getFreeze(Src);
3104
3105 // We do the conversion on the absolute value and fix the sign at the end.
3106 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3107
3108 // Determine the largest integer that can be represented exactly. This and
3109 // values larger than it don't have any fractional bits so don't need to
3110 // be converted.
3111 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3112 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3113 APFloat MaxVal = APFloat(FltSem);
3114 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3115 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3116 SDValue MaxValNode =
3117 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3118 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3119 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3120
3121 // If abs(Src) was larger than MaxVal or nan, keep it.
3122 MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
3123 Mask =
3124 DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
3125 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
3126 Mask, Mask, VL});
3127
3128 // Truncate to integer and convert back to FP.
3129 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3130 MVT XLenVT = Subtarget.getXLenVT();
3131 SDValue Truncated;
3132
3133 switch (Op.getOpcode()) {
3134 default:
3135 llvm_unreachable("Unexpected opcode");
3136 case ISD::FRINT:
3137 case ISD::VP_FRINT:
3138 case ISD::FCEIL:
3139 case ISD::VP_FCEIL:
3140 case ISD::FFLOOR:
3141 case ISD::VP_FFLOOR:
3142 case ISD::FROUND:
3143 case ISD::FROUNDEVEN:
3144 case ISD::VP_FROUND:
3145 case ISD::VP_FROUNDEVEN:
3146 case ISD::VP_FROUNDTOZERO: {
3149 Truncated = DAG.getNode(RISCVISD::VFCVT_RM_X_F_VL, DL, IntVT, Src, Mask,
3150 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
3151 break;
3152 }
3153 case ISD::FTRUNC:
3154 Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
3155 Mask, VL);
3156 break;
3157 case ISD::FNEARBYINT:
3158 case ISD::VP_FNEARBYINT:
3159 Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
3160 Mask, VL);
3161 break;
3162 }
3163
3164 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3165 if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
3166 Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
3167 Mask, VL);
3168
3169 // Restore the original sign so that -0.0 is preserved.
3170 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3171 Src, Src, Mask, VL);
3172
3173 if (!VT.isFixedLengthVector())
3174 return Truncated;
3175
3176 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3177}
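// Worked example (a sketch) for a vector floor on f64 elements: |x| is
// compared against 2^52, above which every f64 is already an integer; lanes
// below that bound are converted to integer with the RDN rounding mode and
// back to FP, and the original sign is copied back so -0.0 is preserved.
// Lanes that were NaN or already >= 2^52 keep their original value.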
3178
3179// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
3180// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN values in the
3181// source to qNaN, then converting the new source to integer and back to FP.
3182static SDValue
3184 const RISCVSubtarget &Subtarget) {
3185 SDLoc DL(Op);
3186 MVT VT = Op.getSimpleValueType();
3187 SDValue Chain = Op.getOperand(0);
3188 SDValue Src = Op.getOperand(1);
3189
3190 MVT ContainerVT = VT;
3191 if (VT.isFixedLengthVector()) {
3192 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3193 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3194 }
3195
3196 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3197
3198 // Freeze the source since we are increasing the number of uses.
3199 Src = DAG.getFreeze(Src);
3200
3201 // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
3202 MVT MaskVT = Mask.getSimpleValueType();
3204 DAG.getVTList(MaskVT, MVT::Other),
3205 {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
3206 DAG.getUNDEF(MaskVT), Mask, VL});
3207 Chain = Unorder.getValue(1);
3209 DAG.getVTList(ContainerVT, MVT::Other),
3210 {Chain, Src, Src, Src, Unorder, VL});
3211 Chain = Src.getValue(1);
3212
3213 // We do the conversion on the absolute value and fix the sign at the end.
3214 SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
3215
3216 // Determine the largest integer that can be represented exactly. This and
3217 // values larger than it don't have any fractional bits so don't need to
3218 // be converted.
3219 const fltSemantics &FltSem = ContainerVT.getFltSemantics();
3220 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3221 APFloat MaxVal = APFloat(FltSem);
3222 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3223 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3224 SDValue MaxValNode =
3225 DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
3226 SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
3227 DAG.getUNDEF(ContainerVT), MaxValNode, VL);
3228
3229 // If abs(Src) was larger than MaxVal or nan, keep it.
3230 Mask = DAG.getNode(
3231 RISCVISD::SETCC_VL, DL, MaskVT,
3232 {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
3233
3234 // Truncate to integer and convert back to FP.
3235 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
3236 MVT XLenVT = Subtarget.getXLenVT();
3237 SDValue Truncated;
3238
3239 switch (Op.getOpcode()) {
3240 default:
3241 llvm_unreachable("Unexpected opcode");
3242 case ISD::STRICT_FCEIL:
3243 case ISD::STRICT_FFLOOR:
3244 case ISD::STRICT_FROUND:
3248 Truncated = DAG.getNode(
3249 RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
3250 {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
3251 break;
3252 }
3253 case ISD::STRICT_FTRUNC:
3254 Truncated =
3256 DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
3257 break;
3260 DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
3261 Mask, VL);
3262 break;
3263 }
3264 Chain = Truncated.getValue(1);
3265
3266 // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
3267 if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
3268 Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
3269 DAG.getVTList(ContainerVT, MVT::Other), Chain,
3270 Truncated, Mask, VL);
3271 Chain = Truncated.getValue(1);
3272 }
3273
3274 // Restore the original sign so that -0.0 is preserved.
3275 Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
3276 Src, Src, Mask, VL);
3277
3278 if (VT.isFixedLengthVector())
3279 Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3280 return DAG.getMergeValues({Truncated, Chain}, DL);
3281}
3282
3283static SDValue
3285 const RISCVSubtarget &Subtarget) {
3286 MVT VT = Op.getSimpleValueType();
3287 if (VT.isVector())
3288 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
3289
3290 if (DAG.shouldOptForSize())
3291 return SDValue();
3292
3293 SDLoc DL(Op);
3294 SDValue Src = Op.getOperand(0);
3295
3296 // Create an integer the size of the mantissa with the MSB set. This and all
3297 // values larger than it don't have any fractional bits so don't need to be
3298 // converted.
3299 const fltSemantics &FltSem = VT.getFltSemantics();
3300 unsigned Precision = APFloat::semanticsPrecision(FltSem);
3301 APFloat MaxVal = APFloat(FltSem);
3302 MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
3303 /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
3304 SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
3305
3307 return DAG.getNode(RISCVISD::FROUND, DL, VT, Src, MaxValNode,
3308 DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
3309}
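// For example, for f32 the significand carries 24 bits of precision, so
// MaxValNode is 2^23 = 8388608.0f: any |x| >= 2^23 is already an integer and
// is not routed through the integer domain by the FROUND expansion.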
3310
3311// Expand vector LRINT and LLRINT by converting to the integer domain.
3313 const RISCVSubtarget &Subtarget) {
3314 MVT VT = Op.getSimpleValueType();
3315 assert(VT.isVector() && "Unexpected type");
3316
3317 SDLoc DL(Op);
3318 SDValue Src = Op.getOperand(0);
3319 MVT ContainerVT = VT;
3320
3321 if (VT.isFixedLengthVector()) {
3322 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3323 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
3324 }
3325
3326 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3327 SDValue Truncated = DAG.getNode(
3328 RISCVISD::VFCVT_RM_X_F_VL, DL, ContainerVT, Src, Mask,
3330 VL);
3331
3332 if (!VT.isFixedLengthVector())
3333 return Truncated;
3334
3335 return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
3336}
3337
3338static SDValue
3340 const SDLoc &DL, EVT VT, SDValue Passthru, SDValue Op,
3341 SDValue Offset, SDValue Mask, SDValue VL,
3343 if (Passthru.isUndef())
3345 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3346 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3347 return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
3348}
3349
3350static SDValue
3351getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
3352 EVT VT, SDValue Passthru, SDValue Op, SDValue Offset, SDValue Mask,
3353 SDValue VL,
3355 if (Passthru.isUndef())
3357 SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
3358 SDValue Ops[] = {Passthru, Op, Offset, Mask, VL, PolicyOp};
3359 return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
3360}
3361
3362static MVT getLMUL1VT(MVT VT) {
3364 "Unexpected vector MVT");
3368}
3369
3373 int64_t Addend;
3374};
3375
3376static std::optional<APInt> getExactInteger(const APFloat &APF,
3378 // We will use a SINT_TO_FP to materialize this constant so we should use a
3379 // signed APSInt here.
3380 APSInt ValInt(BitWidth, /*IsUnsigned*/ false);
3381 // We use an arbitrary rounding mode here. If a floating-point is an exact
3382 // integer (e.g., 1.0), the rounding mode does not affect the output value. If
3383 // the rounding mode changes the output value, then it is not an exact
3384 // integer.
3386 bool IsExact;
3387 // If it is out of signed integer range, it will return an invalid operation.
3388 // If it is not an exact integer, IsExact is false.
3389 if ((APF.convertToInteger(ValInt, ArbitraryRM, &IsExact) ==
3391 !IsExact)
3392 return std::nullopt;
3393 return ValInt.extractBits(BitWidth, 0);
3394}
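// For example, getExactInteger(3.0, 8) yields the APInt 3 and
// getExactInteger(-4.0, 8) yields 0xFC (the two's-complement encoding of -4),
// while 2.5 returns std::nullopt because the conversion is inexact.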
3395
3396// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
3397// to the (non-zero) step S and start value X. This can then be lowered as the
3398// RVV sequence (VID * S) + X, for example.
3399// The step S is represented as an integer numerator divided by a positive
3400// denominator. Note that the implementation currently only identifies
3401// sequences in which either the numerator is +/- 1 or the denominator is 1. It
3402// cannot detect 2/3, for example.
3403// Note that this method will also match potentially unappealing index
3404// sequences, like <i32 0, i32 50939494>; however, it is left to the caller to
3405// determine whether this is worth generating code for.
3406//
3407// EltSizeInBits is the size of the type that the sequence will be calculated
3408// in, i.e. SEW for build_vectors or XLEN for address calculations.
3409static std::optional<VIDSequence> isSimpleVIDSequence(SDValue Op,
3410 unsigned EltSizeInBits) {
3411 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
3412 if (!cast<BuildVectorSDNode>(Op)->isConstant())
3413 return std::nullopt;
3414 bool IsInteger = Op.getValueType().isInteger();
3415
3416 std::optional<unsigned> SeqStepDenom;
3417 std::optional<APInt> SeqStepNum;
3418 std::optional<APInt> SeqAddend;
3419 std::optional<std::pair<APInt, unsigned>> PrevElt;
3420 assert(EltSizeInBits >= Op.getValueType().getScalarSizeInBits());
3421
3422 // First extract the ops into a list of constant integer values. This may not
3423 // be possible for floats if they're not all representable as integers.
3425 const unsigned OpSize = Op.getScalarValueSizeInBits();
3426 for (auto [Idx, Elt] : enumerate(Op->op_values())) {
3427 if (Elt.isUndef()) {
3428 Elts[Idx] = std::nullopt;
3429 continue;
3430 }
3431 if (IsInteger) {
3432 Elts[Idx] = Elt->getAsAPIntVal().trunc(OpSize).zext(EltSizeInBits);
3433 } else {
3434 auto ExactInteger =
3435 getExactInteger(cast<ConstantFPSDNode>(Elt)->getValueAPF(), OpSize);
3436 if (!ExactInteger)
3437 return std::nullopt;
3438 Elts[Idx] = *ExactInteger;
3439 }
3440 }
3441
3442 for (auto [Idx, Elt] : enumerate(Elts)) {
3443 // Assume undef elements match the sequence; we just have to be careful
3444 // when interpolating across them.
3445 if (!Elt)
3446 continue;
3447
3448 if (PrevElt) {
3449 // Calculate the step since the last non-undef element, and ensure
3450 // it's consistent across the entire sequence.
3451 unsigned IdxDiff = Idx - PrevElt->second;
3452 APInt ValDiff = *Elt - PrevElt->first;
3453
3454 // A value difference of zero means that we're somewhere in the middle
3455 // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
3456 // step change before evaluating the sequence.
3457 if (ValDiff == 0)
3458 continue;
3459
3460 int64_t Remainder = ValDiff.srem(IdxDiff);
3461 // Normalize the step if it's greater than 1.
3462 if (Remainder != ValDiff.getSExtValue()) {
3463 // The difference must cleanly divide the element span.
3464 if (Remainder != 0)
3465 return std::nullopt;
3466 ValDiff = ValDiff.sdiv(IdxDiff);
3467 IdxDiff = 1;
3468 }
3469
3470 if (!SeqStepNum)
3471 SeqStepNum = ValDiff;
3472 else if (ValDiff != SeqStepNum)
3473 return std::nullopt;
3474
3475 if (!SeqStepDenom)
3476 SeqStepDenom = IdxDiff;
3477 else if (IdxDiff != *SeqStepDenom)
3478 return std::nullopt;
3479 }
3480
3481 // Record this non-undef element for later.
3482 if (!PrevElt || PrevElt->first != *Elt)
3483 PrevElt = std::make_pair(*Elt, Idx);
3484 }
3485
3486 // We need to have logged a step for this to count as a legal index sequence.
3487 if (!SeqStepNum || !SeqStepDenom)
3488 return std::nullopt;
3489
3490 // Loop back through the sequence and validate elements we might have skipped
3491 // while waiting for a valid step. While doing this, log any sequence addend.
3492 for (auto [Idx, Elt] : enumerate(Elts)) {
3493 if (!Elt)
3494 continue;
3495 APInt ExpectedVal =
3496 (APInt(EltSizeInBits, Idx, /*isSigned=*/false, /*implicitTrunc=*/true) *
3497 *SeqStepNum)
3498 .sdiv(*SeqStepDenom);
3499
3500 APInt Addend = *Elt - ExpectedVal;
3501 if (!SeqAddend)
3502 SeqAddend = Addend;
3503 else if (Addend != SeqAddend)
3504 return std::nullopt;
3505 }
3506
3507 assert(SeqAddend && "Must have an addend if we have a step");
3508
3509 return VIDSequence{SeqStepNum->getSExtValue(), *SeqStepDenom,
3510 SeqAddend->getSExtValue()};
3511}
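// For example, the constant vector <0, 2, 4, 6> is matched as step 2/1 with
// addend 0 (i.e. VID * 2), and <1, 1, 2, 2> as step 1/2 with addend 1
// (i.e. (VID >> 1) + 1, using the truncating signed division above).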
3512
3513// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
3514// and lower it as a VRGATHER_VX_VL from the source vector.
3515static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
3516 SelectionDAG &DAG,
3517 const RISCVSubtarget &Subtarget) {
3518 if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
3519 return SDValue();
3520 SDValue Src = SplatVal.getOperand(0);
3521 // Don't perform this optimization for i1 vectors, or if the element types are
3522 // different.
3523 // FIXME: Support i1 vectors, maybe by promoting to i8?
3524 MVT EltTy = VT.getVectorElementType();
3525 MVT SrcVT = Src.getSimpleValueType();
3526 if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType())
3527 return SDValue();
3528 SDValue Idx = SplatVal.getOperand(1);
3529 // The index must be a legal type.
3530 if (Idx.getValueType() != Subtarget.getXLenVT())
3531 return SDValue();
3532
3533 // Check that we know Idx lies within VT
3534 if (!TypeSize::isKnownLE(SrcVT.getSizeInBits(), VT.getSizeInBits())) {
3535 auto *CIdx = dyn_cast<ConstantSDNode>(Idx);
3536 if (!CIdx || CIdx->getZExtValue() >= VT.getVectorMinNumElements())
3537 return SDValue();
3538 }
3539
3540 // Convert fixed length vectors to scalable
3541 MVT ContainerVT = VT;
3542 if (VT.isFixedLengthVector())
3543 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3544
3545 MVT SrcContainerVT = SrcVT;
3546 if (SrcVT.isFixedLengthVector()) {
3547 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
3548 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
3549 }
3550
3551 // Put Vec in a VT sized vector
3552 if (SrcContainerVT.getVectorMinNumElements() <
3553 ContainerVT.getVectorMinNumElements())
3554 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
3555 DAG.getUNDEF(ContainerVT), Src,
3556 DAG.getVectorIdxConstant(0, DL));
3557 else
3558 Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Src,
3559 DAG.getVectorIdxConstant(0, DL));
3560
3561 // We checked that Idx fits inside VT earlier
3562 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3563 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Src,
3564 Idx, DAG.getUNDEF(ContainerVT), Mask, VL);
3565 if (VT.isFixedLengthVector())
3566 Gather = convertFromScalableVector(VT, Gather, DAG, Subtarget);
3567 return Gather;
3568}
3569
3570/// Try and optimize BUILD_VECTORs with "dominant values" - these are values
3571/// which constitute a large proportion of the elements. In such cases we can
3572/// splat a vector with the dominant element and make up the shortfall with
3573/// INSERT_VECTOR_ELTs. Returns an empty SDValue if not profitable.
3574/// Note that this includes vectors of 2 elements by association. The
3575/// upper-most element is the "dominant" one, allowing us to use a splat to
3576/// "insert" the upper element, and an insert of the lower element at position
3577/// 0, which improves codegen.
3579 const RISCVSubtarget &Subtarget) {
3580 MVT VT = Op.getSimpleValueType();
3581 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3582
3583 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3584
3585 SDLoc DL(Op);
3586 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3587
3588 MVT XLenVT = Subtarget.getXLenVT();
3589 unsigned NumElts = Op.getNumOperands();
3590
3591 SDValue DominantValue;
3592 unsigned MostCommonCount = 0;
3593 DenseMap<SDValue, unsigned> ValueCounts;
3594 unsigned NumUndefElts =
3595 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
3596
3597 // Track the number of scalar loads we know we'd be inserting, estimated as
3598 // any non-zero floating-point constant. Other kinds of elements are either
3599 // already in registers or are materialized on demand. The threshold at which
3600 // a vector load is more desirable than several scalar materialization and
3601 // vector-insertion instructions is not known.
3602 unsigned NumScalarLoads = 0;
3603
3604 for (SDValue V : Op->op_values()) {
3605 if (V.isUndef())
3606 continue;
3607
3608 unsigned &Count = ValueCounts[V];
3609 if (0 == Count)
3610 if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
3611 NumScalarLoads += !CFP->isExactlyValue(+0.0);
3612
3613 // Is this value dominant? In case of a tie, prefer the highest element as
3614 // it's cheaper to insert near the beginning of a vector than it is at the
3615 // end.
3616 if (++Count >= MostCommonCount) {
3617 DominantValue = V;
3618 MostCommonCount = Count;
3619 }
3620 }
3621
3622 assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
3623 unsigned NumDefElts = NumElts - NumUndefElts;
3624 unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
3625
3626 // Don't perform this optimization when optimizing for size, since
3627 // materializing elements and inserting them tends to cause code bloat.
3628 if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
3629 (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
3630 ((MostCommonCount > DominantValueCountThreshold) ||
3631 (ValueCounts.size() <= Log2_32(NumDefElts)))) {
3632 // Start by splatting the most common element.
3633 SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
3634
3635 DenseSet<SDValue> Processed{DominantValue};
3636
3637 // We can handle an insert into the last element (of a splat) via
3638 // v(f)slide1down. This is slightly better than the vslideup insert
3639 // lowering as it avoids the need for a vector group temporary. It
3640 // is also better than using vmerge.vx as it avoids the need to
3641 // materialize the mask in a vector register.
3642 if (SDValue LastOp = Op->getOperand(Op->getNumOperands() - 1);
3643 !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
3644 LastOp != DominantValue) {
3645 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
3646 auto OpCode =
3648 if (!VT.isFloatingPoint())
3649 LastOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, LastOp);
3650 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
3651 LastOp, Mask, VL);
3652 Vec = convertFromScalableVector(VT, Vec, DAG, Subtarget);
3653 Processed.insert(LastOp);
3654 }
3655
3656 MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
3657 for (const auto &OpIdx : enumerate(Op->ops())) {
3658 const SDValue &V = OpIdx.value();
3659 if (V.isUndef() || !Processed.insert(V).second)
3660 continue;
3661 if (ValueCounts[V] == 1) {
3662 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
3663 DAG.getVectorIdxConstant(OpIdx.index(), DL));
3664 } else {
3665 // Blend in all instances of this value using a VSELECT, using a
3666 // mask where each bit signals whether that element is the one
3667 // we're after.
3669 transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
3670 return DAG.getConstant(V == V1, DL, XLenVT);
3671 });
3672 Vec = DAG.getNode(ISD::VSELECT, DL, VT,
3673 DAG.getBuildVector(SelMaskTy, DL, Ops),
3674 DAG.getSplatBuildVector(VT, DL, V), Vec);
3675 }
3676 }
3677
3678 return Vec;
3679 }
3680
3681 return SDValue();
3682}
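// For example, <2, 2, 2, 2, 2, 2, 2, 3> splats the dominant value 2 and then
// inserts the trailing 3 via the v(f)slide1down path for the last element, as
// described in the comments above; repeated non-dominant values are blended
// in with a VSELECT mask instead.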
3683
3685 const RISCVSubtarget &Subtarget) {
3686 MVT VT = Op.getSimpleValueType();
3687 assert(VT.isFixedLengthVector() && "Unexpected vector!");
3688
3689 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
3690
3691 SDLoc DL(Op);
3692 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
3693
3694 MVT XLenVT = Subtarget.getXLenVT();
3695 unsigned NumElts = Op.getNumOperands();
3696
3697 if (VT.getVectorElementType() == MVT::i1) {
3698 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
3699 SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
3700 return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
3701 }
3702
3703 if (ISD::isBuildVectorAllOnes(Op.getNode())) {
3704 SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
3705 return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
3706 }
3707
3708 // Lower constant mask BUILD_VECTORs via an integer vector type, in
3709 // scalar integer chunks whose bit-width depends on the number of mask
3710 // bits and XLEN.
3711 // First, determine the most appropriate scalar integer type to use. This
3712 // is at most XLenVT, but may be shrunk to a smaller vector element type
3713 // according to the size of the final vector - use i8 chunks rather than
3714 // XLenVT if we're producing a v8i1. This results in more consistent
3715 // codegen across RV32 and RV64.
3716 unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
3717 NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
3718 // If we have to use more than one INSERT_VECTOR_ELT then this
3719 // optimization is likely to increase code size; avoid performing it in
3720 // such a case, since we can use a load from a constant pool instead.
3721 if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
3722 return SDValue();
3723 // Now we can create our integer vector type. Note that it may be larger
3724 // than the resulting mask type: v4i1 would use v1i8 as its integer type.
3725 unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
3726 MVT IntegerViaVecVT =
3727 MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
3728 IntegerViaVecElts);
3729
3730 uint64_t Bits = 0;
3731 unsigned BitPos = 0, IntegerEltIdx = 0;
3732 SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
3733
3734 for (unsigned I = 0; I < NumElts;) {
3735 SDValue V = Op.getOperand(I);
3736 bool BitValue = !V.isUndef() && V->getAsZExtVal();
3737 Bits |= ((uint64_t)BitValue << BitPos);
3738 ++BitPos;
3739 ++I;
3740
3741 // Once we accumulate enough bits to fill our scalar type or process the
3742 // last element, insert into our vector and clear our accumulated data.
3743 if (I % NumViaIntegerBits == 0 || I == NumElts) {
3744 if (NumViaIntegerBits <= 32)
3745 Bits = SignExtend64<32>(Bits);
3746 SDValue Elt = DAG.getSignedConstant(Bits, DL, XLenVT);
3747 Elts[IntegerEltIdx] = Elt;
3748 Bits = 0;
3749 BitPos = 0;
3750 IntegerEltIdx++;
3751 }
3752 }
3753
3754 SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
3755
3756 if (NumElts < NumViaIntegerBits) {
3757 // If we're producing a smaller vector than our minimum legal integer
3758 // type, bitcast to the equivalent (known-legal) mask type, and extract
3759 // our final mask.
3760 assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
3761 Vec = DAG.getBitcast(MVT::v8i1, Vec);
3762 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
3763 DAG.getConstant(0, DL, XLenVT));
3764 } else {
3765 // Else we must have produced an integer type with the same size as the
3766 // mask type; bitcast for the final result.
3767 assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
3768 Vec = DAG.getBitcast(VT, Vec);
3769 }
3770
3771 return Vec;
3772 }
3773
3774 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
3775 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
3777 if (!VT.isFloatingPoint())
3778 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
3779 Splat =
3780 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
3781 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
3782 }
3783
3784 // Try and match index sequences, which we can lower to the vid instruction
3785 // with optional modifications. An all-undef vector is matched by
3786 // getSplatValue, above.
3787 if (auto SimpleVID = isSimpleVIDSequence(Op, Op.getScalarValueSizeInBits())) {
3788 int64_t StepNumerator = SimpleVID->StepNumerator;
3789 unsigned StepDenominator = SimpleVID->StepDenominator;
3790 int64_t Addend = SimpleVID->Addend;
3791
3792 assert(StepNumerator != 0 && "Invalid step");
3793 bool Negate = false;
3794 int64_t SplatStepVal = StepNumerator;
3795 unsigned StepOpcode = ISD::MUL;
3796 // Exclude INT64_MIN to avoid passing it to std::abs. We won't optimize it
3797 // anyway as the shift of 63 won't fit in uimm5.
3798 if (StepNumerator != 1 && StepNumerator != INT64_MIN &&
3799 isPowerOf2_64(std::abs(StepNumerator))) {
3800 Negate = StepNumerator < 0;
3801 StepOpcode = ISD::SHL;
3802 SplatStepVal = Log2_64(std::abs(StepNumerator));
3803 }
3804
3805 // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
3806 // threshold since it's the immediate value many RVV instructions accept.
3807 // There is no vmul.vi instruction so ensure multiply constant can fit in
3808 // a single addi instruction.
3809 if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
3810 (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
3811 isPowerOf2_32(StepDenominator) &&
3812 (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
3813 MVT VIDVT =
3814 VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
3815 MVT VIDContainerVT =
3816 getContainerForFixedLengthVector(DAG, VIDVT, Subtarget);
3817 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VIDContainerVT, Mask, VL);
3818 // Convert right out of the scalable type so we can use standard ISD
3819 // nodes for the rest of the computation. If we used scalable types with
3820 // these, we'd lose the fixed-length vector info and generate worse
3821 // vsetvli code.
3822 VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
3823 if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
3824 (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
3825 SDValue SplatStep = DAG.getSignedConstant(SplatStepVal, DL, VIDVT);
3826 VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
3827 }
3828 if (StepDenominator != 1) {
3829 SDValue SplatStep =
3830 DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
3831 VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
3832 }
3833 if (Addend != 0 || Negate) {
3834 SDValue SplatAddend = DAG.getSignedConstant(Addend, DL, VIDVT);
3835 VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
3836 VID);
3837 }
3838 if (VT.isFloatingPoint()) {
3839 // TODO: Use vfwcvt to reduce register pressure.
3840 VID = DAG.getNode(ISD::SINT_TO_FP, DL, VT, VID);
3841 }
3842 return VID;
3843 }
3844 }
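// Editorial worked example (illustration only, not upstream source): v4i16
// <0, 2, 4, 6> matches a VID sequence with StepNumerator = 2,
// StepDenominator = 1 and Addend = 0, so it lowers to vid.v followed by a
// left shift by 1 (the power-of-two step case above).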
3845
3846 // For very small build_vectors, use a single scalar insert of a constant.
3847 // TODO: Base this on constant rematerialization cost, not size.
3848 const unsigned EltBitSize = VT.getScalarSizeInBits();
3849 if (VT.getSizeInBits() <= 32 &&
3850 ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
3851 MVT ViaIntVT = MVT::getIntegerVT(VT.getSizeInBits());
3852 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
3853 "Unexpected sequence type");
3854 // If we can use the original VL with the modified element type, this
3855 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3856 // be moved into InsertVSETVLI?
3857 unsigned ViaVecLen =
3858 (Subtarget.getRealMinVLen() >= VT.getSizeInBits() * NumElts) ? NumElts : 1;
3859 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3860
3861 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3862 uint64_t SplatValue = 0;
3863 // Construct the amalgamated value at this larger vector type.
3864 for (const auto &OpIdx : enumerate(Op->op_values())) {
3865 const auto &SeqV = OpIdx.value();
3866 if (!SeqV.isUndef())
3867 SplatValue |=
3868 ((SeqV->getAsZExtVal() & EltMask) << (OpIdx.index() * EltBitSize));
3869 }
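// Editorial worked example (illustration only, not upstream source): a
// constant v4i8 <1, 2, 3, 4> amalgamates to the i32 value 0x04030201, which
// is then inserted as a single scalar element and bitcast back to v4i8.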
3870
3871 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3872 // achieve better constant materialization.
3873 // On RV32, we need to sign-extend to use getSignedConstant.
3874 if (ViaIntVT == MVT::i32)
3875 SplatValue = SignExtend64<32>(SplatValue);
3876
3877 SDValue Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ViaVecVT,
3878 DAG.getUNDEF(ViaVecVT),
3879 DAG.getSignedConstant(SplatValue, DL, XLenVT),
3880 DAG.getVectorIdxConstant(0, DL));
3881 if (ViaVecLen != 1)
3882 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3883 MVT::getVectorVT(ViaIntVT, 1), Vec,
3884 DAG.getConstant(0, DL, XLenVT));
3885 return DAG.getBitcast(VT, Vec);
3886 }
3887
3888
3889 // Attempt to detect "hidden" splats, which only reveal themselves as splats
3890 // when re-interpreted as a vector with a larger element type. For example,
3891 // v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
3892 // could be instead splat as
3893 // v2i32 = build_vector i32 0x00010000, i32 0x00010000
3894 // TODO: This optimization could also work on non-constant splats, but it
3895 // would require bit-manipulation instructions to construct the splat value.
3896 SmallVector<SDValue> Sequence;
3897 const auto *BV = cast<BuildVectorSDNode>(Op);
3898 if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
3899 ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
3900 BV->getRepeatedSequence(Sequence) &&
3901 (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
3902 unsigned SeqLen = Sequence.size();
3903 MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
3904 assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
3905 ViaIntVT == MVT::i64) &&
3906 "Unexpected sequence type");
3907
3908 // If we can use the original VL with the modified element type, this
3909 // means we only have a VTYPE toggle, not a VL toggle. TODO: Should this
3910 // be moved into InsertVSETVLI?
3911 const unsigned RequiredVL = NumElts / SeqLen;
3912 const unsigned ViaVecLen =
3913 (Subtarget.getRealMinVLen() >= ViaIntVT.getSizeInBits() * NumElts) ?
3914 NumElts : RequiredVL;
3915 MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, ViaVecLen);
3916
3917 unsigned EltIdx = 0;
3918 uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
3919 uint64_t SplatValue = 0;
3920 // Construct the amalgamated value which can be splatted as this larger
3921 // vector type.
3922 for (const auto &SeqV : Sequence) {
3923 if (!SeqV.isUndef())
3924 SplatValue |=
3925 ((SeqV->getAsZExtVal() & EltMask) << (EltIdx * EltBitSize));
3926 EltIdx++;
3927 }
3928
3929 // On RV64, sign-extend from 32 to 64 bits where possible in order to
3930 // achieve better constant materialization.
3931 // On RV32, we need to sign-extend to use getSignedConstant.
3932 if (ViaIntVT == MVT::i32)
3933 SplatValue = SignExtend64<32>(SplatValue);
3934
3935 // Since we can't introduce illegal i64 types at this stage, we can only
3936 // perform an i64 splat on RV32 if it is its own sign-extended value. That
3937 // way we can use RVV instructions to splat.
3938 assert((ViaIntVT.bitsLE(XLenVT) ||
3939 (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
3940 "Unexpected bitcast sequence");
3941 if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
3942 SDValue ViaVL =
3943 DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
3944 MVT ViaContainerVT =
3945 getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
3946 SDValue Splat =
3947 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
3948 DAG.getUNDEF(ViaContainerVT),
3949 DAG.getSignedConstant(SplatValue, DL, XLenVT), ViaVL);
3950 Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
3951 if (ViaVecLen != RequiredVL)
3952 Splat = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
3953 MVT::getVectorVT(ViaIntVT, RequiredVL), Splat,
3954 DAG.getConstant(0, DL, XLenVT));
3955 return DAG.getBitcast(VT, Splat);
3956 }
3957 }
3958
3959 // If the number of signbits allows, see if we can lower as a <N x i8>.
3960 // Our main goal here is to reduce LMUL (and thus work) required to
3961 // build the constant, but we will also narrow if the resulting
3962 // narrow vector is known to materialize cheaply.
3963 // TODO: We really should be costing the smaller vector. There are
3964 // profitable cases this misses.
3965 if (EltBitSize > 8 && VT.isInteger() &&
3966 (NumElts <= 4 || VT.getSizeInBits() > Subtarget.getRealMinVLen()) &&
3967 DAG.ComputeMaxSignificantBits(Op) <= 8) {
3968 SDValue Source = DAG.getBuildVector(VT.changeVectorElementType(MVT::i8),
3969 DL, Op->ops());
3970 Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
3971 Source, DAG, Subtarget);
3972 SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
3973 return convertFromScalableVector(VT, Res, DAG, Subtarget);
3974 }
3975
3976 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
3977 return Res;
3978
3979 // For constant vectors, use generic constant pool lowering. Otherwise,
3980 // we'd have to materialize constants in GPRs just to move them into the
3981 // vector.
3982 return SDValue();
3983}
3984
3985static unsigned getPACKOpcode(unsigned DestBW,
3986 const RISCVSubtarget &Subtarget) {
3987 switch (DestBW) {
3988 default:
3989 llvm_unreachable("Unsupported pack size");
3990 case 16:
3991 return RISCV::PACKH;
3992 case 32:
3993 return Subtarget.is64Bit() ? RISCV::PACKW : RISCV::PACK;
3994 case 64:
3995 assert(Subtarget.is64Bit());
3996 return RISCV::PACK;
3997 }
3998}
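// Editorial note (illustration only, not upstream source): packing two 8-bit
// elements (DestBW == 16) uses PACKH on both RV32 and RV64, while packing two
// 16-bit elements (DestBW == 32) uses PACK on RV32 and PACKW on RV64.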
3999
4000/// Double the element size of the build vector to reduce the number
4001/// of vslide1down in the build vector chain. In the worst case, this
4002/// trades three scalar operations for 1 vector operation. Scalar
4003/// operations are generally lower latency, and for out-of-order cores
4004 /// we also benefit from additional parallelism.
4005 static SDValue lowerBuildVectorViaPacking(SDValue Op, SelectionDAG &DAG,
4006 const RISCVSubtarget &Subtarget) {
4007 SDLoc DL(Op);
4008 MVT VT = Op.getSimpleValueType();
4009 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4010 MVT ElemVT = VT.getVectorElementType();
4011 if (!ElemVT.isInteger())
4012 return SDValue();
4013
4014 // TODO: Relax these architectural restrictions, possibly with costing
4015 // of the actual instructions required.
4016 if (!Subtarget.hasStdExtZbb() || !Subtarget.hasStdExtZba())
4017 return SDValue();
4018
4019 unsigned NumElts = VT.getVectorNumElements();
4020 unsigned ElemSizeInBits = ElemVT.getSizeInBits();
4021 if (ElemSizeInBits >= std::min(Subtarget.getELen(), Subtarget.getXLen()) ||
4022 NumElts % 2 != 0)
4023 return SDValue();
4024
4025 // Produce [B,A] packed into a type twice as wide. Note that all
4026 // scalars are XLenVT, possibly masked (see below).
4027 MVT XLenVT = Subtarget.getXLenVT();
4028 SDValue Mask = DAG.getConstant(
4029 APInt::getLowBitsSet(XLenVT.getSizeInBits(), ElemSizeInBits), DL, XLenVT);
4030 auto pack = [&](SDValue A, SDValue B) {
4031 // Bias the scheduling of the inserted operations to near the
4032 // definition of the element - this tends to reduce register
4033 // pressure overall.
4034 SDLoc ElemDL(B);
4035 if (Subtarget.hasStdExtZbkb())
4036 // Note that we're relying on the high bits of the result being
4037 // don't care. For PACKW, the result is *sign* extended.
4038 return SDValue(
4039 DAG.getMachineNode(getPACKOpcode(ElemSizeInBits * 2, Subtarget),
4040 ElemDL, XLenVT, A, B),
4041 0);
4042
4043 A = DAG.getNode(ISD::AND, SDLoc(A), XLenVT, A, Mask);
4044 B = DAG.getNode(ISD::AND, SDLoc(B), XLenVT, B, Mask);
4045 SDValue ShtAmt = DAG.getConstant(ElemSizeInBits, ElemDL, XLenVT);
4046 return DAG.getNode(ISD::OR, ElemDL, XLenVT, A,
4047 DAG.getNode(ISD::SHL, ElemDL, XLenVT, B, ShtAmt),
4049 };
4050
4051 SmallVector<SDValue> NewOperands;
4052 NewOperands.reserve(NumElts / 2);
4053 for (unsigned i = 0; i < VT.getVectorNumElements(); i += 2)
4054 NewOperands.push_back(pack(Op.getOperand(i), Op.getOperand(i + 1)));
4055 assert(NumElts == NewOperands.size() * 2);
4056 MVT WideVT = MVT::getIntegerVT(ElemSizeInBits * 2);
4057 MVT WideVecVT = MVT::getVectorVT(WideVT, NumElts / 2);
4058 return DAG.getNode(ISD::BITCAST, DL, VT,
4059 DAG.getBuildVector(WideVecVT, DL, NewOperands));
4060}
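// Editorial illustration (a standalone sketch, not part of this file and not
// compiled with it): it models the scalar mask/shift/or fallback used by the
// pack lambda above when Zbkb is unavailable, for 8-bit elements packed
// pairwise into 16-bit values. The helper name packPair is made up for the
// example.
#include <cassert>
#include <cstdint>

static uint64_t packPair(uint64_t A, uint64_t B, unsigned ElemBits) {
  uint64_t Mask = (1ull << ElemBits) - 1; // assumes ElemBits < 64
  return (A & Mask) | ((B & Mask) << ElemBits); // [B,A]: B lands in the high half
}

int main() {
  // A v2i8 pair {0x34, 0x12} becomes the single wide element 0x1234.
  assert(packPair(0x34, 0x12, 8) == 0x1234);
  return 0;
}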
4061
4062 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
4063 const RISCVSubtarget &Subtarget) {
4064 MVT VT = Op.getSimpleValueType();
4065 assert(VT.isFixedLengthVector() && "Unexpected vector!");
4066
4067 MVT EltVT = VT.getVectorElementType();
4068 MVT XLenVT = Subtarget.getXLenVT();
4069
4070 SDLoc DL(Op);
4071
4072 // Proper support for f16 requires Zvfh. bf16 always requires special
4073 // handling. We need to cast the scalar to integer and create an integer
4074 // build_vector.
4075 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) || EltVT == MVT::bf16) {
4076 MVT IVT = VT.changeVectorElementType(MVT::i16);
4078 for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
4079 SDValue Elem = Op.getOperand(I);
4080 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4081 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin())) {
4082 // Called by LegalizeDAG, we need to use XLenVT operations since we
4083 // can't create illegal types.
4084 if (auto *C = dyn_cast<ConstantFPSDNode>(Elem)) {
4085 // Manually constant fold so the integer build_vector can be lowered
4086 // better. Waiting for DAGCombine will be too late.
4087 APInt V =
4088 C->getValueAPF().bitcastToAPInt().sext(XLenVT.getSizeInBits());
4089 NewOps[I] = DAG.getConstant(V, DL, XLenVT);
4090 } else {
4091 NewOps[I] = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Elem);
4092 }
4093 } else {
4094 // Called by scalar type legalizer, we can use i16.
4095 NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
4096 }
4097 }
4098 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, IVT, NewOps);
4099 return DAG.getBitcast(VT, Res);
4100 }
4101
4102 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
4103 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
4104 return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
4105
4106 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4107
4108 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4109
4110 if (VT.getVectorElementType() == MVT::i1) {
4111 // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
4112 // vector type, we have a legal equivalently-sized i8 type, so we can use
4113 // that.
4114 MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
4115 SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
4116
4117 SDValue WideVec;
4118 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4119 // For a splat, perform a scalar truncate before creating the wider
4120 // vector.
4121 Splat = DAG.getNode(ISD::AND, DL, Splat.getValueType(), Splat,
4122 DAG.getConstant(1, DL, Splat.getValueType()));
4123 WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
4124 } else {
4125 SmallVector<SDValue, 8> Ops(Op->op_values());
4126 WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
4127 SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
4128 WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
4129 }
4130
4131 return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
4132 }
4133
4134 if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
4135 if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
4136 return Gather;
4137 unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
4138 : RISCVISD::VMV_V_X_VL;
4139 if (!VT.isFloatingPoint())
4140 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Splat);
4141 Splat =
4142 DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
4143 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
4144 }
4145
4146 if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
4147 return Res;
4148
4149 // If we're compiling for an exact VLEN value, we can split our work per
4150 // register in the register group.
4151 if (const auto VLen = Subtarget.getRealVLen();
4152 VLen && VT.getSizeInBits().getKnownMinValue() > *VLen) {
4153 MVT ElemVT = VT.getVectorElementType();
4154 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
4155 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4156 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
4157 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
4158 assert(M1VT == getLMUL1VT(M1VT));
4159
4160 // The following semantically builds up a fixed length concat_vector
4161 // of the component build_vectors. We eagerly lower to scalable and
4162 // insert_subvector here to avoid DAG combining it back to a large
4163 // build_vector.
4164 SmallVector<SDValue> BuildVectorOps(Op->ops());
4165 unsigned NumOpElts = M1VT.getVectorMinNumElements();
4166 SDValue Vec = DAG.getUNDEF(ContainerVT);
4167 for (unsigned i = 0; i < VT.getVectorNumElements(); i += ElemsPerVReg) {
4168 auto OneVRegOfOps = ArrayRef(BuildVectorOps).slice(i, ElemsPerVReg);
4169 SDValue SubBV =
4170 DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
4171 SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
4172 unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
4173 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubBV,
4174 DAG.getVectorIdxConstant(InsertIdx, DL));
4175 }
4176 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4177 }
4178
4179 // If we're about to resort to vslide1down (or stack usage), pack our
4180 // elements into the widest scalar type we can. This will force a VL/VTYPE
4181 // toggle, but reduces the critical path, the number of vslide1down ops
4182 // required, and possibly enables scalar folds of the values.
4183 if (SDValue Res = lowerBuildVectorViaPacking(Op, DAG, Subtarget))
4184 return Res;
4185
4186 // For m1 vectors, if we have non-undef values in both halves of our vector,
4187 // split the vector into low and high halves, build them separately, then
4188 // use a vselect to combine them. For long vectors, this cuts the critical
4189 // path of the vslide1down sequence in half, and gives us an opportunity
4190 // to special case each half independently. Note that we don't change the
4191 // length of the sub-vectors here, so if both fallback to the generic
4192 // vslide1down path, we should be able to fold the vselect into the final
4193 // vslidedown (for the undef tail) for the first half w/ masking.
4194 unsigned NumElts = VT.getVectorNumElements();
4195 unsigned NumUndefElts =
4196 count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
4197 unsigned NumDefElts = NumElts - NumUndefElts;
4198 if (NumDefElts >= 8 && NumDefElts > NumElts / 2 &&
4199 ContainerVT.bitsLE(getLMUL1VT(ContainerVT))) {
4200 SmallVector<SDValue> SubVecAOps, SubVecBOps;
4201 SmallVector<SDValue> MaskVals;
4202 SDValue UndefElem = DAG.getUNDEF(Op->getOperand(0)->getValueType(0));
4203 SubVecAOps.reserve(NumElts);
4204 SubVecBOps.reserve(NumElts);
4205 for (unsigned i = 0; i < NumElts; i++) {
4206 SDValue Elem = Op->getOperand(i);
4207 if (i < NumElts / 2) {
4208 SubVecAOps.push_back(Elem);
4209 SubVecBOps.push_back(UndefElem);
4210 } else {
4211 SubVecAOps.push_back(UndefElem);
4212 SubVecBOps.push_back(Elem);
4213 }
4214 bool SelectMaskVal = (i < NumElts / 2);
4215 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
4216 }
4217 assert(SubVecAOps.size() == NumElts && SubVecBOps.size() == NumElts &&
4218 MaskVals.size() == NumElts);
4219
4220 SDValue SubVecA = DAG.getBuildVector(VT, DL, SubVecAOps);
4221 SDValue SubVecB = DAG.getBuildVector(VT, DL, SubVecBOps);
4222 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
4223 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
4224 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, SubVecA, SubVecB);
4225 }
4226
4227 // Cap the cost at a value linear to the number of elements in the vector.
4228 // The default lowering is to use the stack. The vector store + scalar loads
4229 // is linear in VL. However, at high lmuls vslide1down and vslidedown end up
4230 // being (at least) linear in LMUL. As a result, using the vslidedown
4231 // lowering for every element ends up being VL*LMUL.
4232 // TODO: Should we be directly costing the stack alternative? Doing so might
4233 // give us a more accurate upper bound.
4234 InstructionCost LinearBudget = VT.getVectorNumElements() * 2;
4235
4236 // TODO: unify with TTI getSlideCost.
4237 InstructionCost PerSlideCost = 1;
4238 switch (RISCVTargetLowering::getLMUL(ContainerVT)) {
4239 default: break;
4240 case RISCVII::VLMUL::LMUL_2:
4241 PerSlideCost = 2;
4242 break;
4243 case RISCVII::VLMUL::LMUL_4:
4244 PerSlideCost = 4;
4245 break;
4246 case RISCVII::VLMUL::LMUL_8:
4247 PerSlideCost = 8;
4248 break;
4249 }
4250
4251 // TODO: Should we be using the build instseq then cost + evaluate scheme
4252 // we use for integer constants here?
4253 unsigned UndefCount = 0;
4254 for (const SDValue &V : Op->ops()) {
4255 if (V.isUndef()) {
4256 UndefCount++;
4257 continue;
4258 }
4259 if (UndefCount) {
4260 LinearBudget -= PerSlideCost;
4261 UndefCount = 0;
4262 }
4263 LinearBudget -= PerSlideCost;
4264 }
4265 if (UndefCount) {
4266 LinearBudget -= PerSlideCost;
4267 }
4268
4269 if (LinearBudget < 0)
4270 return SDValue();
4271
4272 assert((!VT.isFloatingPoint() ||
4273 VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
4274 "Illegal type which will result in reserved encoding");
4275
4276 const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
4277
4278 SDValue Vec;
4279 UndefCount = 0;
4280 for (SDValue V : Op->ops()) {
4281 if (V.isUndef()) {
4282 UndefCount++;
4283 continue;
4284 }
4285
4286 // Start our sequence with a TA splat in the hopes that hardware is able to
4287 // recognize there's no dependency on the prior value of our temporary
4288 // register.
4289 if (!Vec) {
4290 Vec = DAG.getSplatVector(VT, DL, V);
4291 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
4292 UndefCount = 0;
4293 continue;
4294 }
4295
4296 if (UndefCount) {
4297 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4298 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4299 Vec, Offset, Mask, VL, Policy);
4300 UndefCount = 0;
4301 }
4302 auto OpCode =
4303 VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
4304 if (!VT.isFloatingPoint())
4305 V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
4306 Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
4307 V, Mask, VL);
4308 }
4309 if (UndefCount) {
4310 const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
4311 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4312 Vec, Offset, Mask, VL, Policy);
4313 }
4314 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4315}
4316
4317 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4318 SDValue Lo, SDValue Hi, SDValue VL,
4319 SelectionDAG &DAG) {
4320 if (!Passthru)
4321 Passthru = DAG.getUNDEF(VT);
4322 if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
4323 int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
4324 int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
4325 // If the Hi constant is just copies of Lo's sign bit, lower this as a custom
4326 // node in order to try and match RVV vector/scalar instructions.
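// Editorial worked example (illustration only, not upstream source):
// splatting the i64 constant -2 on RV32 gives Lo = -2 and Hi = -1;
// (LoC >> 31) == -1 == HiC, so a single sign-extending vmv.v.x of Lo suffices.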
4327 if ((LoC >> 31) == HiC)
4328 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4329
4330 // If vl is equal to VLMAX or fits in 4 bits and Hi constant is equal to Lo,
4331 // we could use vmv.v.x whose EEW = 32 to lower it. This allows us to use
4332 // vlmax vsetvli or vsetivli to change the VL.
4333 // FIXME: Support larger constants?
4334 // FIXME: Support non-constant VLs by saturating?
4335 if (LoC == HiC) {
4336 SDValue NewVL;
4337 if (isAllOnesConstant(VL) ||
4338 (isa<RegisterSDNode>(VL) &&
4339 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))
4340 NewVL = DAG.getRegister(RISCV::X0, MVT::i32);
4341 else if (isa<ConstantSDNode>(VL) && isUInt<4>(VL->getAsZExtVal()))
4342 NewVL = DAG.getNode(ISD::ADD, DL, VL.getValueType(), VL, VL);
4343
4344 if (NewVL) {
4345 MVT InterVT =
4346 MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
4347 auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT,
4348 DAG.getUNDEF(InterVT), Lo, NewVL);
4349 return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
4350 }
4351 }
4352 }
4353
4354 // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
4355 if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
4356 isa<ConstantSDNode>(Hi.getOperand(1)) &&
4357 Hi.getConstantOperandVal(1) == 31)
4358 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4359
4360 // If the hi bits of the splat are undefined, then it's fine to just splat Lo
4361 // even if it might be sign extended.
4362 if (Hi.isUndef())
4363 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
4364
4365 // Fall back to a stack store and stride x0 vector load.
4366 return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
4367 Hi, VL);
4368}
4369
4370// Called by type legalization to handle splat of i64 on RV32.
4371// FIXME: We can optimize this when the type has sign or zero bits in one
4372// of the halves.
4373static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
4374 SDValue Scalar, SDValue VL,
4375 SelectionDAG &DAG) {
4376 assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
4377 SDValue Lo, Hi;
4378 std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
4379 return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
4380}
4381
4382// This function lowers a splat of a scalar operand Splat with the vector
4383// length VL. It ensures the final sequence is type legal, which is useful when
4384// lowering a splat after type legalization.
4385static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
4386 MVT VT, const SDLoc &DL, SelectionDAG &DAG,
4387 const RISCVSubtarget &Subtarget) {
4388 bool HasPassthru = Passthru && !Passthru.isUndef();
4389 if (!HasPassthru && !Passthru)
4390 Passthru = DAG.getUNDEF(VT);
4391
4392 MVT EltVT = VT.getVectorElementType();
4393 MVT XLenVT = Subtarget.getXLenVT();
4394
4395 if (VT.isFloatingPoint()) {
4396 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
4397 EltVT == MVT::bf16) {
4398 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
4399 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
4400 Scalar = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Scalar);
4401 else
4402 Scalar = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Scalar);
4403 MVT IVT = VT.changeVectorElementType(MVT::i16);
4404 Passthru = DAG.getNode(ISD::BITCAST, DL, IVT, Passthru);
4405 SDValue Splat =
4406 lowerScalarSplat(Passthru, Scalar, VL, IVT, DL, DAG, Subtarget);
4407 return DAG.getNode(ISD::BITCAST, DL, VT, Splat);
4408 }
4409 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
4410 }
4411
4412 // Simplest case is that the operand needs to be promoted to XLenVT.
4413 if (Scalar.getValueType().bitsLE(XLenVT)) {
4414 // If the operand is a constant, sign extend to increase our chances
4415 // of being able to use a .vi instruction. ANY_EXTEND would become a
4416 // zero extend and the simm5 check in isel would fail.
4417 // FIXME: Should we ignore the upper bits in isel instead?
4418 unsigned ExtOpc =
4419 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4420 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4421 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
4422 }
4423
4424 assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
4425 "Unexpected scalar for splat lowering!");
4426
4427 if (isOneConstant(VL) && isNullConstant(Scalar))
4428 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
4429 DAG.getConstant(0, DL, XLenVT), VL);
4430
4431 // Otherwise use the more complicated splatting algorithm.
4432 return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
4433}
4434
4435// This function lowers an insert of a scalar operand Scalar into lane
4436// 0 of the vector regardless of the value of VL. The contents of the
4437// remaining lanes of the result vector are unspecified. VL is assumed
4438 // to be non-zero.
4439 static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
4440 const SDLoc &DL, SelectionDAG &DAG,
4441 const RISCVSubtarget &Subtarget) {
4442 assert(VT.isScalableVector() && "Expect VT is scalable vector type.");
4443
4444 const MVT XLenVT = Subtarget.getXLenVT();
4445 SDValue Passthru = DAG.getUNDEF(VT);
4446
4447 if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4448 isNullConstant(Scalar.getOperand(1))) {
4449 SDValue ExtractedVal = Scalar.getOperand(0);
4450 // The element types must be the same.
4451 if (ExtractedVal.getValueType().getVectorElementType() ==
4452 VT.getVectorElementType()) {
4453 MVT ExtractedVT = ExtractedVal.getSimpleValueType();
4454 MVT ExtractedContainerVT = ExtractedVT;
4455 if (ExtractedContainerVT.isFixedLengthVector()) {
4456 ExtractedContainerVT = getContainerForFixedLengthVector(
4457 DAG, ExtractedContainerVT, Subtarget);
4458 ExtractedVal = convertToScalableVector(ExtractedContainerVT,
4459 ExtractedVal, DAG, Subtarget);
4460 }
4461 if (ExtractedContainerVT.bitsLE(VT))
4462 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
4463 ExtractedVal, DAG.getVectorIdxConstant(0, DL));
4464 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
4465 DAG.getVectorIdxConstant(0, DL));
4466 }
4467 }
4468
4469
4470 if (VT.isFloatingPoint())
4471 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
4472 DAG.getUNDEF(VT), Scalar, VL);
4473
4474 // Avoid the tricky legalization cases by falling back to using the
4475 // splat code which already handles it gracefully.
4476 if (!Scalar.getValueType().bitsLE(XLenVT))
4477 return lowerScalarSplat(DAG.getUNDEF(VT), Scalar,
4478 DAG.getConstant(1, DL, XLenVT),
4479 VT, DL, DAG, Subtarget);
4480
4481 // If the operand is a constant, sign extend to increase our chances
4482 // of being able to use a .vi instruction. ANY_EXTEND would become a
4483 // zero extend and the simm5 check in isel would fail.
4484 // FIXME: Should we ignore the upper bits in isel instead?
4485 unsigned ExtOpc =
4486 isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
4487 Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
4488 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
4489 VL);
4490}
4491
4492// Can this shuffle be performed on exactly one (possibly larger) input?
4493static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
4494 SDValue V2) {
4495
4496 if (V2.isUndef() &&
4498 return V1;
4499
4500 // Both inputs must be extracts.
4501 if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
4502 V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4503 return SDValue();
4504
4505 // Extracting from the same source.
4506 SDValue Src = V1.getOperand(0);
4507 if (Src != V2.getOperand(0))
4508 return SDValue();
4509
4510 // Src needs to have twice the number of elements.
4511 unsigned NumElts = VT.getVectorNumElements();
4512 if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
4513 return SDValue();
4514
4515 // The extracts must extract the two halves of the source.
4516 if (V1.getConstantOperandVal(1) != 0 ||
4517 V2.getConstantOperandVal(1) != NumElts)
4518 return SDValue();
4519
4520 return Src;
4521}
4522
4523/// Is this shuffle interleaving contiguous elements from one vector into the
4524/// even elements and contiguous elements from another vector into the odd
4525/// elements. \p EvenSrc will contain the element that should be in the first
4526/// even element. \p OddSrc will contain the element that should be in the first
4527/// odd element. These can be the first element in a source or the element half
4528/// way through the source.
4529static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
4530 int &OddSrc, const RISCVSubtarget &Subtarget) {
4531 // We need to be able to widen elements to the next larger integer type.
4532 if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4533 return false;
4534
4535 int Size = Mask.size();
4536 int NumElts = VT.getVectorNumElements();
4537 assert(Size == (int)NumElts && "Unexpected mask size");
4538
4539 SmallVector<unsigned, 2> StartIndexes;
4540 if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
4541 return false;
4542
4543 EvenSrc = StartIndexes[0];
4544 OddSrc = StartIndexes[1];
4545
4546 // One source should be low half of first vector.
4547 if (EvenSrc != 0 && OddSrc != 0)
4548 return false;
4549
4550 // Subvectors will be extracted from either the start of the two input
4551 // vectors, or from the start and middle of the first vector if it's a unary
4552 // interleave.
4553 // In both cases, HalfNumElts will be extracted.
4554 // We need to ensure that the extract indices are 0 or HalfNumElts otherwise
4555 // we'll create an illegal extract_subvector.
4556 // FIXME: We could support other values using a slidedown first.
4557 int HalfNumElts = NumElts / 2;
4558 return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
4559}
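// Editorial worked example (illustration only, not upstream source): for v8i8
// with mask <0, 8, 1, 9, 2, 10, 3, 11>, the even result elements come from the
// low half of the first source and the odd elements from the low half of the
// second, so EvenSrc = 0 and OddSrc = 8, both multiples of HalfNumElts = 4.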
4560
4561/// Match shuffles that concatenate two vectors, rotate the concatenation,
4562/// and then extract the original number of elements from the rotated result.
4563/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
4564/// returned rotation amount is for a rotate right, where elements move from
4565/// higher elements to lower elements. \p LoSrc indicates the first source
4566/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
4567/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
4568/// 0 or 1 if a rotation is found.
4569///
4570/// NOTE: We talk about rotate to the right which matches how bit shift and
4571/// rotate instructions are described where LSBs are on the right, but LLVM IR
4572/// and the table below write vectors with the lowest elements on the left.
4573static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
4574 int Size = Mask.size();
4575
4576 // We need to detect various ways of spelling a rotation:
4577 // [11, 12, 13, 14, 15, 0, 1, 2]
4578 // [-1, 12, 13, 14, -1, -1, 1, -1]
4579 // [-1, -1, -1, -1, -1, -1, 1, 2]
4580 // [ 3, 4, 5, 6, 7, 8, 9, 10]
4581 // [-1, 4, 5, 6, -1, -1, 9, -1]
4582 // [-1, 4, 5, 6, -1, -1, -1, -1]
4583 int Rotation = 0;
4584 LoSrc = -1;
4585 HiSrc = -1;
4586 for (int i = 0; i != Size; ++i) {
4587 int M = Mask[i];
4588 if (M < 0)
4589 continue;
4590
4591 // Determine where a rotate vector would have started.
4592 int StartIdx = i - (M % Size);
4593 // The identity rotation isn't interesting, stop.
4594 if (StartIdx == 0)
4595 return -1;
4596
4597 // If we found the tail of a vector the rotation must be the missing
4598 // front. If we found the head of a vector, it must be how much of the
4599 // head.
4600 int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
4601
4602 if (Rotation == 0)
4603 Rotation = CandidateRotation;
4604 else if (Rotation != CandidateRotation)
4605 // The rotations don't match, so we can't match this mask.
4606 return -1;
4607
4608 // Compute which value this mask is pointing at.
4609 int MaskSrc = M < Size ? 0 : 1;
4610
4611 // Compute which of the two target values this index should be assigned to.
4612 // This reflects whether the high elements are remaining or the low elements
4613 // are remaining.
4614 int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
4615
4616 // Either set up this value if we've not encountered it before, or check
4617 // that it remains consistent.
4618 if (TargetSrc < 0)
4619 TargetSrc = MaskSrc;
4620 else if (TargetSrc != MaskSrc)
4621 // This may be a rotation, but it pulls from the inputs in some
4622 // unsupported interleaving.
4623 return -1;
4624 }
4625
4626 // Check that we successfully analyzed the mask, and normalize the results.
4627 assert(Rotation != 0 && "Failed to locate a viable rotation!");
4628 assert((LoSrc >= 0 || HiSrc >= 0) &&
4629 "Failed to find a rotated input vector!");
4630
4631 return Rotation;
4632}
4633
4634// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
4635// 2, 4, 8 and the integer type Factor-times larger than VT's
4636// element type must be a legal element type.
4637// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
4638 // -> [p, q, r, s] (Factor=2, Index=1)
4639 static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
4640 SDValue Src, unsigned Factor,
4641 unsigned Index, SelectionDAG &DAG) {
4642 unsigned EltBits = VT.getScalarSizeInBits();
4643 ElementCount SrcEC = Src.getValueType().getVectorElementCount();
4644 MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
4645 SrcEC.divideCoefficientBy(Factor));
4646 MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
4647 SrcEC.divideCoefficientBy(Factor));
4648 Src = DAG.getBitcast(WideSrcVT, Src);
4649
4650 unsigned Shift = Index * EltBits;
4651 SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
4652 DAG.getConstant(Shift, DL, WideSrcVT));
4653 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
4654 MVT IntVT = VT.changeVectorElementTypeToInteger();
4655 Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
4656 DAG.getVectorIdxConstant(0, DL));
4657 return DAG.getBitcast(VT, Res);
4658}
4659
4660// Lower the following shuffle to vslidedown.
4661// a)
4662// t49: v8i8 = extract_subvector t13, Constant:i64<0>
4663// t109: v8i8 = extract_subvector t13, Constant:i64<8>
4664// t108: v8i8 = vector_shuffle<1,2,3,4,5,6,7,8> t49, t106
4665// b)
4666// t69: v16i16 = extract_subvector t68, Constant:i64<0>
4667// t23: v8i16 = extract_subvector t69, Constant:i64<0>
4668// t29: v4i16 = extract_subvector t23, Constant:i64<4>
4669// t26: v8i16 = extract_subvector t69, Constant:i64<8>
4670// t30: v4i16 = extract_subvector t26, Constant:i64<0>
4671 // t54: v4i16 = vector_shuffle<1,2,3,4> t29, t30
4672 static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
4673 SDValue V1, SDValue V2,
4674 ArrayRef<int> Mask,
4675 const RISCVSubtarget &Subtarget,
4676 SelectionDAG &DAG) {
4677 auto findNonEXTRACT_SUBVECTORParent =
4678 [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
4679 uint64_t Offset = 0;
4680 while (Parent.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
4681 // EXTRACT_SUBVECTOR can be used to extract a fixed-width vector from
4682 // a scalable vector. But we don't want to match the case.
4683 Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
4684 Offset += Parent.getConstantOperandVal(1);
4685 Parent = Parent.getOperand(0);
4686 }
4687 return std::make_pair(Parent, Offset);
4688 };
4689
4690 auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
4691 auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
4692
4693 // Extracting from the same source.
4694 SDValue Src = V1Src;
4695 if (Src != V2Src)
4696 return SDValue();
4697
4698 // Rebuild mask because Src may be from multiple EXTRACT_SUBVECTORs.
4699 SmallVector<int, 16> NewMask(Mask);
4700 for (size_t i = 0; i != NewMask.size(); ++i) {
4701 if (NewMask[i] == -1)
4702 continue;
4703
4704 if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
4705 NewMask[i] = NewMask[i] + V1IndexOffset;
4706 } else {
4707 // Minus NewMask.size() is needed. Otherwise, the b case would be
4708 // <5,6,7,12> instead of <5,6,7,8>.
4709 NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
4710 }
4711 }
4712
4713 // First index must be known and non-zero. It will be used as the slidedown
4714 // amount.
4715 if (NewMask[0] <= 0)
4716 return SDValue();
4717
4718 // NewMask must also be contiguous.
4719 for (unsigned i = 1; i != NewMask.size(); ++i)
4720 if (NewMask[i - 1] + 1 != NewMask[i])
4721 return SDValue();
4722
4723 MVT XLenVT = Subtarget.getXLenVT();
4724 MVT SrcVT = Src.getSimpleValueType();
4725 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
4726 auto [TrueMask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
4727 SDValue Slidedown =
4728 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
4729 convertToScalableVector(ContainerVT, Src, DAG, Subtarget),
4730 DAG.getConstant(NewMask[0], DL, XLenVT), TrueMask, VL);
4731 return DAG.getNode(
4732 ISD::EXTRACT_SUBVECTOR, DL, VT,
4733 convertFromScalableVector(SrcVT, Slidedown, DAG, Subtarget),
4734 DAG.getConstant(0, DL, XLenVT));
4735}
4736
4737// Because vslideup leaves the destination elements at the start intact, we can
4738// use it to perform shuffles that insert subvectors:
4739//
4740// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
4741// ->
4742// vsetvli zero, 8, e8, mf2, ta, ma
4743// vslideup.vi v8, v9, 4
4744//
4745// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
4746// ->
4747// vsetvli zero, 5, e8, mf2, tu, ma
4748 // vslideup.vi v8, v9, 2
4749 static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
4750 SDValue V1, SDValue V2,
4751 ArrayRef<int> Mask,
4752 const RISCVSubtarget &Subtarget,
4753 SelectionDAG &DAG) {
4754 unsigned NumElts = VT.getVectorNumElements();
4755 int NumSubElts, Index;
4756 if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
4757 Index))
4758 return SDValue();
4759
4760 bool OpsSwapped = Mask[Index] < (int)NumElts;
4761 SDValue InPlace = OpsSwapped ? V2 : V1;
4762 SDValue ToInsert = OpsSwapped ? V1 : V2;
4763
4764 MVT XLenVT = Subtarget.getXLenVT();
4765 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4766 auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
4767 // We slide up by the index that the subvector is being inserted at, and set
4768 // VL to the index + the number of elements being inserted.
4769 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
4770 // If we're adding a suffix to the in place vector, i.e. inserting right
4771 // up to the very end of it, then we don't actually care about the tail.
4772 if (NumSubElts + Index >= (int)NumElts)
4773 Policy |= RISCVII::TAIL_AGNOSTIC;
4774
4775 InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
4776 ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
4777 SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
4778
4779 SDValue Res;
4780 // If we're inserting into the lowest elements, use a tail undisturbed
4781 // vmv.v.v.
4782 if (Index == 0)
4783 Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
4784 VL);
4785 else
4786 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
4787 DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
4788 return convertFromScalableVector(VT, Res, DAG, Subtarget);
4789}
4790
4791/// Match v(f)slide1up/down idioms. These operations involve sliding
4792 /// N-1 elements to make room for an inserted scalar at one end.
4793 static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4794 SDValue V1, SDValue V2,
4795 ArrayRef<int> Mask,
4796 const RISCVSubtarget &Subtarget,
4797 SelectionDAG &DAG) {
4798 bool OpsSwapped = false;
4799 if (!isa<BuildVectorSDNode>(V1)) {
4800 if (!isa<BuildVectorSDNode>(V2))
4801 return SDValue();
4802 std::swap(V1, V2);
4803 OpsSwapped = true;
4804 }
4805 SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
4806 if (!Splat)
4807 return SDValue();
4808
4809 // Return true if the mask could describe a slide of Mask.size() - 1
4810 // elements from concat_vector(V1, V2)[Base:] to [Offset:].
4811 auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
4812 const unsigned S = (Offset > 0) ? 0 : -Offset;
4813 const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
4814 for (unsigned i = S; i != E; ++i)
4815 if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
4816 return false;
4817 return true;
4818 };
4819
4820 const unsigned NumElts = VT.getVectorNumElements();
4821 bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
4822 if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
4823 return SDValue();
4824
4825 const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
4826 // The inserted lane must come from the splat; an undef scalar is legal but not profitable.
4827 if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
4828 return SDValue();
4829
4830 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
4831 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
4832
4833 // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
4834 // vslide1{down,up}.vx instead.
4835 if (VT.getVectorElementType() == MVT::bf16 ||
4836 (VT.getVectorElementType() == MVT::f16 &&
4837 !Subtarget.hasVInstructionsF16())) {
4838 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
4839 Splat =
4840 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
4841 V2 = DAG.getBitcast(
4842 IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
4843 SDValue Vec = DAG.getNode(
4844 IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
4845 IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
4846 Vec = DAG.getBitcast(ContainerVT, Vec);
4847 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4848 }
4849
4850 auto OpCode = IsVSlidedown ?
4851 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
4852 (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
4853 if (!VT.isFloatingPoint())
4854 Splat = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Splat);
4855 auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
4856 DAG.getUNDEF(ContainerVT),
4857 convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
4858 Splat, TrueMask, VL);
4859 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4860}
4861
4862// Match a mask which "spreads" the leading elements of a vector evenly
4863// across the result. Factor is the spread amount, and Index is the
4864// offset applied. (on success, Index < Factor) This is the inverse
4865// of a deinterleave with the same Factor and Index. This is analogous
4866// to an interleave, except that all but one lane is undef.
4867static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
4868 SmallVector<bool> LaneIsUndef(Factor, true);
4869 for (unsigned i = 0; i < Mask.size(); i++)
4870 LaneIsUndef[i % Factor] &= (Mask[i] == -1);
4871
4872 bool Found = false;
4873 for (unsigned i = 0; i < Factor; i++) {
4874 if (LaneIsUndef[i])
4875 continue;
4876 if (Found)
4877 return false;
4878 Index = i;
4879 Found = true;
4880 }
4881 if (!Found)
4882 return false;
4883
4884 for (unsigned i = 0; i < Mask.size() / Factor; i++) {
4885 unsigned j = i * Factor + Index;
4886 if (Mask[j] != -1 && (unsigned)Mask[j] != i)
4887 return false;
4888 }
4889 return true;
4890}
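// Editorial worked example (illustration only, not upstream source):
// <0, -1, 1, -1, 2, -1, 3, -1> is a spread with Factor = 2 and Index = 0,
// while <-1, 0, -1, 1, -1, 2, -1, 3> yields Index = 1. A mask with defined
// elements in more than one lane modulo Factor is rejected.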
4891
4892// Given a vector a, b, c, d return a vector Factor times longer
4893// with Factor-1 undef's between elements. Ex:
4894// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4895// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4896static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4897 const SDLoc &DL, SelectionDAG &DAG) {
4898
4899 MVT VT = V.getSimpleValueType();
4900 unsigned EltBits = VT.getScalarSizeInBits();
4901 ElementCount EC = VT.getVectorElementCount();
4902 V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4903
4904 MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4905
4906 SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4907 // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4908 // allow the SHL to fold away if Index is 0.
4909 if (Index != 0)
4910 Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4911 DAG.getConstant(EltBits * Index, DL, WideVT));
4912 // Make sure to use original element type
4913 MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4914 EC.multiplyCoefficientBy(Factor));
4915 return DAG.getBitcast(ResultVT, Result);
4916}
4917
4918// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4919// to create an interleaved vector of <[vscale x] n*2 x ty>.
4920 // This requires that the size of ty is less than the subtarget's maximum ELEN.
4921 static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4922 const SDLoc &DL, SelectionDAG &DAG,
4923 const RISCVSubtarget &Subtarget) {
4924
4925 // FIXME: Not only does this optimize the code, it fixes some correctness
4926 // issues because MIR does not have freeze.
4927 if (EvenV.isUndef())
4928 return getWideningSpread(OddV, 2, 1, DL, DAG);
4929 if (OddV.isUndef())
4930 return getWideningSpread(EvenV, 2, 0, DL, DAG);
4931
4932 MVT VecVT = EvenV.getSimpleValueType();
4933 MVT VecContainerVT = VecVT; // <vscale x n x ty>
4934 // Convert fixed vectors to scalable if needed
4935 if (VecContainerVT.isFixedLengthVector()) {
4936 VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
4937 EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
4938 OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
4939 }
4940
4941 assert(VecVT.getScalarSizeInBits() < Subtarget.getELen());
4942
4943 // We're working with a vector of the same size as the resulting
4944 // interleaved vector, but with half the number of elements and
4945 // twice the SEW (Hence the restriction on not using the maximum
4946 // ELEN)
4947 MVT WideVT =
4948 MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
4949 VecVT.getVectorElementCount());
4950 MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
4951 if (WideContainerVT.isFixedLengthVector())
4952 WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
4953
4954 // Bitcast the input vectors to integers in case they are FP
4955 VecContainerVT = VecContainerVT.changeTypeToInteger();
4956 EvenV = DAG.getBitcast(VecContainerVT, EvenV);
4957 OddV = DAG.getBitcast(VecContainerVT, OddV);
4958
4959 auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
4960 SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4961
4962 SDValue Interleaved;
4963 if (Subtarget.hasStdExtZvbb()) {
4964 // Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4965 SDValue OffsetVec =
4966 DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4967 Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4968 OffsetVec, Passthru, Mask, VL);
4969 Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4970 Interleaved, EvenV, Passthru, Mask, VL);
4971 } else {
4972 // FIXME: We should freeze the odd vector here. We already handled the case
4973 // of provably undef/poison above.
4974
4975 // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
4976 // vwaddu.vv
4977 Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT, EvenV,
4978 OddV, Passthru, Mask, VL);
4979
4980 // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all-ones
4981 SDValue AllOnesVec = DAG.getSplatVector(
4982 VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
4983 SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT,
4984 OddV, AllOnesVec, Passthru, Mask, VL);
4985
4986 // Add the two together so we get
4987 // (OddV * 0xff...ff) + (OddV + EvenV)
4988 // = (OddV * 0x100...00) + EvenV
4989 // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
4990 // Note the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx
4991 Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT,
4992 Interleaved, OddsMul, Passthru, Mask, VL);
4993 }
4994
4995 // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
4996 MVT ResultContainerVT = MVT::getVectorVT(
4997 VecVT.getVectorElementType(), // Make sure to use original type
4998 VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
4999 Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
5000
5001 // Convert back to a fixed vector if needed
5002 MVT ResultVT =
5003 MVT::getVectorVT(VecVT.getVectorElementType(),
5004 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
5005 if (ResultVT.isFixedLengthVector())
5006 Interleaved =
5007 convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
5008
5009 return Interleaved;
5010}
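// Editorial illustration (a standalone sketch, not part of this file and not
// compiled with it): it checks the scalar identity behind the vwaddu.vv +
// vwmaccu.vx sequence above for SEW = 8, namely
//   Odd * 0xFF + (Odd + Even) == (Odd << 8) | Even.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Even = 0; Even < 256; ++Even)
    for (uint32_t Odd = 0; Odd < 256; ++Odd) {
      uint32_t Interleaved = Odd * 0xFFu + (Odd + Even);
      assert(Interleaved == ((Odd << 8) | Even));
      (void)Interleaved; // silence unused warning when asserts are disabled
    }
  return 0;
}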
5011
5012// If we have a vector of bits that we want to reverse, we can use a vbrev on a
5013 // larger element type, e.g. v32i1 can be reversed with a v1i32 bitreverse.
5014 static SDValue lowerBitreverseShuffle(ShuffleVectorSDNode *SVN,
5015 SelectionDAG &DAG,
5016 const RISCVSubtarget &Subtarget) {
5017 SDLoc DL(SVN);
5018 MVT VT = SVN->getSimpleValueType(0);
5019 SDValue V = SVN->getOperand(0);
5020 unsigned NumElts = VT.getVectorNumElements();
5021
5022 assert(VT.getVectorElementType() == MVT::i1);
5023
5024 if (!ShuffleVectorInst::isReverseMask(SVN->getMask(),
5025 SVN->getMask().size()) ||
5026 !SVN->getOperand(1).isUndef())
5027 return SDValue();
5028
5029 unsigned ViaEltSize = std::max((uint64_t)8, PowerOf2Ceil(NumElts));
5030 EVT ViaVT = EVT::getVectorVT(
5031 *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), ViaEltSize), 1);
5032 EVT ViaBitVT =
5033 EVT::getVectorVT(*DAG.getContext(), MVT::i1, ViaVT.getScalarSizeInBits());
5034
5035 // If we don't have zvbb or the larger element type > ELEN, the operation will
5036 // be illegal.
5037 if (!Subtarget.getTargetLowering()->isOperationLegalOrCustom(ISD::BITREVERSE,
5038 ViaVT) ||
5039 !Subtarget.getTargetLowering()->isTypeLegal(ViaBitVT))
5040 return SDValue();
5041
5042 // If the bit vector doesn't fit exactly into the larger element type, we need
5043 // to insert it into the larger vector and then shift up the reversed bits
5044 // afterwards to get rid of the gap introduced.
5045 if (ViaEltSize > NumElts)
5046 V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ViaBitVT, DAG.getUNDEF(ViaBitVT),
5047 V, DAG.getVectorIdxConstant(0, DL));
5048
5049 SDValue Res =
5050 DAG.getNode(ISD::BITREVERSE, DL, ViaVT, DAG.getBitcast(ViaVT, V));
5051
5052 // Shift up the reversed bits if the vector didn't exactly fit into the larger
5053 // element type.
5054 if (ViaEltSize > NumElts)
5055 Res = DAG.getNode(ISD::SRL, DL, ViaVT, Res,
5056 DAG.getConstant(ViaEltSize - NumElts, DL, ViaVT));
5057
5058 Res = DAG.getBitcast(ViaBitVT, Res);
5059
5060 if (ViaEltSize > NumElts)
5061 Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
5062 DAG.getVectorIdxConstant(0, DL));
5063 return Res;
5064}
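// Editorial worked example (illustration only, not upstream source):
// reversing a v4i1 uses ViaEltSize = 8; the mask is widened into a v8i1,
// bitreversed as a v1i8, shifted right by 4 to remove the introduced gap,
// and the low v4i1 is extracted.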
5065
5066 static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,
5067 SelectionDAG &DAG,
5068 const RISCVSubtarget &Subtarget,
5069 MVT &RotateVT, unsigned &RotateAmt) {
5070 SDLoc DL(SVN);
5071
5072 EVT VT = SVN->getValueType(0);
5073 unsigned NumElts = VT.getVectorNumElements();
5074 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5075 unsigned NumSubElts;
5076 if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
5077 NumElts, NumSubElts, RotateAmt))
5078 return false;
5079 RotateVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits * NumSubElts),
5080 NumElts / NumSubElts);
5081
5082 // We might have a RotateVT that isn't legal, e.g. v4i64 on zve32x.
5083 return Subtarget.getTargetLowering()->isTypeLegal(RotateVT);
5084}
5085
5086// Given a shuffle mask like <3, 0, 1, 2, 7, 4, 5, 6> for v8i8, we can
5087// reinterpret it as a v2i32 and rotate it right by 8 instead. We can lower this
5088 // as a vror.vi if we have Zvkb, or otherwise as a vsll, vsrl and vor.
5089 static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
5090 SelectionDAG &DAG,
5091 const RISCVSubtarget &Subtarget) {
5092 SDLoc DL(SVN);
5093
5094 EVT VT = SVN->getValueType(0);
5095 unsigned RotateAmt;
5096 MVT RotateVT;
5097 if (!isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5098 return SDValue();
5099
5100 SDValue Op = DAG.getBitcast(RotateVT, SVN->getOperand(0));
5101
5102 SDValue Rotate;
5103 // A rotate of an i16 by 8 bits either direction is equivalent to a byteswap,
5104 // so canonicalize to vrev8.
5105 if (RotateVT.getScalarType() == MVT::i16 && RotateAmt == 8)
5106 Rotate = DAG.getNode(ISD::BSWAP, DL, RotateVT, Op);
5107 else
5108 Rotate = DAG.getNode(ISD::ROTL, DL, RotateVT, Op,
5109 DAG.getConstant(RotateAmt, DL, RotateVT));
5110
5111 return DAG.getBitcast(VT, Rotate);
5112}
5113
5114// If compiling with an exactly known VLEN, see if we can split a
5115// shuffle on m2 or larger into a small number of m1 sized shuffles
5116 // which write each destination register exactly once.
5117 static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
5118 SelectionDAG &DAG,
5119 const RISCVSubtarget &Subtarget) {
5120 SDLoc DL(SVN);
5121 MVT VT = SVN->getSimpleValueType(0);
5122 SDValue V1 = SVN->getOperand(0);
5123 SDValue V2 = SVN->getOperand(1);
5124 ArrayRef<int> Mask = SVN->getMask();
5125
5126 // If we don't know exact data layout, not much we can do. If this
5127 // is already m1 or smaller, no point in splitting further.
5128 const auto VLen = Subtarget.getRealVLen();
5129 if (!VLen || VT.getSizeInBits().getFixedValue() <= *VLen)
5130 return SDValue();
5131
5132 // Avoid picking up bitrotate patterns which we have a linear-in-lmul
5133 // expansion for.
5134 unsigned RotateAmt;
5135 MVT RotateVT;
5136 if (isLegalBitRotate(SVN, DAG, Subtarget, RotateVT, RotateAmt))
5137 return SDValue();
5138
5139 MVT ElemVT = VT.getVectorElementType();
5140 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
5141
5142 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5143 MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
5144 MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
5145 assert(M1VT == getLMUL1VT(M1VT));
5146 unsigned NumOpElts = M1VT.getVectorMinNumElements();
5147 unsigned NumElts = ContainerVT.getVectorMinNumElements();
5148 unsigned NumOfSrcRegs = NumElts / NumOpElts;
5149 unsigned NumOfDestRegs = NumElts / NumOpElts;
5150 // The following semantically builds up a fixed length concat_vector
5151 // of the component shuffle_vectors. We eagerly lower to scalable here
5152 // to avoid DAG combining it back to a large shuffle_vector again.
5153 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5154 V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
5155 SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
5156 Operands;
5157 processShuffleMasks(
5158 Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
5159 [&]() { Operands.emplace_back(); },
5160 [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
5161 Operands.emplace_back().emplace_back(
5162 SrcVecIdx, UINT_MAX,
5163 SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5164 },
5165 [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
5166 if (NewReg)
5167 Operands.emplace_back();
5168 Operands.back().emplace_back(
5169 Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
5170 });
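  // At this point Operands has one entry per destination m1 register; each
  // entry lists the (SrcRegIdx1, SrcRegIdx2, SubMask) sub-shuffles needed to
  // assemble that register, with UINT_MAX as the second index marking a
  // single-source sub-shuffle.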
5171 assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
5172 // Note: check that we do not emit too many shuffles here to prevent code
5173 // size explosion.
5174 // TODO: Investigate whether this can be improved by extra analysis of the
5175 // masks to check if the resulting code is more profitable.
5176 unsigned NumShuffles = std::accumulate(
5177 Operands.begin(), Operands.end(), 0u,
5178 [&](unsigned N,
5179 ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
5180 if (Data.empty())
5181 return N;
5182 N += Data.size();
5183 for (const auto &P : Data) {
5184 unsigned Idx2 = std::get<1>(P);
5185 ArrayRef<int> Mask = std::get<2>(P);
5186 if (Idx2 != UINT_MAX)
5187 ++N;
5188 else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
5189 --N;
5190 }
5191 return N;
5192 });
5193 if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
5194 (NumOfDestRegs <= 2 && NumShuffles >= 4))
5195 return SDValue();
5196 auto ExtractValue = [&, &DAG = DAG](SDValue SrcVec, unsigned ExtractIdx) {
5197 SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
5198 DAG.getVectorIdxConstant(ExtractIdx, DL));
5199 SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
5200 return SubVec;
5201 };
5202 auto PerformShuffle = [&, &DAG = DAG](SDValue SubVec1, SDValue SubVec2,
5203 ArrayRef<int> Mask) {
5204 SDValue SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec1, SubVec2, Mask);
5205 return SubVec;
5206 };
5207 SDValue Vec = DAG.getUNDEF(ContainerVT);
5208 for (auto [I, Data] : enumerate(Operands)) {
5209 if (Data.empty())
5210 continue;
5211 SmallDenseMap<unsigned, SDValue, 4> Values;
5212 for (unsigned I : seq<unsigned>(Data.size())) {
5213 const auto &[Idx1, Idx2, _] = Data[I];
5214 if (Values.contains(Idx1)) {
5215 assert(Idx2 != UINT_MAX && Values.contains(Idx2) &&
5216 "Expected both indices to be extracted already.");
5217 break;
5218 }
5219 SDValue V = ExtractValue(Idx1 >= NumOfSrcRegs ? V2 : V1,
5220 (Idx1 % NumOfSrcRegs) * NumOpElts);
5221 Values[Idx1] = V;
5222 if (Idx2 != UINT_MAX)
5223 Values[Idx2] = ExtractValue(Idx2 >= NumOfSrcRegs ? V2 : V1,
5224 (Idx2 % NumOfSrcRegs) * NumOpElts);
5225 }
5226 SDValue V;
5227 for (const auto &[Idx1, Idx2, Mask] : Data) {
5228 SDValue V1 = Values.at(Idx1);
5229 SDValue V2 = Idx2 == UINT_MAX ? V1 : Values.at(Idx2);
5230 V = PerformShuffle(V1, V2, Mask);
5231 Values[Idx1] = V;
5232 }
5233
5234 unsigned InsertIdx = I * NumOpElts;
5235 V = convertToScalableVector(M1VT, V, DAG, Subtarget);
5236 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, V,
5237 DAG.getVectorIdxConstant(InsertIdx, DL));
5238 }
5239 return convertFromScalableVector(VT, Vec, DAG, Subtarget);
5240}
5241
5242// Matches a subset of compress masks with a contiguous prefix of output
5243// elements. This could be extended to allow gaps by deciding which
5244// source elements to spuriously demand.
5245static bool isCompressMask(ArrayRef<int> Mask) {
5246 int Last = -1;
5247 bool SawUndef = false;
5248 for (unsigned i = 0; i < Mask.size(); i++) {
5249 if (Mask[i] == -1) {
5250 SawUndef = true;
5251 continue;
5252 }
5253 if (SawUndef)
5254 return false;
5255 if (i > (unsigned)Mask[i])
5256 return false;
5257 if (Mask[i] <= Last)
5258 return false;
5259 Last = Mask[i];
5260 }
5261 return true;
5262}
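// For example, <0, 2, 3, -1> is accepted (selected indices strictly increase,
// never map backwards, and undefs only appear in the tail), while <1, 0, 2>
// and <-1, 0> are rejected.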
5263
5264/// Given a shuffle where the indices are disjoint between the two sources,
5265/// e.g.:
5266///
5267/// t2:v4i8 = vector_shuffle t0:v4i8, t1:v4i8, <2, 7, 1, 4>
5268///
5269/// Merge the two sources into one and do a single source shuffle:
5270///
5271/// t2:v4i8 = vselect t1:v4i8, t0:v4i8, <0, 1, 0, 1>
5272/// t3:v4i8 = vector_shuffle t2:v4i8, undef, <2, 3, 1, 0>
5273///
5274/// A vselect will either be merged into a masked instruction or be lowered as a
5275/// vmerge.vvm, which is cheaper than a vrgather.vv.
5276static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN,
5277 SelectionDAG &DAG,
5278 const RISCVSubtarget &Subtarget) {
5279 MVT VT = SVN->getSimpleValueType(0);
5280 MVT XLenVT = Subtarget.getXLenVT();
5281 SDLoc DL(SVN);
5282
5283 const ArrayRef<int> Mask = SVN->getMask();
5284
5285 // Work out which source each lane will come from.
5286 SmallVector<int, 16> Srcs(Mask.size(), -1);
5287
5288 for (int Idx : Mask) {
5289 if (Idx == -1)
5290 continue;
5291 unsigned SrcIdx = Idx % Mask.size();
5292 int Src = (uint32_t)Idx < Mask.size() ? 0 : 1;
5293 if (Srcs[SrcIdx] == -1)
5294 // Mark this source as using this lane.
5295 Srcs[SrcIdx] = Src;
5296 else if (Srcs[SrcIdx] != Src)
5297 // The other source is using this lane: not disjoint.
5298 return SDValue();
5299 }
5300
5301 SmallVector<SDValue> SelectMaskVals;
5302 for (int Lane : Srcs) {
5303 if (Lane == -1)
5304 SelectMaskVals.push_back(DAG.getUNDEF(XLenVT));
5305 else
5306 SelectMaskVals.push_back(DAG.getConstant(Lane ? 0 : 1, DL, XLenVT));
5307 }
5308 MVT MaskVT = VT.changeVectorElementType(MVT::i1);
5309 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, SelectMaskVals);
5310 SDValue Select = DAG.getNode(ISD::VSELECT, DL, VT, SelectMask,
5311 SVN->getOperand(0), SVN->getOperand(1));
5312
5313 // Move all indices relative to the first source.
5314 SmallVector<int> NewMask(Mask.size());
5315 for (unsigned I = 0; I < Mask.size(); I++) {
5316 if (Mask[I] == -1)
5317 NewMask[I] = -1;
5318 else
5319 NewMask[I] = Mask[I] % Mask.size();
5320 }
5321
5322 return DAG.getVectorShuffle(VT, DL, Select, DAG.getUNDEF(VT), NewMask);
5323}
5324
5325/// Try to widen element type to get a new mask value for a better permutation
5326/// sequence. This doesn't try to inspect the widened mask for profitability;
5327/// we speculate the widened form is equal or better. This has the effect of
5328/// reducing mask constant sizes - allowing cheaper materialization sequences
5329/// - and index sequence sizes - reducing register pressure and materialization
5330/// cost, at the cost of (possibly) an extra VTYPE toggle.
5331static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
5332 SDLoc DL(Op);
5333 MVT VT = Op.getSimpleValueType();
5334 MVT ScalarVT = VT.getVectorElementType();
5335 unsigned ElementSize = ScalarVT.getFixedSizeInBits();
5336 SDValue V0 = Op.getOperand(0);
5337 SDValue V1 = Op.getOperand(1);
5338 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5339
5340 // Avoid wasted work leading to isTypeLegal check failing below
5341 if (ElementSize > 32)
5342 return SDValue();
5343
5344 SmallVector<int, 8> NewMask;
5345 if (!widenShuffleMaskElts(Mask, NewMask))
5346 return SDValue();
5347
5348 MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(ElementSize * 2)
5349 : MVT::getIntegerVT(ElementSize * 2);
5350 MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
5351 if (!DAG.getTargetLoweringInfo().isTypeLegal(NewVT))
5352 return SDValue();
5353 V0 = DAG.getBitcast(NewVT, V0);
5354 V1 = DAG.getBitcast(NewVT, V1);
5355 return DAG.getBitcast(VT, DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
5356}
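// For example, a v4i32 shuffle with mask <2, 3, 0, 1> can be widened to a
// v2i64 shuffle with mask <1, 0>, halving the number of indices that need to
// be materialized.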
5357
5358static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
5359 const RISCVSubtarget &Subtarget) {
5360 SDValue V1 = Op.getOperand(0);
5361 SDValue V2 = Op.getOperand(1);
5362 SDLoc DL(Op);
5363 MVT XLenVT = Subtarget.getXLenVT();
5364 MVT VT = Op.getSimpleValueType();
5365 unsigned NumElts = VT.getVectorNumElements();
5366 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
5367
5368 if (VT.getVectorElementType() == MVT::i1) {
5369 // Lower to a vror.vi of a larger element type if possible before we promote
5370 // i1s to i8s.
5371 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5372 return V;
5373 if (SDValue V = lowerBitreverseShuffle(SVN, DAG, Subtarget))
5374 return V;
5375
5376 // Promote i1 shuffle to i8 shuffle.
5377 MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
5378 V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
5379 V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
5380 : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
5381 SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
5382 return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
5383 ISD::SETNE);
5384 }
5385
5386 MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
5387
5388 auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5389
5390 if (SVN->isSplat()) {
5391 const int Lane = SVN->getSplatIndex();
5392 if (Lane >= 0) {
5393 MVT SVT = VT.getVectorElementType();
5394
5395 // Turn splatted vector load into a strided load with an X0 stride.
5396 SDValue V = V1;
5397 // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
5398 // with undef.
5399 // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
5400 int Offset = Lane;
5401 if (V.getOpcode() == ISD::CONCAT_VECTORS) {
5402 int OpElements =
5403 V.getOperand(0).getSimpleValueType().getVectorNumElements();
5404 V = V.getOperand(Offset / OpElements);
5405 Offset %= OpElements;
5406 }
5407
5408 // We need to ensure the load isn't atomic or volatile.
5409 if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
5410 auto *Ld = cast<LoadSDNode>(V);
5411 Offset *= SVT.getStoreSize();
5412 SDValue NewAddr = DAG.getMemBasePlusOffset(
5413 Ld->getBasePtr(), TypeSize::getFixed(Offset), DL);
5414
5415 // If this is SEW=64 on RV32, use a strided load with a stride of x0.
5416 if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
5417 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
5418 SDValue IntID =
5419 DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
5420 SDValue Ops[] = {Ld->getChain(),
5421 IntID,
5422 DAG.getUNDEF(ContainerVT),
5423 NewAddr,
5424 DAG.getRegister(RISCV::X0, XLenVT),
5425 VL};
5426 SDValue NewLoad = DAG.getMemIntrinsicNode(
5427 ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
5428 DAG.getMachineFunction().getMachineMemOperand(
5429 Ld->getMemOperand(), Offset, SVT.getStoreSize()));
5430 DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
5431 return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
5432 }
5433
5434 MVT SplatVT = ContainerVT;
5435
5436 // f16 with zvfhmin and bf16 need to use an integer scalar load.
5437 if (SVT == MVT::bf16 ||
5438 (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
5439 SVT = MVT::i16;
5440 SplatVT = ContainerVT.changeVectorElementType(SVT);
5441 }
5442
5443 // Otherwise use a scalar load and splat. This will give the best
5444 // opportunity to fold a splat into the operation. ISel can turn it into
5445 // the x0 strided load if we aren't able to fold away the select.
5446 if (SVT.isFloatingPoint())
5447 V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
5448 Ld->getPointerInfo().getWithOffset(Offset),
5449 Ld->getOriginalAlign(),
5450 Ld->getMemOperand()->getFlags());
5451 else
5452 V = DAG.getExtLoad(ISD::EXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
5453 Ld->getPointerInfo().getWithOffset(Offset), SVT,
5454 Ld->getOriginalAlign(),
5455 Ld->getMemOperand()->getFlags());
5456 DAG.makeEquivalentMemoryOrdering(Ld, V);
5457
5458 unsigned Opc = SplatVT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
5459 : RISCVISD::VMV_V_X_VL;
5460 SDValue Splat =
5461 DAG.getNode(Opc, DL, SplatVT, DAG.getUNDEF(ContainerVT), V, VL);
5462 Splat = DAG.getBitcast(ContainerVT, Splat);
5463 return convertFromScalableVector(VT, Splat, DAG, Subtarget);
5464 }
5465
5466 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5467 assert(Lane < (int)NumElts && "Unexpected lane!");
5468 SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
5469 V1, DAG.getConstant(Lane, DL, XLenVT),
5470 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5471 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5472 }
5473 }
5474
5475 // For exact VLEN m2 or greater, try to split to m1 operations if we
5476 // can split cleanly.
5477 if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
5478 return V;
5479
5480 ArrayRef<int> Mask = SVN->getMask();
5481
5482 if (SDValue V =
5483 lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
5484 return V;
5485
5486 if (SDValue V =
5487 lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
5488 return V;
5489
5490 // A bitrotate will be one instruction on Zvkb, so try to lower to it first if
5491 // available.
5492 if (Subtarget.hasStdExtZvkb())
5493 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5494 return V;
5495
5496 // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
5497 // be undef which can be handled with a single SLIDEDOWN/UP.
5498 int LoSrc, HiSrc;
5499 int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
5500 if (Rotation > 0) {
5501 SDValue LoV, HiV;
5502 if (LoSrc >= 0) {
5503 LoV = LoSrc == 0 ? V1 : V2;
5504 LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
5505 }
5506 if (HiSrc >= 0) {
5507 HiV = HiSrc == 0 ? V1 : V2;
5508 HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
5509 }
5510
5511 // We found a rotation. We need to slide HiV down by Rotation. Then we need
5512 // to slide LoV up by (NumElts - Rotation).
5513 unsigned InvRotate = NumElts - Rotation;
5514
5515 SDValue Res = DAG.getUNDEF(ContainerVT);
5516 if (HiV) {
5517 // Even though we could use a smaller VL, don't to avoid a vsetivli
5518 // toggle.
5519 Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
5520 DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
5521 }
5522 if (LoV)
5523 Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
5524 DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL,
5525 RISCVII::TAIL_AGNOSTIC);
5526
5527 return convertFromScalableVector(VT, Res, DAG, Subtarget);
5528 }
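  // For example, with NumElts = 8 and Rotation = 3, HiV contributes elements
  // 3..7 via a vslidedown.vi by 3 into lanes 0..4, and LoV contributes
  // elements 0..2 via a vslideup.vi by 5 into lanes 5..7.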
5529
5530 if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
5531 return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
5532
5533 // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
5534 // use shift and truncate to perform the shuffle.
5535 // TODO: For Factor=6, we can perform the first step of the deinterleave via
5536 // shift-and-trunc reducing total cost for everything except an mf8 result.
5537 // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
5538 // to do the entire operation.
5539 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5540 const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5541 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5542 for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
5543 unsigned Index = 0;
5544 if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
5545 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
5546 if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
5547 return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
5548 }
5549 }
5550 }
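  // Sketch of the shift-and-trunc idea (assuming the helper behaves as its
  // name suggests): a Factor=2, Index=1 deinterleave of a v8i8 source
  // reinterprets it as v4i16, shifts each element right by 8, and narrows
  // back to i8 with a vnsrl.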
5551
5552 if (SDValue V =
5553 lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
5554 return V;
5555
5556 // Detect an interleave shuffle and lower to
5557 // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
5558 int EvenSrc, OddSrc;
5559 if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
5560 // Extract the halves of the vectors.
5561 MVT HalfVT = VT.getHalfNumVectorElementsVT();
5562
5563 // Recognize if one half is actually undef; the matching above will
5564 // otherwise reuse the even stream for the undef one. This improves
5565 // spread(2) shuffles.
5566 bool LaneIsUndef[2] = { true, true};
5567 for (unsigned i = 0; i < Mask.size(); i++)
5568 LaneIsUndef[i % 2] &= (Mask[i] == -1);
5569
5570 int Size = Mask.size();
5571 SDValue EvenV, OddV;
5572 if (LaneIsUndef[0]) {
5573 EvenV = DAG.getUNDEF(HalfVT);
5574 } else {
5575 assert(EvenSrc >= 0 && "Undef source?");
5576 EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
5577 EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
5578 DAG.getVectorIdxConstant(EvenSrc % Size, DL));
5579 }
5580
5581 if (LaneIsUndef[1]) {
5582 OddV = DAG.getUNDEF(HalfVT);
5583 } else {
5584 assert(OddSrc >= 0 && "Undef source?");
5585 OddV = (OddSrc / Size) == 0 ? V1 : V2;
5586 OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
5587 DAG.getVectorIdxConstant(OddSrc % Size, DL));
5588 }
5589
5590 return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
5591 }
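  // Why this works: viewing each even/odd result pair as one element of twice
  // the width, result[i] = EvenV[i] + (OddV[i] << eltbits)
  //                      = (EvenV[i] + OddV[i]) + OddV[i] * (2^eltbits - 1),
  // which matches the vwaddu/vwmaccu sequence described in the comment above.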
5592
5593
5594 // Handle any remaining single source shuffles
5595 assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
5596 if (V2.isUndef()) {
5597 // We might be able to express the shuffle as a bitrotate. But even if we
5598 // don't have Zvkb and have to expand, the expanded sequence of approx. 2
5599 // shifts and a vor will have a higher throughput than a vrgather.
5600 if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
5601 return V;
5602
5603 // Before hitting generic lowering fallbacks, try to widen the mask
5604 // to a wider SEW.
5605 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5606 return V;
5607
5608 // Can we generate a vcompress instead of a vrgather? These scale better
5609 // at high LMUL, at the cost of not being able to fold a following select
5610 // into them. The mask constants are also smaller than the index vector
5611 // constants, and thus easier to materialize.
5612 if (isCompressMask(Mask)) {
5613 SmallVector<SDValue> MaskVals(NumElts,
5614 DAG.getConstant(false, DL, XLenVT));
5615 for (auto Idx : Mask) {
5616 if (Idx == -1)
5617 break;
5618 assert(Idx >= 0 && (unsigned)Idx < NumElts);
5619 MaskVals[Idx] = DAG.getConstant(true, DL, XLenVT);
5620 }
5621 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5622 SDValue CompressMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5623 return DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, V1, CompressMask,
5624 DAG.getUNDEF(VT));
5625 }
5626
5627 // Match a spread(4,8) which can be done via extend and shift. Spread(2)
5628 // is fully covered in interleave(2) above, so it is ignored here.
5629 if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
5630 unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
5631 assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
5632 for (unsigned Factor = 4; Factor <= MaxFactor; Factor <<= 1) {
5633 unsigned Index;
5634 if (isSpreadMask(Mask, Factor, Index)) {
5635 MVT NarrowVT =
5636 MVT::getVectorVT(VT.getVectorElementType(), NumElts / Factor);
5637 SDValue Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT, V1,
5638 DAG.getVectorIdxConstant(0, DL));
5639 return getWideningSpread(Src, Factor, Index, DL, DAG);
5640 }
5641 }
5642 }
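  // Sketch (assuming getWideningSpread extends and shifts as the comment
  // above describes): a spread(4) mask such as <0,-1,-1,-1,1,-1,-1,-1,...>
  // zero-extends each source element to 4x its width and shifts it left by
  // Index * SEW bits within the widened element.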
5643
5644 if (VT.getScalarSizeInBits() == 8 &&
5645 any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
5646 // On such a vector we're unable to use i8 as the index type.
5647 // FIXME: We could promote the index to i16 and use vrgatherei16, but that
5648 // may involve vector splitting if we're already at LMUL=8, or our
5649 // user-supplied maximum fixed-length LMUL.
5650 return SDValue();
5651 }
5652
5653 // Base case for the two operand recursion below - handle the worst case
5654 // single source shuffle.
5655 unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
5656 MVT IndexVT = VT.changeTypeToInteger();
5657 // Since we can't introduce illegal index types at this stage, use i16 and
5658 // vrgatherei16 if the corresponding index type for plain vrgather is greater
5659 // than XLenVT.
5660 if (IndexVT.getScalarType().bitsGT(XLenVT)) {
5661 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5662 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5663 }
5664
5665 // If the mask allows, we can do all the index computation in 16 bits. This
5666 // requires less work and less register pressure at high LMUL, and creates
5667 // smaller constants which may be cheaper to materialize.
5668 if (IndexVT.getScalarType().bitsGT(MVT::i16) && isUInt<16>(NumElts - 1) &&
5669 (IndexVT.getSizeInBits() / Subtarget.getRealMinVLen()) > 1) {
5670 GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
5671 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
5672 }
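  // The last condition above holds exactly when the index vector would span
  // more than one vector register at the minimum VLEN, i.e. LMUL > 1.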
5673
5674 MVT IndexContainerVT =
5675 ContainerVT.changeVectorElementType(IndexVT.getScalarType());
5676
5677 V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
5678 SmallVector<SDValue> GatherIndicesLHS;
5679 for (int MaskIndex : Mask) {
5680 bool IsLHSIndex = MaskIndex < (int)NumElts && MaskIndex >= 0;
5681 GatherIndicesLHS.push_back(IsLHSIndex
5682 ? DAG.getConstant(MaskIndex, DL, XLenVT)
5683 : DAG.getUNDEF(XLenVT));
5684 }
5685 SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
5686 LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
5687 Subtarget);
5688 SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
5689 DAG.getUNDEF(ContainerVT), TrueMask, VL);
5690 return convertFromScalableVector(VT, Gather, DAG, Subtarget);
5691 }
5692
5693 // As a backup, shuffles can be lowered via a vrgather instruction, possibly
5694 // merged with a second vrgather.
5695 SmallVector<int> ShuffleMaskLHS, ShuffleMaskRHS;
5696
5697 // Now construct the mask that will be used by the blended vrgather operation.
5698 // Construct the appropriate indices into each vector.
5699 for (int MaskIndex : Mask) {
5700 bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
5701 ShuffleMaskLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
5702 ? MaskIndex : -1);
5703 ShuffleMaskRHS.push_back(IsLHSOrUndefIndex ? -1 : (MaskIndex - NumElts));
5704 }
5705
5706 // If the mask indices are disjoint between the two sources, we can lower it
5707 // as a vselect + a single source vrgather.vv. Don't do this if we think the
5708 // operands may end up being lowered to something cheaper than a vrgather.vv.
5709 if (!DAG.isSplatValue(V2) && !DAG.isSplatValue(V1) &&
5710 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskLHS.data(), VT) &&
5711 !ShuffleVectorSDNode::isSplatMask(ShuffleMaskRHS.data(), VT) &&
5712 !ShuffleVectorInst::isIdentityMask(ShuffleMaskLHS, NumElts) &&
5713 !ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts))
5714 if (SDValue V = lowerDisjointIndicesShuffle(SVN, DAG, Subtarget))
5715 return V;
5716
5717 // Before hitting generic lowering fallbacks, try to widen the mask
5718 // to a wider SEW.
5719 if (SDValue V = tryWidenMaskForShuffle(Op, DAG))
5720 return V;
5721
5722 // Try to pick a profitable operand order.
5723 bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
5724 SwapOps = SwapOps ^ ShuffleVectorInst::isIdentityMask(ShuffleMaskRHS, NumElts);
5725
5726 // Recursively invoke lowering for each operand if we had two
5727 // independent single source shuffles, and then combine the result via a
5728 // vselect. Note that the vselect will likely be folded back into the
5729 // second permute (vrgather, or other) by the post-isel combine.
5730 V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
5731 V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), ShuffleMaskRHS);
5732
5733 SmallVector<SDValue> MaskVals;
5734 for (int MaskIndex : Mask) {
5735 bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
5736 MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
5737 }
5738
5739 assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
5740 MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
5741 SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
5742
5743 if (SwapOps)
5744 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
5745 return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V2, V1);
5746}
5747
5747
5748bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
5749 // Only support legal VTs for other shuffles for now.
5750 if (!isTypeLegal(VT))
5751 return false;
5752
5753 // Support splats for any type. These should type legalize well.
5754 if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
5755 return true;
5756
5757 MVT SVT = VT.getSimpleVT();
5758
5759 // Not for i1 vectors.
5760 if (SVT.getScalarType() == MVT::i1)
5761 return false;
5762
5763 int Dummy1, Dummy2;
5764 return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
5765 isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
5766}
5767
5768// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
5769// the exponent.
5770SDValue
5771RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
5772 SelectionDAG &DAG) const {
5773 MVT VT = Op.getSimpleValueType();
5774 unsigned EltSize = VT.getScalarSizeInBits();
5775 SDValue Src = Op.getOperand(0);
5776 SDLoc DL(Op);
5777 MVT ContainerVT = VT;
5778
5779 SDValue Mask, VL;
5780 if (Op->isVPOpcode()) {
5781 Mask = Op.getOperand(1);
5782 if (VT.isFixedLengthVector())
5783 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5784 Subtarget);
5785 VL = Op.getOperand(2);
5786 }
5787
5788 // We choose an FP type that can represent the value if possible. Otherwise,
5789 // we use a round-towards-zero conversion to get a correct exponent in the result.
5790 // TODO: Use f16 for i8 when possible?
5791 MVT FloatEltVT = (EltSize >= 32) ? MVT::f64 : MVT::f32;
5792 if (!isTypeLegal(MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount())))
5793 FloatEltVT = MVT::f32;
5794 MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
5795
5796 // Legal types should have been checked in the RISCVTargetLowering
5797 // constructor.
5798 // TODO: Splitting may make sense in some cases.
5799 assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
5800 "Expected legal float type!");
5801
5802 // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
5803 // The trailing zero count is equal to log2 of this single bit value.
5804 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
5805 SDValue Neg = DAG.getNegative(Src, DL, VT);
5806 Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
5807 } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
5808 SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
5809 Src, Mask, VL);
5810 Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
5811 }
5812
5813 // We have a legal FP type, convert to it.
5814 SDValue FloatVal;
5815 if (FloatVT.bitsGT(VT)) {
5816 if (Op->isVPOpcode())
5817 FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
5818 else
5819 FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
5820 } else {
5821 // Use RTZ to avoid rounding influencing exponent of FloatVal.
5822 if (VT.isFixedLengthVector()) {
5823 ContainerVT = getContainerForFixedLengthVector(VT);
5824 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
5825 }
5826 if (!Op->isVPOpcode())
5827 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
5828 SDValue RTZRM =
5829 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
5830 MVT ContainerFloatVT =
5831 MVT::getVectorVT(FloatEltVT, ContainerVT.getVectorElementCount());
5832 FloatVal = DAG.getNode(RISCVISD::VFCVT_RM_F_XU_VL, DL, ContainerFloatVT,
5833 Src, Mask, RTZRM, VL);
5834 if (VT.isFixedLengthVector())
5835 FloatVal = convertFromScalableVector(FloatVT, FloatVal, DAG, Subtarget);
5836 }
5837 // Bitcast to integer and shift the exponent to the LSB.
5838 EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
5839 SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
5840 unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
5841
5842 SDValue Exp;
5843 // Restore back to original type. Truncation after SRL is to generate vnsrl.
5844 if (Op->isVPOpcode()) {
5845 Exp = DAG.getNode(ISD::VP_SRL, DL, IntVT, Bitcast,
5846 DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
5847 Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
5848 } else {
5849 Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
5850 DAG.getConstant(ShiftAmt, DL, IntVT));
5851 if (IntVT.bitsLT(VT))
5852 Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
5853 else if (IntVT.bitsGT(VT))
5854 Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
5855 }
5856
5857 // The exponent contains log2 of the value in biased form.
5858 unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
5859 // For trailing zeros, we just need to subtract the bias.
5860 if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
5861 return DAG.getNode(ISD::SUB, DL, VT, Exp,
5862 DAG.getConstant(ExponentBias, DL, VT));
5863 if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
5864 return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
5865 DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
5866
5867 // For leading zeros, we need to remove the bias and convert from log2 to
5868 // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
5869 unsigned Adjust = ExponentBias + (EltSize - 1);
5870 SDValue Res;
5871 if (Op->isVPOpcode())
5872 Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
5873 Mask, VL);
5874 else
5875 Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
5876
5877 // For a zero input, the result above equals Adjust, which is greater than
5878 // EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
5879 if (Op.getOpcode() == ISD::CTLZ)
5880 Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
5881 else if (Op.getOpcode() == ISD::VP_CTLZ)
5882 Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
5883 DAG.getConstant(EltSize, DL, VT), Mask, VL);
5884 return Res;
5885}
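// Worked example (illustrative): for an i32 input x = 16, CTTZ_ZERO_UNDEF
// isolates the low set bit (16), converts it to f32 (0x41800000, biased
// exponent 131), shifts the exponent down by 23 and subtracts the bias 127,
// yielding 4. For CTLZ, Adjust = 127 + 31 = 158 and 158 - 131 = 27 = ctlz(16).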
5886
5887SDValue RISCVTargetLowering::lowerVPCttzElements(SDValue Op,
5888 SelectionDAG &DAG) const {
5889 SDLoc DL(Op);
5890 MVT XLenVT = Subtarget.getXLenVT();
5891 SDValue Source = Op->getOperand(0);
5892 MVT SrcVT = Source.getSimpleValueType();
5893 SDValue Mask = Op->getOperand(1);
5894 SDValue EVL = Op->getOperand(2);
5895
5896 if (SrcVT.isFixedLengthVector()) {
5897 MVT ContainerVT = getContainerForFixedLengthVector(SrcVT);
5898 Source = convertToScalableVector(ContainerVT, Source, DAG, Subtarget);
5899 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
5900 Subtarget);
5901 SrcVT = ContainerVT;
5902 }
5903
5904 // Convert to boolean vector.
5905 if (SrcVT.getScalarType() != MVT::i1) {
5906 SDValue AllZero = DAG.getConstant(0, DL, SrcVT);
5907 SrcVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorElementCount());
5908 Source = DAG.getNode(RISCVISD::SETCC_VL, DL, SrcVT,
5909 {Source, AllZero, DAG.getCondCode(ISD::SETNE),
5910 DAG.getUNDEF(SrcVT), Mask, EVL});
5911 }
5912
5913 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Source, Mask, EVL);
5914 if (Op->getOpcode() == ISD::VP_CTTZ_ELTS_ZERO_UNDEF)
5915 // In this case, we can interpret poison as -1, so there is nothing further to do.
5916 return Res;
5917
5918 // Convert -1 to VL.
5919 SDValue SetCC =
5920 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
5921 Res = DAG.getSelect(DL, XLenVT, SetCC, EVL, Res);
5922 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
5923}
5924
5925// While RVV has alignment restrictions, we should always be able to load as a
5926// legal equivalently-sized byte-typed vector instead. This method is
5927// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
5928// the load is already correctly-aligned, it returns SDValue().
5929SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
5930 SelectionDAG &DAG) const {
5931 auto *Load = cast<LoadSDNode>(Op);
5932 assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
5933
5933
5934 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5935 Load->getMemoryVT(),
5936 *Load->getMemOperand()))
5937 return SDValue();
5938
5939 SDLoc DL(Op);
5940 MVT VT = Op.getSimpleValueType();
5941 unsigned EltSizeBits = VT.getScalarSizeInBits();
5942 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5943 "Unexpected unaligned RVV load type");
5944 MVT NewVT =
5945 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5946 assert(NewVT.isValid() &&
5947 "Expecting equally-sized RVV vector types to be legal");
5948 SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
5949 Load->getPointerInfo(), Load->getOriginalAlign(),
5950 Load->getMemOperand()->getFlags());
5951 return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
5952}
5953
5954// While RVV has alignment restrictions, we should always be able to store as a
5955// legal equivalently-sized byte-typed vector instead. This method is
5956// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
5957// returns SDValue() if the store is already correctly aligned.
5958SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
5959 SelectionDAG &DAG) const {
5960 auto *Store = cast<StoreSDNode>(Op);
5961 assert(Store && Store->getValue().getValueType().isVector() &&
5962 "Expected vector store");
5963
5964 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
5965 Store->getMemoryVT(),
5966 *Store->getMemOperand()))
5967 return SDValue();
5968
5969 SDLoc DL(Op);
5970 SDValue StoredVal = Store->getValue();
5971 MVT VT = StoredVal.getSimpleValueType();
5972 unsigned EltSizeBits = VT.getScalarSizeInBits();
5973 assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
5974 "Unexpected unaligned RVV store type");
5975 MVT NewVT =
5976 MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
5977 assert(NewVT.isValid() &&
5978 "Expecting equally-sized RVV vector types to be legal");
5979 StoredVal = DAG.getBitcast(NewVT, StoredVal);
5980 return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
5981 Store->getPointerInfo(), Store->getOriginalAlign(),
5982 Store->getMemOperand()->getFlags());
5983}
5984
5985static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
5986 const RISCVSubtarget &Subtarget) {
5987 assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
5988
5989 int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
5990
5991 // All simm32 constants should be handled by isel.
5992 // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
5993 // this check redundant, but small immediates are common so this check
5994 // should have better compile time.
5995 if (isInt<32>(Imm))
5996 return Op;
5997
5998 // We only need to cost the immediate, if constant pool lowering is enabled.
5999 if (!Subtarget.useConstantPoolForLargeInts())
6000 return Op;
6001
6002 RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
6003 if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
6004 return Op;
6005
6006 // Optimizations below are disabled for opt size. If we're optimizing for
6007 // size, use a constant pool.
6008 if (DAG.shouldOptForSize())
6009 return SDValue();
6010
6011 // Special case: see if we can build the constant as (ADD (SLLI X, C), X);
6012 // do that if it will avoid a constant pool.
6013 // It will require an extra temporary register though.
6014 // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
6015 // low and high 32 bits are the same and bit 31 and 63 are set.
6016 unsigned ShiftAmt, AddOpc;
6017 RISCVMatInt::InstSeq SeqLo =
6018 RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
6019 if (!SeqLo.empty() && (SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
6020 return Op;
6021
6022 return SDValue();
6023}
6024
6025SDValue RISCVTargetLowering::lowerConstantFP(SDValue Op,
6026 SelectionDAG &DAG) const {
6027 MVT VT = Op.getSimpleValueType();
6028 const APFloat &Imm = cast<ConstantFPSDNode>(Op)->getValueAPF();
6029
6030 // Can this constant be selected by a Zfa FLI instruction?
6031 bool Negate = false;
6032 int Index = getLegalZfaFPImm(Imm, VT);
6033
6034 // If the constant is negative, try negating.
6035 if (Index < 0 && Imm.isNegative()) {
6036 Index = getLegalZfaFPImm(-Imm, VT);
6037 Negate = true;
6038 }
6039
6040 // If we couldn't find a FLI lowering, fall back to generic code.
6041 if (Index < 0)
6042 return SDValue();
6043
6044 // Emit an FLI+FNEG. We use a custom node to hide from constant folding.
6045 SDLoc DL(Op);
6046 SDValue Const =
6047 DAG.getNode(RISCVISD::FLI, DL, VT,
6048 DAG.getTargetConstant(Index, DL, Subtarget.getXLenVT()));
6049 if (!Negate)
6050 return Const;
6051
6052 return DAG.getNode(ISD::FNEG, DL, VT, Const);
6053}
6054
6055static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
6056 const RISCVSubtarget &Subtarget) {
6057 SDLoc dl(Op);
6058 AtomicOrdering FenceOrdering =
6059 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
6060 SyncScope::ID FenceSSID =
6061 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
6062
6063 if (Subtarget.hasStdExtZtso()) {
6064 // The only fence that needs an instruction is a sequentially-consistent
6065 // cross-thread fence.
6066 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
6067 FenceSSID == SyncScope::System)
6068 return Op;
6069
6070 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6071 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6072 }
6073
6074 // singlethread fences only synchronize with signal handlers on the same
6075 // thread and thus only need to preserve instruction order, not actually
6076 // enforce memory ordering.
6077 if (FenceSSID == SyncScope::SingleThread)
6078 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
6079 return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
6080
6081 return Op;
6082}
6083
6084SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
6085 SelectionDAG &DAG) const {
6086 SDLoc DL(Op);
6087 MVT VT = Op.getSimpleValueType();
6088 MVT XLenVT = Subtarget.getXLenVT();
6089 unsigned Check = Op.getConstantOperandVal(1);
6090 unsigned TDCMask = 0;
6091 if (Check & fcSNan)
6092 TDCMask |= RISCV::FPMASK_Signaling_NaN;
6093 if (Check & fcQNan)
6094 TDCMask |= RISCV::FPMASK_Quiet_NaN;
6095 if (Check & fcPosInf)
6096 TDCMask |= RISCV::FPMASK_Positive_Infinity;
6097 if (Check & fcNegInf)
6098 TDCMask |= RISCV::FPMASK_Negative_Infinity;
6099 if (Check & fcPosNormal)
6100 TDCMask |= RISCV::FPMASK_Positive_Normal;
6101 if (Check & fcNegNormal)
6102 TDCMask |= RISCV::FPMASK_Negative_Normal;
6103 if (Check & fcPosSubnormal)
6104 TDCMask |= RISCV::FPMASK_Positive_Subnormal;
6105 if (Check & fcNegSubnormal)
6106 TDCMask |= RISCV::FPMASK_Negative_Subnormal;
6107 if (Check & fcPosZero)
6108 TDCMask |= RISCV::FPMASK_Positive_Zero;
6109 if (Check & fcNegZero)
6110 TDCMask |= RISCV::FPMASK_Negative_Zero;
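  // TDCMask now mirrors the bit layout of the fclass.{s,d,h} result (bit 0 =
  // -inf through bit 9 = quiet NaN), so it can be tested directly against the
  // FCLASS output below.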
6111
6112 bool IsOneBitMask = isPowerOf2_32(TDCMask);
6113
6114 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
6115
6116 if (VT.isVector()) {
6117 SDValue Op0 = Op.getOperand(0);
6118 MVT VT0 = Op.getOperand(0).getSimpleValueType();
6119
6120 if (VT.isScalableVector()) {
6121 MVT DstVT = VT0.changeVectorElementTypeToInteger();
6122 auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
6123 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6124 Mask = Op.getOperand(2);
6125 VL = Op.getOperand(3);
6126 }
6127 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
6128 VL, Op->getFlags());
6129 if (IsOneBitMask)
6130 return DAG.getSetCC(DL, VT, FPCLASS,
6131 DAG.getConstant(TDCMask, DL, DstVT),
6132 ISD::SETEQ);
6133 SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
6134 DAG.getConstant(TDCMask, DL, DstVT));
6135 return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
6136 ISD::SETNE);
6137 }
6138
6139 MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
6140 MVT ContainerVT = getContainerForFixedLengthVector(VT);
6141 MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
6142 auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
6143 if (Op.getOpcode() == ISD::VP_IS_FPCLASS) {
6144 Mask = Op.getOperand(2);
6145 MVT MaskContainerVT =
6146 getContainerForFixedLengthVector(Mask.getSimpleValueType());
6147 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
6148 VL = Op.getOperand(3);
6149 }
6150 Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
6151
6152 SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
6153 Mask, VL, Op->getFlags());
6154
6155 TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6156 DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
6157 if (IsOneBitMask) {
6158 SDValue VMSEQ =
6159 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6160 {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
6161 DAG.getUNDEF(ContainerVT), Mask, VL});
6162 return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
6163 }
6164 SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
6165 TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
6166
6167 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
6168 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
6169 DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
6170
6171 SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
6172 {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
6173 DAG.getUNDEF(ContainerVT), Mask, VL});
6174 return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
6175 }
6176
6177 SDValue FCLASS = DAG.getNode(RISCVISD::FCLASS, DL, XLenVT, Op.getOperand(0));
6178 SDValue AND = DAG.getNode(ISD::AND, DL, XLenVT, FCLASS, TDCMaskV);
6179 SDValue Res = DAG.getSetCC(DL, XLenVT, AND, DAG.getConstant(0, DL, XLenVT),
6180 ISD::SETNE);
6181 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6182}
6183
6184// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
6185// operations propagate nans.
6186static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
6187 const RISCVSubtarget &Subtarget) {
6188 SDLoc DL(Op);
6189 MVT VT = Op.getSimpleValueType();
6190
6191 SDValue X = Op.getOperand(0);
6192 SDValue Y = Op.getOperand(1);
6193
6194 if (!VT.isVector()) {
6195 MVT XLenVT = Subtarget.getXLenVT();
6196
6197 // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
6198 // ensures that when one input is a nan, the other will also be a nan
6199 // allowing the nan to propagate. If both inputs are nan, this will swap the
6200 // inputs which is harmless.
6201
6202 SDValue NewY = Y;
6203 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(X)) {
6204 SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
6205 NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
6206 }
6207
6208 SDValue NewX = X;
6209 if (!Op->getFlags().hasNoNaNs() && !DAG.isKnownNeverNaN(Y)) {
6210 SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
6211 NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
6212 }
6213
6214 unsigned Opc =
6215 Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
6216 return DAG.getNode(Opc, DL, VT, NewX, NewY);
6217 }
6218
6219 // Check for NaNs before converting fixed-length vectors to scalable.
6220 bool XIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(X);
6221 bool YIsNeverNan = Op->getFlags().hasNoNaNs() || DAG.isKnownNeverNaN(Y);
6222
6223 MVT ContainerVT = VT;
6224 if (VT.isFixedLengthVector()) {
6225 ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
6226 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
6227 Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
6228 }
6229
6230 SDValue Mask, VL;
6231 if (Op->isVPOpcode()) {
6232 Mask = Op.getOperand(2);
6233 if (VT.isFixedLengthVector())
6234 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
6235 Subtarget);
6236 VL = Op.getOperand(3);
6237 } else {
6238 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
6239 }
6240
6241 SDValue NewY = Y;
6242 if (!XIsNeverNan) {
6243 SDValue XIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6244 {X, X, DAG.getCondCode(ISD::SETOEQ),
6245 DAG.getUNDEF(ContainerVT), Mask, VL});
6246 NewY = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, XIsNonNan, Y, X,
6247 DAG.getUNDEF(ContainerVT), VL);
6248 }
6249
6250 SDValue NewX = X;
6251 if (!YIsNeverNan) {
6252 SDValue YIsNonNan = DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
6253 {Y, Y, DAG.getCondCode(ISD::SETOEQ),
6254 DAG.getUNDEF(ContainerVT), Mask, VL});
6255 NewX = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, YIsNonNan, X, Y,
6256 DAG.getUNDEF(ContainerVT), VL);
6257 }
6258
6259 unsigned Opc =
6260 Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
6261 ? RISCVISD::VFMAX_VL
6262 : RISCVISD::VFMIN_VL;
6263 SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
6264 DAG.getUNDEF(ContainerVT), Mask, VL);
6265 if (VT.isFixedLengthVector())
6266 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
6267 return Res;
6268}
6269
6270static SDValue lowerFABSorFNEG(SDValue Op, SelectionDAG &DAG,
6271 const RISCVSubtarget &Subtarget) {
6272 bool IsFABS = Op.getOpcode() == ISD::FABS;
6273 assert((IsFABS || Op.getOpcode() == ISD::FNEG) &&
6274 "Wrong opcode for lowering FABS or FNEG.");
6275
6276 MVT XLenVT = Subtarget.getXLenVT();
6277 MVT VT = Op.getSimpleValueType();
6278 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6279
6280 SDLoc DL(Op);
6281 SDValue Fmv =
6282 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op.getOperand(0));
6283
6284 APInt Mask = IsFABS ? APInt::getSignedMaxValue(16) : APInt::getSignMask(16);
6285 Mask = Mask.sext(Subtarget.getXLen());
6286
6287 unsigned LogicOpc = IsFABS ? ISD::AND : ISD::XOR;
6288 SDValue Logic =
6289 DAG.getNode(LogicOpc, DL, XLenVT, Fmv, DAG.getConstant(Mask, DL, XLenVT));
6290 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, Logic);
6291}
6292
6293static SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG,
6294 const RISCVSubtarget &Subtarget) {
6295 assert(Op.getOpcode() == ISD::FCOPYSIGN && "Unexpected opcode");
6296
6297 MVT XLenVT = Subtarget.getXLenVT();
6298 MVT VT = Op.getSimpleValueType();
6299 assert((VT == MVT::f16 || VT == MVT::bf16) && "Unexpected type");
6300
6301 SDValue Mag = Op.getOperand(0);
6302 SDValue Sign = Op.getOperand(1);
6303
6304 SDLoc DL(Op);
6305
6306 // Get sign bit into an integer value.
6307 SDValue SignAsInt;
6308 unsigned SignSize = Sign.getValueSizeInBits();
6309 if (SignSize == Subtarget.getXLen()) {
6310 SignAsInt = DAG.getNode(ISD::BITCAST, DL, XLenVT, Sign);
6311 } else if (SignSize == 16) {
6312 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Sign);
6313 } else if (SignSize == 32) {
6314 SignAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, Sign);
6315 } else if (SignSize == 64) {
6316 assert(XLenVT == MVT::i32 && "Unexpected type");
6317 // Copy the upper word to integer.
6318 SignAsInt = DAG.getNode(RISCVISD::SplitF64, DL, {MVT::i32, MVT::i32}, Sign)
6319 .getValue(1);
6320 SignSize = 32;
6321 } else
6322 llvm_unreachable("Unexpected sign size");
6323
6324 // Get the signbit at the right position for MagAsInt.
6325 int ShiftAmount = (int)SignSize - (int)Mag.getValueSizeInBits();
6326 if (ShiftAmount > 0) {
6327 SignAsInt = DAG.getNode(ISD::SRL, DL, XLenVT, SignAsInt,
6328 DAG.getConstant(ShiftAmount, DL, XLenVT));
6329 } else if (ShiftAmount < 0) {
6330 SignAsInt = DAG.getNode(ISD::SHL, DL, XLenVT, SignAsInt,
6331 DAG.getConstant(-ShiftAmount, DL, XLenVT));
6332 }
6333
6334 // Mask the sign bit and any bits above it. The extra bits will be dropped
6335 // when we convert back to FP.
6336 SDValue SignMask = DAG.getConstant(
6337 APInt::getSignMask(16).sext(Subtarget.getXLen()), DL, XLenVT);
6338 SDValue SignBit = DAG.getNode(ISD::AND, DL, XLenVT, SignAsInt, SignMask);
6339
6340 // Transform Mag value to integer, and clear the sign bit.
6341 SDValue MagAsInt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Mag);
6342 SDValue ClearSignMask = DAG.getConstant(
6343 APInt::getSignedMaxValue(16).sext(Subtarget.getXLen()), DL, XLenVT);
6344 SDValue ClearedSign =
6345 DAG.getNode(ISD::AND, DL, XLenVT, MagAsInt, ClearSignMask);
6346
6347 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, XLenVT, ClearedSign, SignBit,
6349
6350 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, CopiedSign);
6351}
6352
6353/// Get a RISC-V target specified VL op for a given SDNode.
6354static unsigned getRISCVVLOp(SDValue Op) {
6355#define OP_CASE(NODE) \
6356 case ISD::NODE: \
6357 return RISCVISD::NODE##_VL;
6358#define VP_CASE(NODE) \
6359 case ISD::VP_##NODE: \
6360 return RISCVISD::NODE##_VL;
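// For example, OP_CASE(ADD) expands to "case ISD::ADD: return
// RISCVISD::ADD_VL;" and VP_CASE(ADD) expands to "case ISD::VP_ADD: return
// RISCVISD::ADD_VL;".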
6361 // clang-format off
6362 switch (Op.getOpcode()) {
6363 default:
6364 llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
6365 OP_CASE(ADD)
6366 OP_CASE(SUB)
6367 OP_CASE(MUL)
6368 OP_CASE(MULHS)
6369 OP_CASE(MULHU)
6370 OP_CASE(SDIV)
6371 OP_CASE(SREM)
6372 OP_CASE(UDIV)
6373 OP_CASE(UREM)
6374 OP_CASE(SHL)
6375 OP_CASE(SRA)
6376 OP_CASE(SRL)
6377 OP_CASE(ROTL)
6378 OP_CASE(ROTR)
6379 OP_CASE(BSWAP)
6380 OP_CASE(CTTZ)
6381 OP_CASE(CTLZ)
6382 OP_CASE(CTPOP)
6383 OP_CASE(BITREVERSE)
6384 OP_CASE(SADDSAT)
6385 OP_CASE(UADDSAT)
6386 OP_CASE(SSUBSAT)
6387 OP_CASE(USUBSAT)
6388 OP_CASE(AVGFLOORS)
6389 OP_CASE(AVGFLOORU)
6390 OP_CASE(AVGCEILS)
6391 OP_CASE(AVGCEILU)
6392 OP_CASE(FADD)
6393 OP_CASE(FSUB)
6394 OP_CASE(FMUL)
6395 OP_CASE(FDIV)
6396 OP_CASE(FNEG)
6397 OP_CASE(FABS)
6398 OP_CASE(FSQRT)
6399 OP_CASE(SMIN)
6400 OP_CASE(SMAX)
6401 OP_CASE(UMIN)
6402 OP_CASE(UMAX)
6403 OP_CASE(STRICT_FADD)
6404 OP_CASE(STRICT_FSUB)
6405 OP_CASE(STRICT_FMUL)
6406 OP_CASE(STRICT_FDIV)
6407 OP_CASE(STRICT_FSQRT)
6408 VP_CASE(ADD) // VP_ADD
6409 VP_CASE(SUB) // VP_SUB
6410 VP_CASE(MUL) // VP_MUL
6411 VP_CASE(SDIV) // VP_SDIV
6412 VP_CASE(SREM) // VP_SREM
6413 VP_CASE(UDIV) // VP_UDIV
6414 VP_CASE(UREM) // VP_UREM
6415 VP_CASE(SHL) // VP_SHL
6416 VP_CASE(FADD) // VP_FADD
6417 VP_CASE(FSUB) // VP_FSUB
6418 VP_CASE(FMUL) // VP_FMUL
6419 VP_CASE(FDIV) // VP_FDIV
6420 VP_CASE(FNEG) // VP_FNEG
6421 VP_CASE(FABS) // VP_FABS
6422 VP_CASE(SMIN) // VP_SMIN
6423 VP_CASE(SMAX) // VP_SMAX
6424 VP_CASE(UMIN) // VP_UMIN
6425 VP_CASE(UMAX) // VP_UMAX
6426 VP_CASE(FCOPYSIGN) // VP_FCOPYSIGN
6427 VP_CASE(SETCC) // VP_SETCC
6428 VP_CASE(SINT_TO_FP) // VP_SINT_TO_FP
6429 VP_CASE(UINT_TO_FP) // VP_UINT_TO_FP
6430 VP_CASE(BITREVERSE) // VP_BITREVERSE
6431 VP_CASE(SADDSAT) // VP_SADDSAT
6432 VP_CASE(UADDSAT) // VP_UADDSAT
6433 VP_CASE(SSUBSAT) // VP_SSUBSAT
6434 VP_CASE(USUBSAT) // VP_USUBSAT
6435 VP_CASE(BSWAP) // VP_BSWAP
6436 VP_CASE(CTLZ) // VP_CTLZ
6437 VP_CASE(CTTZ) // VP_CTTZ
6438 VP_CASE(CTPOP) // VP_CTPOP
6439 case ISD::CTLZ_ZERO_UNDEF:
6440 case ISD::VP_CTLZ_ZERO_UNDEF:
6441 return RISCVISD::CTLZ_VL;
6442 case ISD::CTTZ_ZERO_UNDEF:
6443 case ISD::VP_CTTZ_ZERO_UNDEF:
6444 return RISCVISD::CTTZ_VL;
6445 case ISD::FMA:
6446 case ISD::VP_FMA:
6447 return RISCVISD::VFMADD_VL;
6448 case ISD::STRICT_FMA:
6449 return RISCVISD::STRICT_VFMADD_VL;
6450 case ISD::AND:
6451 case ISD::VP_AND:
6452 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6453 return RISCVISD::VMAND_VL;
6454 return RISCVISD::AND_VL;
6455 case ISD::OR:
6456 case ISD::VP_OR:
6457 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6458 return RISCVISD::VMOR_VL;
6459 return RISCVISD::OR_VL;
6460 case ISD::XOR:
6461 case ISD::VP_XOR:
6462 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
6463 return RISCVISD::VMXOR_VL;
6464 return RISCVISD::XOR_VL;
6465 case ISD::VP_SELECT:
6466 case ISD::VP_MERGE:
6467 return RISCVISD::VMERGE_VL;
6468 case ISD::VP_SRA:
6469 return RISCVISD::SRA_VL;
6470 case ISD::VP_SRL:
6471 return RISCVISD::SRL_VL;
6472 case ISD::VP_SQRT:
6473 return RISCVISD::FSQRT_VL;
6474 case ISD::VP_SIGN_EXTEND:
6475 return RISCVISD::VSEXT_VL;
6476 case ISD::VP_ZERO_EXTEND:
6477 return RISCVISD::VZEXT_VL;
6478 case ISD::VP_FP_TO_SINT:
6479 return RISCVISD::VFCVT_RTZ_X_F_VL;
6480 case ISD::VP_FP_TO_UINT:
6481 return RISCVISD::VFCVT_RTZ_XU_F_VL;
6482 case ISD::FMINNUM:
6483 case ISD::VP_FMINNUM:
6484 return RISCVISD::VFMIN_VL;
6485 case ISD::FMAXNUM:
6486 case ISD::VP_FMAXNUM:
6487 return RISCVISD::VFMAX_VL;
6488 case ISD::LRINT:
6489 case ISD::VP_LRINT:
6490 case ISD::LLRINT:
6491 case ISD::VP_LLRINT:
6493 }
6494 // clang-format on
6495#undef OP_CASE
6496#undef VP_CASE
6497}
6498
6499/// Return true if a RISC-V target specified op has a passthru operand.
6500static bool hasPassthruOp(unsigned Opcode) {
6501 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6503 "not a RISC-V target specific op");
6504 static_assert(
6507 "adding target specific op should update this function");
6508 if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
6509 return true;
6510 if (Opcode == RISCVISD::FCOPYSIGN_VL)
6511 return true;
6512 if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
6513 return true;
6514 if (Opcode == RISCVISD::SETCC_VL)
6515 return true;
6516 if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
6517 return true;
6518 if (Opcode == RISCVISD::VMERGE_VL)
6519 return true;
6520 return false;
6521}
6522
6523/// Return true if a RISC-V target specified op has a mask operand.
6524static bool hasMaskOp(unsigned Opcode) {
6525 assert(Opcode > RISCVISD::FIRST_NUMBER &&
6527 "not a RISC-V target specific op");
6528 static_assert(
6531 "adding target specific op should update this function");
6532 if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
6533 return true;
6534 if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
6535 return true;
6536 if (Opcode >= RISCVISD::STRICT_FADD_VL &&
6538 return true;
6539 return false;
6540}
6541
6543 const RISCVSubtarget &Subtarget) {
6544 if (Op.getValueType() == MVT::nxv32f16 &&
6545 (Subtarget.hasVInstructionsF16Minimal() &&
6546 !Subtarget.hasVInstructionsF16()))
6547 return true;
6548 if (Op.getValueType() == MVT::nxv32bf16)
6549 return true;
6550 return false;
6551}
6552
6553static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
6554 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6555 SDLoc DL(Op);
6556
6557 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6558 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6559
6560 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6561 if (!Op.getOperand(j).getValueType().isVector()) {
6562 LoOperands[j] = Op.getOperand(j);
6563 HiOperands[j] = Op.getOperand(j);
6564 continue;
6565 }
6566 std::tie(LoOperands[j], HiOperands[j]) =
6567 DAG.SplitVector(Op.getOperand(j), DL);
6568 }
6569
6570 SDValue LoRes =
6571 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6572 SDValue HiRes =
6573 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6574
6575 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6576}
6577
6578static SDValue SplitVPOp(SDValue Op, SelectionDAG &DAG) {
6579 assert(ISD::isVPOpcode(Op.getOpcode()) && "Not a VP op");
6580 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op.getValueType());
6581 SDLoc DL(Op);
6582
6583 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6584 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6585
6586 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6587 if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == j) {
6588 std::tie(LoOperands[j], HiOperands[j]) =
6589 DAG.SplitEVL(Op.getOperand(j), Op.getValueType(), DL);
6590 continue;
6591 }
6592 if (!Op.getOperand(j).getValueType().isVector()) {
6593 LoOperands[j] = Op.getOperand(j);
6594 HiOperands[j] = Op.getOperand(j);
6595 continue;
6596 }
6597 std::tie(LoOperands[j], HiOperands[j]) =
6598 DAG.SplitVector(Op.getOperand(j), DL);
6599 }
6600
6601 SDValue LoRes =
6602 DAG.getNode(Op.getOpcode(), DL, LoVT, LoOperands, Op->getFlags());
6603 SDValue HiRes =
6604 DAG.getNode(Op.getOpcode(), DL, HiVT, HiOperands, Op->getFlags());
6605
6606 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Op.getValueType(), LoRes, HiRes);
6607}
6608
6609static SDValue SplitVectorReductionOp(SDValue Op, SelectionDAG &DAG) {
6610 SDLoc DL(Op);
6611
6612 auto [Lo, Hi] = DAG.SplitVector(Op.getOperand(1), DL);
6613 auto [MaskLo, MaskHi] = DAG.SplitVector(Op.getOperand(2), DL);
6614 auto [EVLLo, EVLHi] =
6615 DAG.SplitEVL(Op.getOperand(3), Op.getOperand(1).getValueType(), DL);
6616
6617 SDValue ResLo =
6618 DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6619 {Op.getOperand(0), Lo, MaskLo, EVLLo}, Op->getFlags());
6620 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
6621 {ResLo, Hi, MaskHi, EVLHi}, Op->getFlags());
6622}
6623
6624static SDValue SplitStrictFPVectorOp(SDValue Op, SelectionDAG &DAG) {
6625
6626 assert(Op->isStrictFPOpcode());
6627
6628 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(Op->getValueType(0));
6629
6630 SDVTList LoVTs = DAG.getVTList(LoVT, Op->getValueType(1));
6631 SDVTList HiVTs = DAG.getVTList(HiVT, Op->getValueType(1));
6632
6633 SDLoc DL(Op);
6634
6635 SmallVector<SDValue, 4> LoOperands(Op.getNumOperands());
6636 SmallVector<SDValue, 4> HiOperands(Op.getNumOperands());
6637
6638 for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
6639 if (!Op.getOperand(j).getValueType().isVector()) {
6640 LoOperands[j] = Op.getOperand(j);
6641 HiOperands[j] = Op.getOperand(j);
6642 continue;
6643 }
6644 std::tie(LoOperands[j], HiOperands[j]) =
6645 DAG.SplitVector(Op.getOperand(j), DL);
6646 }
6647
6648 SDValue LoRes =
6649 DAG.getNode(Op.getOpcode(), DL, LoVTs, LoOperands, Op->getFlags());
6650 HiOperands[0] = LoRes.getValue(1);
6651 SDValue HiRes =
6652 DAG.getNode(Op.getOpcode(), DL, HiVTs, HiOperands, Op->getFlags());
6653
6654 SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, Op->getValueType(0),
6655 LoRes.getValue(0), HiRes.getValue(0));
6656 return DAG.getMergeValues({V, HiRes.getValue(1)}, DL);
6657}
6658
6659SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
6660 SelectionDAG &DAG) const {
6661 switch (Op.getOpcode()) {
6662 default:
6663 report_fatal_error("unimplemented operand");
6664 case ISD::ATOMIC_FENCE:
6665 return LowerATOMIC_FENCE(Op, DAG, Subtarget);
6666 case ISD::GlobalAddress:
6667 return lowerGlobalAddress(Op, DAG);
6668 case ISD::BlockAddress:
6669 return lowerBlockAddress(Op, DAG);
6670 case ISD::ConstantPool:
6671 return lowerConstantPool(Op, DAG);
6672 case ISD::JumpTable:
6673 return lowerJumpTable(Op, DAG);
6674 case ISD::GlobalTLSAddress:
6675 return lowerGlobalTLSAddress(Op, DAG);
6676 case ISD::Constant:
6677 return lowerConstant(Op, DAG, Subtarget);
6678 case ISD::ConstantFP:
6679 return lowerConstantFP(Op, DAG);
6680 case ISD::SELECT:
6681 return lowerSELECT(Op, DAG);
6682 case ISD::BRCOND:
6683 return lowerBRCOND(Op, DAG);
6684 case ISD::VASTART:
6685 return lowerVASTART(Op, DAG);
6686 case ISD::FRAMEADDR:
6687 return lowerFRAMEADDR(Op, DAG);
6688 case ISD::RETURNADDR:
6689 return lowerRETURNADDR(Op, DAG);
6690 case ISD::SHL_PARTS:
6691 return lowerShiftLeftParts(Op, DAG);
6692 case ISD::SRA_PARTS:
6693 return lowerShiftRightParts(Op, DAG, true);
6694 case ISD::SRL_PARTS:
6695 return lowerShiftRightParts(Op, DAG, false);
6696 case ISD::ROTL:
6697 case ISD::ROTR:
6698 if (Op.getValueType().isFixedLengthVector()) {
6699 assert(Subtarget.hasStdExtZvkb());
6700 return lowerToScalableOp(Op, DAG);
6701 }
6702 assert(Subtarget.hasVendorXTHeadBb() &&
6703 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
6704 "Unexpected custom legalization");
6705 // XTHeadBb only supports rotate by constant.
6706 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6707 return SDValue();
6708 return Op;
6709 case ISD::BITCAST: {
6710 SDLoc DL(Op);
6711 EVT VT = Op.getValueType();
6712 SDValue Op0 = Op.getOperand(0);
6713 EVT Op0VT = Op0.getValueType();
6714 MVT XLenVT = Subtarget.getXLenVT();
6715 if (Op0VT == MVT::i16 &&
6716 ((VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
6717 (VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
6718 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
6719 return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, NewOp0);
6720 }
6721 if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
6722 Subtarget.hasStdExtFOrZfinx()) {
6723 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6724 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
6725 }
6726 if (VT == MVT::f64 && Op0VT == MVT::i64 && !Subtarget.is64Bit() &&
6727 Subtarget.hasStdExtDOrZdinx()) {
6728 SDValue Lo, Hi;
6729 std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
6730 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
6731 }
6732
6733 // Consider other scalar<->scalar casts as legal if the types are legal.
6734 // Otherwise expand them.
6735 if (!VT.isVector() && !Op0VT.isVector()) {
6736 if (isTypeLegal(VT) && isTypeLegal(Op0VT))
6737 return Op;
6738 return SDValue();
6739 }
6740
6741 assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
6742 "Unexpected types");
6743
6744 if (VT.isFixedLengthVector()) {
6745 // We can handle fixed length vector bitcasts with a simple replacement
6746 // in isel.
6747 if (Op0VT.isFixedLengthVector())
6748 return Op;
6749 // When bitcasting from scalar to fixed-length vector, insert the scalar
6750 // into a one-element vector of the result type, and perform a vector
6751 // bitcast.
6752 if (!Op0VT.isVector()) {
6753 EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
6754 if (!isTypeLegal(BVT))
6755 return SDValue();
6756 return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
6757 DAG.getUNDEF(BVT), Op0,
6758 DAG.getVectorIdxConstant(0, DL)));
6759 }
6760 return SDValue();
6761 }
6762 // Custom-legalize bitcasts from fixed-length vector types to scalar types
6763 // thus: bitcast the vector to a one-element vector type whose element type
6764 // is the same as the result type, and extract the first element.
6765 if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
6766 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
6767 if (!isTypeLegal(BVT))
6768 return SDValue();
6769 SDValue BVec = DAG.getBitcast(BVT, Op0);
6770 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
6771 DAG.getVectorIdxConstant(0, DL));
6772 }
6773 return SDValue();
6774 }
6775 case ISD::INTRINSIC_WO_CHAIN:
6776 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6777 case ISD::INTRINSIC_W_CHAIN:
6778 return LowerINTRINSIC_W_CHAIN(Op, DAG);
6779 case ISD::INTRINSIC_VOID:
6780 return LowerINTRINSIC_VOID(Op, DAG);
6781 case ISD::IS_FPCLASS:
6782 return LowerIS_FPCLASS(Op, DAG);
6783 case ISD::BITREVERSE: {
6784 MVT VT = Op.getSimpleValueType();
6785 if (VT.isFixedLengthVector()) {
6786 assert(Subtarget.hasStdExtZvbb());
6787 return lowerToScalableOp(Op, DAG);
6788 }
6789 SDLoc DL(Op);
6790 assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
6791 assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
6792 // Expand bitreverse to a bswap(rev8) followed by brev8.
6793 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
6794 return DAG.getNode(RISCVISD::BREV8, DL, VT, BSwap);
6795 }
6796 case ISD::TRUNCATE:
6797 case ISD::TRUNCATE_SSAT_S:
6798 case ISD::TRUNCATE_USAT_U:
6799 // Only custom-lower vector truncates
6800 if (!Op.getSimpleValueType().isVector())
6801 return Op;
6802 return lowerVectorTruncLike(Op, DAG);
6803 case ISD::ANY_EXTEND:
6804 case ISD::ZERO_EXTEND:
6805 if (Op.getOperand(0).getValueType().isVector() &&
6806 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6807 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
6808 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
6809 case ISD::SIGN_EXTEND:
6810 if (Op.getOperand(0).getValueType().isVector() &&
6811 Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
6812 return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
6813 return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
6814 case ISD::SPLAT_VECTOR_PARTS:
6815 return lowerSPLAT_VECTOR_PARTS(Op, DAG);
6816 case ISD::INSERT_VECTOR_ELT:
6817 return lowerINSERT_VECTOR_ELT(Op, DAG);
6818 case ISD::EXTRACT_VECTOR_ELT:
6819 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6820 case ISD::SCALAR_TO_VECTOR: {
6821 MVT VT = Op.getSimpleValueType();
6822 SDLoc DL(Op);
6823 SDValue Scalar = Op.getOperand(0);
6824 if (VT.getVectorElementType() == MVT::i1) {
6825 MVT WideVT = VT.changeVectorElementType(MVT::i8);
6826 SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
6827 return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
6828 }
6829 MVT ContainerVT = VT;
6830 if (VT.isFixedLengthVector())
6831 ContainerVT = getContainerForFixedLengthVector(VT);
6832 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
6833
6834 SDValue V;
6835 if (VT.isFloatingPoint()) {
6836 V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
6837 DAG.getUNDEF(ContainerVT), Scalar, VL);
6838 } else {
6839 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
6840 V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
6841 DAG.getUNDEF(ContainerVT), Scalar, VL);
6842 }
6843 if (VT.isFixedLengthVector())
6844 V = convertFromScalableVector(VT, V, DAG, Subtarget);
6845 return V;
6846 }
6847 case ISD::VSCALE: {
6848 MVT XLenVT = Subtarget.getXLenVT();
6849 MVT VT = Op.getSimpleValueType();
6850 SDLoc DL(Op);
6851 SDValue Res = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
6852 // We define our scalable vector types for lmul=1 to use a 64 bit known
6853 // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
6854 // vscale as VLENB / 8.
6855 static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
6856 if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
6857 report_fatal_error("Support for VLEN==32 is incomplete.");
6858 // We assume VLENB is a multiple of 8. We manually choose the best shift
6859 // here because SimplifyDemandedBits isn't always able to simplify it.
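// For example, a multiplier of 4 (Log2 = 2) becomes VLENB >> 1, a multiplier
// of 16 (Log2 = 4) becomes VLENB << 1, 24 becomes VLENB * 3, and a value that
// is not a power of two or a multiple of 8, such as 6, falls back to
// (VLENB >> 3) * 6.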
6860 uint64_t Val = Op.getConstantOperandVal(0);
6861 if (isPowerOf2_64(Val)) {
6862 uint64_t Log2 = Log2_64(Val);
6863 if (Log2 < 3)
6864 Res = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6865 DAG.getConstant(3 - Log2, DL, VT));
6866 else if (Log2 > 3)
6867 Res = DAG.getNode(ISD::SHL, DL, XLenVT, Res,
6868 DAG.getConstant(Log2 - 3, DL, XLenVT));
6869 } else if ((Val % 8) == 0) {
6870 // If the multiplier is a multiple of 8, scale it down to avoid needing
6871 // to shift the VLENB value.
6872 Res = DAG.getNode(ISD::MUL, DL, XLenVT, Res,
6873 DAG.getConstant(Val / 8, DL, XLenVT));
6874 } else {
6875 SDValue VScale = DAG.getNode(ISD::SRL, DL, XLenVT, Res,
6876 DAG.getConstant(3, DL, XLenVT));
6877 Res = DAG.getNode(ISD::MUL, DL, XLenVT, VScale,
6878 DAG.getConstant(Val, DL, XLenVT));
6879 }
6880 return DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
6881 }
6882 case ISD::FPOWI: {
6883 // Custom promote f16 powi with illegal i32 integer type on RV64. Once
6884 // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
6885 if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
6886 Op.getOperand(1).getValueType() == MVT::i32) {
6887 SDLoc DL(Op);
6888 SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
6889 SDValue Powi =
6890 DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
6891 return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
6892 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6893 }
6894 return SDValue();
6895 }
6896 case ISD::FMAXIMUM:
6897 case ISD::FMINIMUM:
6898 if (isPromotedOpNeedingSplit(Op, Subtarget))
6899 return SplitVectorOp(Op, DAG);
6900 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
6901 case ISD::FP_EXTEND:
6902 case ISD::FP_ROUND:
6903 return lowerVectorFPExtendOrRoundLike(Op, DAG);
6904 case ISD::STRICT_FP_ROUND:
6905 case ISD::STRICT_FP_EXTEND:
6906 return lowerStrictFPExtendOrRoundLike(Op, DAG);
6907 case ISD::SINT_TO_FP:
6908 case ISD::UINT_TO_FP:
6909 if (Op.getValueType().isVector() &&
6910 ((Op.getValueType().getScalarType() == MVT::f16 &&
6911 (Subtarget.hasVInstructionsF16Minimal() &&
6912 !Subtarget.hasVInstructionsF16())) ||
6913 Op.getValueType().getScalarType() == MVT::bf16)) {
6914 if (isPromotedOpNeedingSplit(Op, Subtarget))
6915 return SplitVectorOp(Op, DAG);
6916 // int -> f32
6917 SDLoc DL(Op);
6918 MVT NVT =
6919 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
6920 SDValue NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
6921 // f32 -> [b]f16
6922 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
6923 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
6924 }
6925 [[fallthrough]];
6926 case ISD::FP_TO_SINT:
6927 case ISD::FP_TO_UINT:
6928 if (SDValue Op1 = Op.getOperand(0);
6929 Op1.getValueType().isVector() &&
6930 ((Op1.getValueType().getScalarType() == MVT::f16 &&
6931 (Subtarget.hasVInstructionsF16Minimal() &&
6932 !Subtarget.hasVInstructionsF16())) ||
6933 Op1.getValueType().getScalarType() == MVT::bf16)) {
6934 if (isPromotedOpNeedingSplit(Op1, Subtarget))
6935 return SplitVectorOp(Op, DAG);
6936 // [b]f16 -> f32
6937 SDLoc DL(Op);
6938 MVT NVT = MVT::getVectorVT(MVT::f32,
6939 Op1.getValueType().getVectorElementCount());
6940 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
6941 // f32 -> int
6942 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), WidenVec);
6943 }
6944 [[fallthrough]];
6945 case ISD::STRICT_FP_TO_SINT:
6946 case ISD::STRICT_FP_TO_UINT:
6947 case ISD::STRICT_SINT_TO_FP:
6948 case ISD::STRICT_UINT_TO_FP: {
6949 // RVV can only do fp<->int conversions to types half/double the size of
6950 // the source. We custom-lower any conversion that would otherwise need two
6951 // hops into a sequence of single-hop conversions.
6952 MVT VT = Op.getSimpleValueType();
6953 if (VT.isScalarInteger())
6954 return lowerFP_TO_INT(Op, DAG, Subtarget);
6955 bool IsStrict = Op->isStrictFPOpcode();
6956 SDValue Src = Op.getOperand(0 + IsStrict);
6957 MVT SrcVT = Src.getSimpleValueType();
6958 if (SrcVT.isScalarInteger())
6959 return lowerINT_TO_FP(Op, DAG, Subtarget);
6960 if (!VT.isVector())
6961 return Op;
6962 SDLoc DL(Op);
6963 MVT EltVT = VT.getVectorElementType();
6964 MVT SrcEltVT = SrcVT.getVectorElementType();
6965 unsigned EltSize = EltVT.getSizeInBits();
6966 unsigned SrcEltSize = SrcEltVT.getSizeInBits();
6967 assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
6968 "Unexpected vector element types");
6969
6970 bool IsInt2FP = SrcEltVT.isInteger();
6971 // Widening conversions
6972 if (EltSize > (2 * SrcEltSize)) {
6973 if (IsInt2FP) {
6974 // Do a regular integer sign/zero extension then convert to float.
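// For example, for an i8 -> f64 element conversion this extends i8 to i32
// (half the destination width); the remaining i32 -> f64 step is then a
// single widening conversion.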
6975 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
6976 VT.getVectorElementCount());
6977 unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
6978 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
6979 ? ISD::ZERO_EXTEND
6980 : ISD::SIGN_EXTEND;
6981 SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
6982 if (IsStrict)
6983 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
6984 Op.getOperand(0), Ext);
6985 return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
6986 }
6987 // FP2Int
6988 assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
6989 // Do one doubling fp_extend then complete the operation by converting
6990 // to int.
6991 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
6992 if (IsStrict) {
6993 auto [FExt, Chain] =
6994 DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
6995 return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
6996 }
6997 SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
6998 return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
6999 }
7000
7001 // Narrowing conversions
7002 if (SrcEltSize > (2 * EltSize)) {
7003 if (IsInt2FP) {
7004 // One narrowing int_to_fp, then an fp_round.
7005 assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
7006 MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
7007 if (IsStrict) {
7008 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
7009 DAG.getVTList(InterimFVT, MVT::Other),
7010 Op.getOperand(0), Src);
7011 SDValue Chain = Int2FP.getValue(1);
7012 return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
7013 }
7014 SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
7015 return DAG.getFPExtendOrRound(Int2FP, DL, VT);
7016 }
7017 // FP2Int
7018 // One narrowing fp_to_int, then truncate the integer. If the float isn't
7019 // representable by the integer, the result is poison.
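// For example, an f64 -> i8 element conversion first converts f64 to i32
// (half the source width) with a narrowing convert, then truncates i32 to i8.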
7020 MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
7021 VT.getVectorElementCount());
7022 if (IsStrict) {
7023 SDValue FP2Int =
7024 DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
7025 Op.getOperand(0), Src);
7026 SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7027 return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
7028 }
7029 SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
7030 return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
7031 }
7032
7033 // Scalable vectors can exit here. Patterns will handle equally-sized
7034 // conversions as well as halving/doubling ones.
7035 if (!VT.isFixedLengthVector())
7036 return Op;
7037
7038 // For fixed-length vectors we lower to a custom "VL" node.
7039 unsigned RVVOpc = 0;
7040 switch (Op.getOpcode()) {
7041 default:
7042 llvm_unreachable("Impossible opcode");
7043 case ISD::FP_TO_SINT:
7044 RVVOpc = RISCVISD::VFCVT_RTZ_X_F_VL;
7045 break;
7046 case ISD::FP_TO_UINT:
7047 RVVOpc = RISCVISD::VFCVT_RTZ_XU_F_VL;
7048 break;
7049 case ISD::SINT_TO_FP:
7050 RVVOpc = RISCVISD::SINT_TO_FP_VL;
7051 break;
7052 case ISD::UINT_TO_FP:
7053 RVVOpc = RISCVISD::UINT_TO_FP_VL;
7054 break;
7055 case ISD::STRICT_FP_TO_SINT:
7056 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
7057 break;
7058 case ISD::STRICT_FP_TO_UINT:
7059 RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
7060 break;
7061 case ISD::STRICT_SINT_TO_FP:
7062 RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
7063 break;
7064 case ISD::STRICT_UINT_TO_FP:
7065 RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
7066 break;
7067 }
7068
7069 MVT ContainerVT = getContainerForFixedLengthVector(VT);
7070 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
7071 assert(ContainerVT.getVectorElementCount() == SrcContainerVT.getVectorElementCount() &&
7072 "Expected same element count");
7073
7074 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
7075
7076 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
7077 if (IsStrict) {
7078 Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
7079 Op.getOperand(0), Src, Mask, VL);
7080 SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
7081 return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
7082 }
7083 Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
7084 return convertFromScalableVector(VT, Src, DAG, Subtarget);
7085 }
7086 case ISD::FP_TO_SINT_SAT:
7087 case ISD::FP_TO_UINT_SAT:
7088 return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
7089 case ISD::FP_TO_BF16: {
7090 // Custom lower to ensure the libcall return is passed in an FPR on hard
7091 // float ABIs.
7092 assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
7093 SDLoc DL(Op);
7094 MakeLibCallOptions CallOptions;
7095 RTLIB::Libcall LC =
7096 RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
7097 SDValue Res =
7098 makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
7099 if (Subtarget.is64Bit())
7100 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7101 return DAG.getBitcast(MVT::i32, Res);
7102 }
7103 case ISD::BF16_TO_FP: {
7104 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
7105 MVT VT = Op.getSimpleValueType();
7106 SDLoc DL(Op);
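// bf16 shares the sign/exponent layout of the upper 16 bits of an IEEE f32,
// so shifting the 16-bit pattern left by 16 and reinterpreting it as f32
// yields exactly the same value (the appended mantissa bits are zero).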
7107 Op = DAG.getNode(
7108 ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
7109 DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
7110 SDValue Res = Subtarget.is64Bit()
7111 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
7112 : DAG.getBitcast(MVT::f32, Op);
7113 // fp_extend if the target VT is bigger than f32.
7114 if (VT != MVT::f32)
7115 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
7116 return Res;
7117 }
7118 case ISD::STRICT_FP_TO_FP16:
7119 case ISD::FP_TO_FP16: {
7120 // Custom lower to ensure the libcall return is passed in an FPR on hard
7121 // float ABIs.
7122 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7123 SDLoc DL(Op);
7124 MakeLibCallOptions CallOptions;
7125 bool IsStrict = Op->isStrictFPOpcode();
7126 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7127 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7128 RTLIB::Libcall LC = RTLIB::getFPROUND(Op0.getValueType(), MVT::f16);
7129 SDValue Res;
7130 std::tie(Res, Chain) =
7131 makeLibCall(DAG, LC, MVT::f32, Op0, CallOptions, DL, Chain);
7132 if (Subtarget.is64Bit())
7133 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
7134 SDValue Result = DAG.getBitcast(MVT::i32, IsStrict ? Res.getValue(0) : Res);
7135 if (IsStrict)
7136 return DAG.getMergeValues({Result, Chain}, DL);
7137 return Result;
7138 }
7139 case ISD::STRICT_FP16_TO_FP:
7140 case ISD::FP16_TO_FP: {
7141 // Custom lower to ensure the libcall argument is passed in an FPR on hard
7142 // float ABIs.
7143 assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
7144 SDLoc DL(Op);
7145 MakeLibCallOptions CallOptions;
7146 bool IsStrict = Op->isStrictFPOpcode();
7147 SDValue Op0 = IsStrict ? Op.getOperand(1) : Op.getOperand(0);
7148 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7149 SDValue Arg = Subtarget.is64Bit()
7150 ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op0)
7151 : DAG.getBitcast(MVT::f32, Op0);
7152 SDValue Res;
7153 std::tie(Res, Chain) = makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg,
7154 CallOptions, DL, Chain);
7155 if (IsStrict)
7156 return DAG.getMergeValues({Res, Chain}, DL);
7157 return Res;
7158 }
7159 case ISD::FTRUNC:
7160 case ISD::FCEIL:
7161 case ISD::FFLOOR:
7162 case ISD::FNEARBYINT:
7163 case ISD::FRINT:
7164 case ISD::FROUND:
7165 case ISD::FROUNDEVEN:
7166 if (isPromotedOpNeedingSplit(Op, Subtarget))
7167 return SplitVectorOp(Op, DAG);
7168 return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7169 case ISD::LRINT:
7170 case ISD::LLRINT:
7171 if (Op.getValueType().isVector())
7172 return lowerVectorXRINT(Op, DAG, Subtarget);
7173 [[fallthrough]];
7174 case ISD::LROUND:
7175 case ISD::LLROUND: {
7176 assert(Op.getOperand(0).getValueType() == MVT::f16 &&
7177 "Unexpected custom legalisation");
7178 SDLoc DL(Op);
7179 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
7180 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), Ext);
7181 }
7182 case ISD::STRICT_LRINT:
7183 case ISD::STRICT_LLRINT:
7184 case ISD::STRICT_LROUND:
7185 case ISD::STRICT_LLROUND: {
7186 assert(Op.getOperand(1).getValueType() == MVT::f16 &&
7187 "Unexpected custom legalisation");
7188 SDLoc DL(Op);
7189 SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
7190 {Op.getOperand(0), Op.getOperand(1)});
7191 return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
7192 {Ext.getValue(1), Ext.getValue(0)});
7193 }
7194 case ISD::VECREDUCE_ADD:
7195 case ISD::VECREDUCE_UMAX:
7196 case ISD::VECREDUCE_SMAX:
7197 case ISD::VECREDUCE_UMIN:
7198 case ISD::VECREDUCE_SMIN:
7199 return lowerVECREDUCE(Op, DAG);
7200 case ISD::VECREDUCE_AND:
7201 case ISD::VECREDUCE_OR:
7202 case ISD::VECREDUCE_XOR:
7203 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
7204 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
7205 return lowerVECREDUCE(Op, DAG);
7206 case ISD::VECREDUCE_FADD:
7207 case ISD::VECREDUCE_SEQ_FADD:
7208 case ISD::VECREDUCE_FMIN:
7209 case ISD::VECREDUCE_FMAX:
7210 case ISD::VECREDUCE_FMAXIMUM:
7211 case ISD::VECREDUCE_FMINIMUM:
7212 return lowerFPVECREDUCE(Op, DAG);
7213 case ISD::VP_REDUCE_ADD:
7214 case ISD::VP_REDUCE_UMAX:
7215 case ISD::VP_REDUCE_SMAX:
7216 case ISD::VP_REDUCE_UMIN:
7217 case ISD::VP_REDUCE_SMIN:
7218 case ISD::VP_REDUCE_FADD:
7219 case ISD::VP_REDUCE_SEQ_FADD:
7220 case ISD::VP_REDUCE_FMIN:
7221 case ISD::VP_REDUCE_FMAX:
7222 case ISD::VP_REDUCE_FMINIMUM:
7223 case ISD::VP_REDUCE_FMAXIMUM:
7224 if (isPromotedOpNeedingSplit(Op.getOperand(1), Subtarget))
7225 return SplitVectorReductionOp(Op, DAG);
7226 return lowerVPREDUCE(Op, DAG);
7227 case ISD::VP_REDUCE_AND:
7228 case ISD::VP_REDUCE_OR:
7229 case ISD::VP_REDUCE_XOR:
7230 if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
7231 return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
7232 return lowerVPREDUCE(Op, DAG);
7233 case ISD::VP_CTTZ_ELTS:
7234 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF:
7235 return lowerVPCttzElements(Op, DAG);
7236 case ISD::UNDEF: {
7237 MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
7238 return convertFromScalableVector(Op.getSimpleValueType(),
7239 DAG.getUNDEF(ContainerVT), DAG, Subtarget);
7240 }
7241 case ISD::INSERT_SUBVECTOR:
7242 return lowerINSERT_SUBVECTOR(Op, DAG);
7243 case ISD::EXTRACT_SUBVECTOR:
7244 return lowerEXTRACT_SUBVECTOR(Op, DAG);
7245 case ISD::VECTOR_DEINTERLEAVE:
7246 return lowerVECTOR_DEINTERLEAVE(Op, DAG);
7247 case ISD::VECTOR_INTERLEAVE:
7248 return lowerVECTOR_INTERLEAVE(Op, DAG);
7249 case ISD::STEP_VECTOR:
7250 return lowerSTEP_VECTOR(Op, DAG);
7251 case ISD::VECTOR_REVERSE:
7252 return lowerVECTOR_REVERSE(Op, DAG);
7253 case ISD::VECTOR_SPLICE:
7254 return lowerVECTOR_SPLICE(Op, DAG);
7255 case ISD::BUILD_VECTOR:
7256 return lowerBUILD_VECTOR(Op, DAG, Subtarget);
7257 case ISD::SPLAT_VECTOR: {
7258 MVT VT = Op.getSimpleValueType();
7259 MVT EltVT = VT.getVectorElementType();
7260 if ((EltVT == MVT::f16 && !Subtarget.hasStdExtZvfh()) ||
7261 EltVT == MVT::bf16) {
7262 SDLoc DL(Op);
7263 SDValue Elt;
7264 if ((EltVT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) ||
7265 (EltVT == MVT::f16 && Subtarget.hasStdExtZfhmin()))
7266 Elt = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(),
7267 Op.getOperand(0));
7268 else
7269 Elt = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op.getOperand(0));
7270 MVT IVT = VT.changeVectorElementType(MVT::i16);
7271 return DAG.getNode(ISD::BITCAST, DL, VT,
7272 DAG.getNode(ISD::SPLAT_VECTOR, DL, IVT, Elt));
7273 }
7274
7275 if (EltVT == MVT::i1)
7276 return lowerVectorMaskSplat(Op, DAG);
7277 return SDValue();
7278 }
7279 case ISD::VECTOR_SHUFFLE:
7280 return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
7281 case ISD::CONCAT_VECTORS: {
7282 // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
7283 // better than going through the stack, as the default expansion does.
7284 SDLoc DL(Op);
7285 MVT VT = Op.getSimpleValueType();
7286 MVT ContainerVT = VT;
7287 if (VT.isFixedLengthVector())
7288 ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget);
7289
7290 // Recursively split concat_vectors with more than 2 operands:
7291 //
7292 // concat_vector op1, op2, op3, op4
7293 // ->
7294 // concat_vector (concat_vector op1, op2), (concat_vector op3, op4)
7295 //
7296 // This reduces the length of the chain of vslideups and allows us to
7297 // perform the vslideups at a smaller LMUL, limited to MF2.
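// For example, a concat of four LMUL=1 operands is rebuilt as two LMUL=2
// concats feeding a final LMUL=4 concat, so most of the vslideups operate on
// smaller register groups than the full result.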
7298 if (Op.getNumOperands() > 2 &&
7299 ContainerVT.bitsGE(getLMUL1VT(ContainerVT))) {
7300 MVT HalfVT = VT.getHalfNumVectorElementsVT();
7301 assert(isTypeLegal(HalfVT));
7302 size_t HalfNumOps = Op.getNumOperands() / 2;
7303 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7304 Op->ops().take_front(HalfNumOps));
7305 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HalfVT,
7306 Op->ops().drop_front(HalfNumOps));
7307 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7308 }
7309
7310 unsigned NumOpElts =
7311 Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
7312 SDValue Vec = DAG.getUNDEF(VT);
7313 for (const auto &OpIdx : enumerate(Op->ops())) {
7314 SDValue SubVec = OpIdx.value();
7315 // Don't insert undef subvectors.
7316 if (SubVec.isUndef())
7317 continue;
7318 Vec =
7319 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
7320 DAG.getVectorIdxConstant(OpIdx.index() * NumOpElts, DL));
7321 }
7322 return Vec;
7323 }
7324 case ISD::LOAD: {
7325 auto *Load = cast<LoadSDNode>(Op);
7326 EVT VecTy = Load->getMemoryVT();
7327 // Handle normal vector tuple load.
7328 if (VecTy.isRISCVVectorTuple()) {
7329 SDLoc DL(Op);
7330 MVT XLenVT = Subtarget.getXLenVT();
7331 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7332 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7333 unsigned NumElts = Sz / (NF * 8);
7334 int Log2LMUL = Log2_64(NumElts) - 3;
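// For example, a tuple with NF=2 fields of nxv4i32 has Sz = 256 known-min
// bits, so NumElts = 256 / (2 * 8) = 16 and Log2LMUL = 1: each field is
// handled as nxv16i8 at a stride of VLENB << 1 bytes (one LMUL=2 register
// group).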
7335
7336 auto Flag = SDNodeFlags();
7337 Flag.setNoUnsignedWrap(true);
7338 SDValue Ret = DAG.getUNDEF(VecTy);
7339 SDValue BasePtr = Load->getBasePtr();
7340 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7341 VROffset =
7342 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7343 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7344 SmallVector<SDValue, 8> OutChains;
7345
7346 // Load NF vector registers and combine them to a vector tuple.
7347 for (unsigned i = 0; i < NF; ++i) {
7348 SDValue LoadVal = DAG.getLoad(
7349 MVT::getScalableVectorVT(MVT::i8, NumElts), DL, Load->getChain(),
7350 BasePtr, MachinePointerInfo(Load->getAddressSpace()), Align(8));
7351 OutChains.push_back(LoadVal.getValue(1));
7352 Ret = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VecTy, Ret, LoadVal,
7353 DAG.getVectorIdxConstant(i, DL));
7354 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7355 }
7356 return DAG.getMergeValues(
7357 {Ret, DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains)}, DL);
7358 }
7359
7360 if (auto V = expandUnalignedRVVLoad(Op, DAG))
7361 return V;
7362 if (Op.getValueType().isFixedLengthVector())
7363 return lowerFixedLengthVectorLoadToRVV(Op, DAG);
7364 return Op;
7365 }
7366 case ISD::STORE: {
7367 auto *Store = cast<StoreSDNode>(Op);
7368 SDValue StoredVal = Store->getValue();
7369 EVT VecTy = StoredVal.getValueType();
7370 // Handle normal vector tuple store.
7371 if (VecTy.isRISCVVectorTuple()) {
7372 SDLoc DL(Op);
7373 MVT XLenVT = Subtarget.getXLenVT();
7374 unsigned NF = VecTy.getRISCVVectorTupleNumFields();
7375 unsigned Sz = VecTy.getSizeInBits().getKnownMinValue();
7376 unsigned NumElts = Sz / (NF * 8);
7377 int Log2LMUL = Log2_64(NumElts) - 3;
7378
7379 auto Flag = SDNodeFlags();
7380 Flag.setNoUnsignedWrap(true);
7381 SDValue Ret;
7382 SDValue Chain = Store->getChain();
7383 SDValue BasePtr = Store->getBasePtr();
7384 SDValue VROffset = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT);
7385 VROffset =
7386 DAG.getNode(ISD::SHL, DL, XLenVT, VROffset,
7387 DAG.getConstant(std::max(Log2LMUL, 0), DL, XLenVT));
7388
7389 // Extract subregisters in a vector tuple and store them individually.
7390 for (unsigned i = 0; i < NF; ++i) {
7391 auto Extract = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL,
7392 MVT::getScalableVectorVT(MVT::i8, NumElts),
7393 StoredVal, DAG.getVectorIdxConstant(i, DL));
7394 Ret = DAG.getStore(Chain, DL, Extract, BasePtr,
7395 MachinePointerInfo(Store->getAddressSpace()),
7396 Store->getOriginalAlign(),
7397 Store->getMemOperand()->getFlags());
7398 Chain = Ret.getValue(0);
7399 BasePtr = DAG.getNode(ISD::ADD, DL, XLenVT, BasePtr, VROffset, Flag);
7400 }
7401 return Ret;
7402 }
7403
7404 if (auto V = expandUnalignedRVVStore(Op, DAG))
7405 return V;
7406 if (Op.getOperand(1).getValueType().isFixedLengthVector())
7407 return lowerFixedLengthVectorStoreToRVV(Op, DAG);
7408 return Op;
7409 }
7410 case ISD::MLOAD:
7411 case ISD::VP_LOAD:
7412 return lowerMaskedLoad(Op, DAG);
7413 case ISD::MSTORE:
7414 case ISD::VP_STORE:
7415 return lowerMaskedStore(Op, DAG);
7416 case ISD::VECTOR_COMPRESS:
7417 return lowerVectorCompress(Op, DAG);
7418 case ISD::SELECT_CC: {
7419 // This occurs because we custom legalize SETGT and SETUGT for setcc. That
7420 // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
7421 // into separate SETCC+SELECT just like LegalizeDAG.
7422 SDValue Tmp1 = Op.getOperand(0);
7423 SDValue Tmp2 = Op.getOperand(1);
7424 SDValue True = Op.getOperand(2);
7425 SDValue False = Op.getOperand(3);
7426 EVT VT = Op.getValueType();
7427 SDValue CC = Op.getOperand(4);
7428 EVT CmpVT = Tmp1.getValueType();
7429 EVT CCVT =
7430 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
7431 SDLoc DL(Op);
7432 SDValue Cond =
7433 DAG.getNode(ISD::SETCC, DL, CCVT, Tmp1, Tmp2, CC, Op->getFlags());
7434 return DAG.getSelect(DL, VT, Cond, True, False);
7435 }
7436 case ISD::SETCC: {
7437 MVT OpVT = Op.getOperand(0).getSimpleValueType();
7438 if (OpVT.isScalarInteger()) {
7439 MVT VT = Op.getSimpleValueType();
7440 SDValue LHS = Op.getOperand(0);
7441 SDValue RHS = Op.getOperand(1);
7442 ISD::CondCode CCVal = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7443 assert((CCVal == ISD::SETGT || CCVal == ISD::SETUGT) &&
7444 "Unexpected CondCode");
7445
7446 SDLoc DL(Op);
7447
7448 // If the RHS is a constant in the range [-2049, 0) or (0, 2046], we can
7449 // convert this to the equivalent of (set(u)ge X, C+1) by using
7450 // (xori (slti(u) X, C+1), 1). This avoids materializing a small constant
7451 // in a register.
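// For example, (setgt X, 5) becomes (xori (slti X, 6), 1) and (setugt X, 5)
// becomes (xori (sltiu X, 6), 1), since X > 5 is equivalent to !(X < 6).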
7452 if (isa<ConstantSDNode>(RHS)) {
7453 int64_t Imm = cast<ConstantSDNode>(RHS)->getSExtValue();
7454 if (Imm != 0 && isInt<12>((uint64_t)Imm + 1)) {
7455 // If this is an unsigned compare and the constant is -1, incrementing
7456 // the constant would change behavior. The result should be false.
7457 if (CCVal == ISD::SETUGT && Imm == -1)
7458 return DAG.getConstant(0, DL, VT);
7459 // Using getSetCCSwappedOperands will convert SET(U)GT->SET(U)LT.
7460 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7461 SDValue SetCC = DAG.getSetCC(
7462 DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
7463 return DAG.getLogicalNOT(DL, SetCC, VT);
7464 }
7465 }
7466
7467 // Not a constant we could handle, swap the operands and condition code to
7468 // SETLT/SETULT.
7469 CCVal = ISD::getSetCCSwappedOperands(CCVal);
7470 return DAG.getSetCC(DL, VT, RHS, LHS, CCVal);
7471 }
7472
7473 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7474 return SplitVectorOp(Op, DAG);
7475
7476 return lowerFixedLengthVectorSetccToRVV(Op, DAG);
7477 }
7478 case ISD::ADD:
7479 case ISD::SUB:
7480 case ISD::MUL:
7481 case ISD::MULHS:
7482 case ISD::MULHU:
7483 case ISD::AND:
7484 case ISD::OR:
7485 case ISD::XOR:
7486 case ISD::SDIV:
7487 case ISD::SREM:
7488 case ISD::UDIV:
7489 case ISD::UREM:
7490 case ISD::BSWAP:
7491 case ISD::CTPOP:
7492 return lowerToScalableOp(Op, DAG);
7493 case ISD::SHL:
7494 case ISD::SRA:
7495 case ISD::SRL:
7496 if (Op.getSimpleValueType().isFixedLengthVector())
7497 return lowerToScalableOp(Op, DAG);
7498 // This can be called for an i32 shift amount that needs to be promoted.
7499 assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
7500 "Unexpected custom legalisation");
7501 return SDValue();
7502 case ISD::FABS:
7503 case ISD::FNEG:
7504 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7505 return lowerFABSorFNEG(Op, DAG, Subtarget);
7506 [[fallthrough]];
7507 case ISD::FADD:
7508 case ISD::FSUB:
7509 case ISD::FMUL:
7510 case ISD::FDIV:
7511 case ISD::FSQRT:
7512 case ISD::FMA:
7513 case ISD::FMINNUM:
7514 case ISD::FMAXNUM:
7515 if (isPromotedOpNeedingSplit(Op, Subtarget))
7516 return SplitVectorOp(Op, DAG);
7517 [[fallthrough]];
7518 case ISD::AVGFLOORS:
7519 case ISD::AVGFLOORU:
7520 case ISD::AVGCEILS:
7521 case ISD::AVGCEILU:
7522 case ISD::SMIN:
7523 case ISD::SMAX:
7524 case ISD::UMIN:
7525 case ISD::UMAX:
7526 case ISD::UADDSAT:
7527 case ISD::USUBSAT:
7528 case ISD::SADDSAT:
7529 case ISD::SSUBSAT:
7530 return lowerToScalableOp(Op, DAG);
7531 case ISD::ABDS:
7532 case ISD::ABDU: {
7533 SDLoc dl(Op);
7534 EVT VT = Op->getValueType(0);
7535 SDValue LHS = DAG.getFreeze(Op->getOperand(0));
7536 SDValue RHS = DAG.getFreeze(Op->getOperand(1));
7537 bool IsSigned = Op->getOpcode() == ISD::ABDS;
7538
7539 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
7540 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
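// For example, abds(3, -7) = smax(3, -7) - smin(3, -7) = 3 - (-7) = 10.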
7541 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
7542 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
7543 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
7544 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
7545 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
7546 }
7547 case ISD::ABS:
7548 case ISD::VP_ABS:
7549 return lowerABS(Op, DAG);
7550 case ISD::CTLZ:
7551 case ISD::CTLZ_ZERO_UNDEF:
7552 case ISD::CTTZ:
7553 case ISD::CTTZ_ZERO_UNDEF:
7554 if (Subtarget.hasStdExtZvbb())
7555 return lowerToScalableOp(Op, DAG);
7556 assert(Op.getOpcode() != ISD::CTTZ);
7557 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7558 case ISD::VSELECT:
7559 return lowerFixedLengthVectorSelectToRVV(Op, DAG);
7560 case ISD::FCOPYSIGN:
7561 if (Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16)
7562 return lowerFCOPYSIGN(Op, DAG, Subtarget);
7563 if (isPromotedOpNeedingSplit(Op, Subtarget))
7564 return SplitVectorOp(Op, DAG);
7565 return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
7566 case ISD::STRICT_FADD:
7567 case ISD::STRICT_FSUB:
7568 case ISD::STRICT_FMUL:
7569 case ISD::STRICT_FDIV:
7570 case ISD::STRICT_FSQRT:
7571 case ISD::STRICT_FMA:
7572 if (isPromotedOpNeedingSplit(Op, Subtarget))
7573 return SplitStrictFPVectorOp(Op, DAG);
7574 return lowerToScalableOp(Op, DAG);
7575 case ISD::STRICT_FSETCC:
7576 case ISD::STRICT_FSETCCS:
7577 return lowerVectorStrictFSetcc(Op, DAG);
7578 case ISD::STRICT_FCEIL:
7579 case ISD::STRICT_FRINT:
7580 case ISD::STRICT_FFLOOR:
7581 case ISD::STRICT_FTRUNC:
7582 case ISD::STRICT_FNEARBYINT:
7583 case ISD::STRICT_FROUND:
7584 case ISD::STRICT_FROUNDEVEN:
7585 return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7586 case ISD::MGATHER:
7587 case ISD::VP_GATHER:
7588 return lowerMaskedGather(Op, DAG);
7589 case ISD::MSCATTER:
7590 case ISD::VP_SCATTER:
7591 return lowerMaskedScatter(Op, DAG);
7592 case ISD::GET_ROUNDING:
7593 return lowerGET_ROUNDING(Op, DAG);
7594 case ISD::SET_ROUNDING:
7595 return lowerSET_ROUNDING(Op, DAG);
7596 case ISD::EH_DWARF_CFA:
7597 return lowerEH_DWARF_CFA(Op, DAG);
7598 case ISD::VP_MERGE:
7599 if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
7600 return lowerVPMergeMask(Op, DAG);
7601 [[fallthrough]];
7602 case ISD::VP_SELECT:
7603 case ISD::VP_ADD:
7604 case ISD::VP_SUB:
7605 case ISD::VP_MUL:
7606 case ISD::VP_SDIV:
7607 case ISD::VP_UDIV:
7608 case ISD::VP_SREM:
7609 case ISD::VP_UREM:
7610 case ISD::VP_UADDSAT:
7611 case ISD::VP_USUBSAT:
7612 case ISD::VP_SADDSAT:
7613 case ISD::VP_SSUBSAT:
7614 case ISD::VP_LRINT:
7615 case ISD::VP_LLRINT:
7616 return lowerVPOp(Op, DAG);
7617 case ISD::VP_AND:
7618 case ISD::VP_OR:
7619 case ISD::VP_XOR:
7620 return lowerLogicVPOp(Op, DAG);
7621 case ISD::VP_FADD:
7622 case ISD::VP_FSUB:
7623 case ISD::VP_FMUL:
7624 case ISD::VP_FDIV:
7625 case ISD::VP_FNEG:
7626 case ISD::VP_FABS:
7627 case ISD::VP_SQRT:
7628 case ISD::VP_FMA:
7629 case ISD::VP_FMINNUM:
7630 case ISD::VP_FMAXNUM:
7631 case ISD::VP_FCOPYSIGN:
7632 if (isPromotedOpNeedingSplit(Op, Subtarget))
7633 return SplitVPOp(Op, DAG);
7634 [[fallthrough]];
7635 case ISD::VP_SRA:
7636 case ISD::VP_SRL:
7637 case ISD::VP_SHL:
7638 return lowerVPOp(Op, DAG);
7639 case ISD::VP_IS_FPCLASS:
7640 return LowerIS_FPCLASS(Op, DAG);
7641 case ISD::VP_SIGN_EXTEND:
7642 case ISD::VP_ZERO_EXTEND:
7643 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7644 return lowerVPExtMaskOp(Op, DAG);
7645 return lowerVPOp(Op, DAG);
7646 case ISD::VP_TRUNCATE:
7647 return lowerVectorTruncLike(Op, DAG);
7648 case ISD::VP_FP_EXTEND:
7649 case ISD::VP_FP_ROUND:
7650 return lowerVectorFPExtendOrRoundLike(Op, DAG);
7651 case ISD::VP_SINT_TO_FP:
7652 case ISD::VP_UINT_TO_FP:
7653 if (Op.getValueType().isVector() &&
7654 ((Op.getValueType().getScalarType() == MVT::f16 &&
7655 (Subtarget.hasVInstructionsF16Minimal() &&
7656 !Subtarget.hasVInstructionsF16())) ||
7657 Op.getValueType().getScalarType() == MVT::bf16)) {
7658 if (isPromotedOpNeedingSplit(Op, Subtarget))
7659 return SplitVectorOp(Op, DAG);
7660 // int -> f32
7661 SDLoc DL(Op);
7662 MVT NVT =
7663 MVT::getVectorVT(MVT::f32, Op.getValueType().getVectorElementCount());
7664 auto NC = DAG.getNode(Op.getOpcode(), DL, NVT, Op->ops());
7665 // f32 -> [b]f16
7666 return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(), NC,
7667 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
7668 }
7669 [[fallthrough]];
7670 case ISD::VP_FP_TO_SINT:
7671 case ISD::VP_FP_TO_UINT:
7672 if (SDValue Op1 = Op.getOperand(0);
7673 Op1.getValueType().isVector() &&
7674 ((Op1.getValueType().getScalarType() == MVT::f16 &&
7675 (Subtarget.hasVInstructionsF16Minimal() &&
7676 !Subtarget.hasVInstructionsF16())) ||
7677 Op1.getValueType().getScalarType() == MVT::bf16)) {
7678 if (isPromotedOpNeedingSplit(Op1, Subtarget))
7679 return SplitVectorOp(Op, DAG);
7680 // [b]f16 -> f32
7681 SDLoc DL(Op);
7682 MVT NVT = MVT::getVectorVT(MVT::f32,
7683 Op1.getValueType().getVectorElementCount());
7684 SDValue WidenVec = DAG.getNode(ISD::FP_EXTEND, DL, NVT, Op1);
7685 // f32 -> int
7686 return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
7687 {WidenVec, Op.getOperand(1), Op.getOperand(2)});
7688 }
7689 return lowerVPFPIntConvOp(Op, DAG);
7690 case ISD::VP_SETCC:
7691 if (isPromotedOpNeedingSplit(Op.getOperand(0), Subtarget))
7692 return SplitVPOp(Op, DAG);
7693 if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
7694 return lowerVPSetCCMaskOp(Op, DAG);
7695 [[fallthrough]];
7696 case ISD::VP_SMIN:
7697 case ISD::VP_SMAX:
7698 case ISD::VP_UMIN:
7699 case ISD::VP_UMAX:
7700 case ISD::VP_BITREVERSE:
7701 case ISD::VP_BSWAP:
7702 return lowerVPOp(Op, DAG);
7703 case ISD::VP_CTLZ:
7704 case ISD::VP_CTLZ_ZERO_UNDEF:
7705 if (Subtarget.hasStdExtZvbb())
7706 return lowerVPOp(Op, DAG);
7707 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7708 case ISD::VP_CTTZ:
7709 case ISD::VP_CTTZ_ZERO_UNDEF:
7710 if (Subtarget.hasStdExtZvbb())
7711 return lowerVPOp(Op, DAG);
7712 return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
7713 case ISD::VP_CTPOP:
7714 return lowerVPOp(Op, DAG);
7715 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
7716 return lowerVPStridedLoad(Op, DAG);
7717 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7718 return lowerVPStridedStore(Op, DAG);
7719 case ISD::VP_FCEIL:
7720 case ISD::VP_FFLOOR:
7721 case ISD::VP_FRINT:
7722 case ISD::VP_FNEARBYINT:
7723 case ISD::VP_FROUND:
7724 case ISD::VP_FROUNDEVEN:
7725 case ISD::VP_FROUNDTOZERO:
7726 if (isPromotedOpNeedingSplit(Op, Subtarget))
7727 return SplitVPOp(Op, DAG);
7728 return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
7729 case ISD::VP_FMAXIMUM:
7730 case ISD::VP_FMINIMUM:
7731 if (isPromotedOpNeedingSplit(Op, Subtarget))
7732 return SplitVPOp(Op, DAG);
7733 return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
7734 case ISD::EXPERIMENTAL_VP_SPLICE:
7735 return lowerVPSpliceExperimental(Op, DAG);
7736 case ISD::EXPERIMENTAL_VP_REVERSE:
7737 return lowerVPReverseExperimental(Op, DAG);
7738 case ISD::EXPERIMENTAL_VP_SPLAT:
7739 return lowerVPSplatExperimental(Op, DAG);
7740 case ISD::CLEAR_CACHE: {
7741 assert(getTargetMachine().getTargetTriple().isOSLinux() &&
7742 "llvm.clear_cache only needs custom lower on Linux targets");
7743 SDLoc DL(Op);
7744 SDValue Flags = DAG.getConstant(0, DL, Subtarget.getXLenVT());
7745 return emitFlushICache(DAG, Op.getOperand(0), Op.getOperand(1),
7746 Op.getOperand(2), Flags, DL);
7747 }
7748 case ISD::DYNAMIC_STACKALLOC:
7749 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7750 case ISD::INIT_TRAMPOLINE:
7751 return lowerINIT_TRAMPOLINE(Op, DAG);
7752 case ISD::ADJUST_TRAMPOLINE:
7753 return lowerADJUST_TRAMPOLINE(Op, DAG);
7754 }
7755}
7756
7757SDValue RISCVTargetLowering::emitFlushICache(SelectionDAG &DAG, SDValue InChain,
7758 SDValue Start, SDValue End,
7759 SDValue Flags, SDLoc DL) const {
7760 MakeLibCallOptions CallOptions;
7761 std::pair<SDValue, SDValue> CallResult =
7762 makeLibCall(DAG, RTLIB::RISCV_FLUSH_ICACHE, MVT::isVoid,
7763 {Start, End, Flags}, CallOptions, DL, InChain);
7764
7765 // This function returns void so only the out chain matters.
7766 return CallResult.second;
7767}
7768
7769SDValue RISCVTargetLowering::lowerINIT_TRAMPOLINE(SDValue Op,
7770 SelectionDAG &DAG) const {
7771 if (!Subtarget.is64Bit())
7772 llvm::report_fatal_error("Trampolines only implemented for RV64");
7773
7774 // Create an MCCodeEmitter to encode instructions.
7775 TargetLoweringObjectFile *TLO = getTargetMachine().getObjFileLowering();
7776 assert(TLO);
7777 MCContext &MCCtx = TLO->getContext();
7778
7779 std::unique_ptr<MCCodeEmitter> CodeEmitter(
7780 createRISCVMCCodeEmitter(*getTargetMachine().getMCInstrInfo(), MCCtx));
7781
7782 SDValue Root = Op.getOperand(0);
7783 SDValue Trmp = Op.getOperand(1); // trampoline
7784 SDLoc dl(Op);
7785
7786 const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7787
7788 // We store in the trampoline buffer the following instructions and data.
7789 // Offset:
7790 // 0: auipc t2, 0
7791 // 4: ld t0, 24(t2)
7792 // 8: ld t2, 16(t2)
7793 // 12: jalr t0
7794 // 16: <StaticChainOffset>
7795 // 24: <FunctionAddressOffset>
7796 // 32:
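// i.e. the trampoline occupies 32 bytes: 16 bytes of code followed by the
// 8-byte static chain slot and the 8-byte function address slot.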
7797
7798 constexpr unsigned StaticChainOffset = 16;
7799 constexpr unsigned FunctionAddressOffset = 24;
7800
7801 const MCSubtargetInfo *STI = getTargetMachine().getMCSubtargetInfo();
7802 assert(STI);
7803 auto GetEncoding = [&](const MCInst &MC) {
7804 SmallVector<char, 32> CB;
7805 SmallVector<MCFixup> Fixups;
7806 CodeEmitter->encodeInstruction(MC, CB, Fixups, *STI);
7807 uint32_t Encoding = support::endian::read32le(CB.data());
7808 return Encoding;
7809 };
7810
7811 SDValue OutChains[6];
7812
7813 uint32_t Encodings[] = {
7814 // auipc t2, 0
7815 // Loads the current PC into t2.
7816 GetEncoding(MCInstBuilder(RISCV::AUIPC).addReg(RISCV::X7).addImm(0)),
7817 // ld t0, 24(t2)
7818 // Loads the function address into t0. Note that we are using offsets
7819 // pc-relative to the first instruction of the trampoline.
7820 GetEncoding(
7821 MCInstBuilder(RISCV::LD).addReg(RISCV::X5).addReg(RISCV::X7).addImm(
7822 FunctionAddressOffset)),
7823 // ld t2, 16(t2)
7824 // Load the value of the static chain.
7825 GetEncoding(
7826 MCInstBuilder(RISCV::LD).addReg(RISCV::X7).addReg(RISCV::X7).addImm(
7827 StaticChainOffset)),
7828 // jalr t0
7829 // Jump to the function.
7830 GetEncoding(MCInstBuilder(RISCV::JALR)
7831 .addReg(RISCV::X0)
7832 .addReg(RISCV::X5)
7833 .addImm(0))};
7834
7835 // Store encoded instructions.
7836 for (auto [Idx, Encoding] : llvm::enumerate(Encodings)) {
7837 SDValue Addr = Idx > 0 ? DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7838 DAG.getConstant(Idx * 4, dl, MVT::i64))
7839 : Trmp;
7840 OutChains[Idx] = DAG.getTruncStore(
7841 Root, dl, DAG.getConstant(Encoding, dl, MVT::i64), Addr,
7842 MachinePointerInfo(TrmpAddr, Idx * 4), MVT::i32);
7843 }
7844
7845 // Now store the variable part of the trampoline.
7846 SDValue FunctionAddress = Op.getOperand(2);
7847 SDValue StaticChain = Op.getOperand(3);
7848
7849 // Store the given static chain and function pointer in the trampoline buffer.
7850 struct OffsetValuePair {
7851 const unsigned Offset;
7852 const SDValue Value;
7853 SDValue Addr = SDValue(); // Used to cache the address.
7854 } OffsetValues[] = {
7855 {StaticChainOffset, StaticChain},
7856 {FunctionAddressOffset, FunctionAddress},
7857 };
7858 for (auto [Idx, OffsetValue] : llvm::enumerate(OffsetValues)) {
7859 SDValue Addr =
7860 DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
7861 DAG.getConstant(OffsetValue.Offset, dl, MVT::i64));
7862 OffsetValue.Addr = Addr;
7863 OutChains[Idx + 4] =
7864 DAG.getStore(Root, dl, OffsetValue.Value, Addr,
7865 MachinePointerInfo(TrmpAddr, OffsetValue.Offset));
7866 }
7867
7868 SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
7869
7870 // The end of the trampoline's instructions is the same as the static chain
7871 // address that we computed earlier.
7872 SDValue EndOfTrmp = OffsetValues[0].Addr;
7873
7874 // Call clear cache on the trampoline instructions.
7875 SDValue Chain = DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
7876 Trmp, EndOfTrmp);
7877
7878 return Chain;
7879}
7880
7881SDValue RISCVTargetLowering::lowerADJUST_TRAMPOLINE(SDValue Op,
7882 SelectionDAG &DAG) const {
7883 if (!Subtarget.is64Bit())
7884 llvm::report_fatal_error("Trampolines only implemented for RV64");
7885
7886 return Op.getOperand(0);
7887}
7888
7889static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
7890 SelectionDAG &DAG, unsigned Flags) {
7891 return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
7892}
7893
7894static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
7895 SelectionDAG &DAG, unsigned Flags) {
7896 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
7897 Flags);
7898}
7899
7900static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
7901 SelectionDAG &DAG, unsigned Flags) {
7902 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
7903 N->getOffset(), Flags);
7904}
7905
7906static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
7907 SelectionDAG &DAG, unsigned Flags) {
7908 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
7909}
7910
7911static SDValue getLargeGlobalAddress(GlobalAddressSDNode *N, const SDLoc &DL,
7912 EVT Ty, SelectionDAG &DAG) {
7913 RISCVConstantPoolValue *CPV = RISCVConstantPoolValue::Create(N->getGlobal());
7914 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7915 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7916 return DAG.getLoad(
7917 Ty, DL, DAG.getEntryNode(), LC,
7918 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7919}
7920
7921static SDValue getLargeExternalSymbol(ExternalSymbolSDNode *N, const SDLoc &DL,
7922 EVT Ty, SelectionDAG &DAG) {
7923 RISCVConstantPoolValue *CPV =
7924 RISCVConstantPoolValue::Create(*DAG.getContext(), N->getSymbol());
7925 SDValue CPAddr = DAG.getTargetConstantPool(CPV, Ty, Align(8));
7926 SDValue LC = DAG.getNode(RISCVISD::LLA, DL, Ty, CPAddr);
7927 return DAG.getLoad(
7928 Ty, DL, DAG.getEntryNode(), LC,
7929 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
7930}
7931
7932template <class NodeTy>
7933SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
7934 bool IsLocal, bool IsExternWeak) const {
7935 SDLoc DL(N);
7936 EVT Ty = getPointerTy(DAG.getDataLayout());
7937
7938 // When HWASAN is used and tagging of global variables is enabled
7939 // they should be accessed via the GOT, since the tagged address of a global
7940 // is incompatible with existing code models. This also applies to non-pic
7941 // mode.
7942 if (isPositionIndependent() || Subtarget.allowTaggedGlobals()) {
7943 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7944 if (IsLocal && !Subtarget.allowTaggedGlobals())
7945 // Use PC-relative addressing to access the symbol. This generates the
7946 // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
7947 // %pcrel_lo(auipc)).
7948 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7949
7950 // Use PC-relative addressing to access the GOT for this symbol, then load
7951 // the address from the GOT. This generates the pattern (PseudoLGA sym),
7952 // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7953 SDValue Load =
7954 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7955 MachineFunction &MF = DAG.getMachineFunction();
7956 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7957 MachinePointerInfo::getGOT(MF),
7958 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7959 MachineMemOperand::MOInvariant,
7960 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7961 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7962 return Load;
7963 }
7964
7965 switch (getTargetMachine().getCodeModel()) {
7966 default:
7967 report_fatal_error("Unsupported code model for lowering");
7968 case CodeModel::Small: {
7969 // Generate a sequence for accessing addresses within the first 2 GiB of
7970 // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
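// e.g., with a0 as the destination register:
//   lui   a0, %hi(sym)
//   addi  a0, a0, %lo(sym)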
7971 SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
7972 SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
7973 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
7974 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
7975 }
7976 case CodeModel::Medium: {
7977 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
7978 if (IsExternWeak) {
7979 // An extern weak symbol may be undefined, i.e. have value 0, which may
7980 // not be within 2GiB of PC, so use GOT-indirect addressing to access the
7981 // symbol. This generates the pattern (PseudoLGA sym), which expands to
7982 // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
7983 SDValue Load =
7984 SDValue(DAG.getMachineNode(RISCV::PseudoLGA, DL, Ty, Addr), 0);
7985 MachineFunction &MF = DAG.getMachineFunction();
7986 MachineMemOperand *MemOp = MF.getMachineMemOperand(
7987 MachinePointerInfo::getGOT(MF),
7988 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
7989 MachineMemOperand::MOInvariant,
7990 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
7991 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
7992 return Load;
7993 }
7994
7995 // Generate a sequence for accessing addresses within any 2GiB range within
7996 // the address space. This generates the pattern (PseudoLLA sym), which
7997 // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
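// e.g., with a0 as the destination register:
//   .Lpcrel_hi0: auipc a0, %pcrel_hi(sym)
//                addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)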
7998 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
7999 }
8000 case CodeModel::Large: {
8001 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N))
8002 return getLargeGlobalAddress(G, DL, Ty, DAG);
8003
8004 // Use PC-relative addressing for other node types.
8005 SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
8006 return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
8007 }
8008 }
8009}
8010
8011SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
8012 SelectionDAG &DAG) const {
8013 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8014 assert(N->getOffset() == 0 && "unexpected offset in global node");
8015 const GlobalValue *GV = N->getGlobal();
8016 return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
8017}
8018
8019SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
8020 SelectionDAG &DAG) const {
8021 BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
8022
8023 return getAddr(N, DAG);
8024}
8025
8026SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
8027 SelectionDAG &DAG) const {
8028 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
8029
8030 return getAddr(N, DAG);
8031}
8032
8033SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
8034 SelectionDAG &DAG) const {
8035 JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
8036
8037 return getAddr(N, DAG);
8038}
8039
8040SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
8041 SelectionDAG &DAG,
8042 bool UseGOT) const {
8043 SDLoc DL(N);
8044 EVT Ty = getPointerTy(DAG.getDataLayout());
8045 const GlobalValue *GV = N->getGlobal();
8046 MVT XLenVT = Subtarget.getXLenVT();
8047
8048 if (UseGOT) {
8049 // Use PC-relative addressing to access the GOT for this TLS symbol, then
8050 // load the address from the GOT and add the thread pointer. This generates
8051 // the pattern (PseudoLA_TLS_IE sym), which expands to
8052 // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
8053 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8054 SDValue Load =
8055 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
8056 MachineFunction &MF = DAG.getMachineFunction();
8057 MachineMemOperand *MemOp = MF.getMachineMemOperand(
8058 MachinePointerInfo::getGOT(MF),
8059 MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
8060 MachineMemOperand::MOInvariant,
8061 LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
8062 DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
8063
8064 // Add the thread pointer.
8065 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8066 return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
8067 }
8068
8069 // Generate a sequence for accessing the address relative to the thread
8070 // pointer, with the appropriate adjustment for the thread pointer offset.
8071 // This generates the pattern
8072 // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
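// e.g., with a0 as the destination register:
//   lui   a0, %tprel_hi(sym)
//   add   a0, a0, tp, %tprel_add(sym)
//   addi  a0, a0, %tprel_lo(sym)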
8073 SDValue AddrHi =
8074 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
8075 SDValue AddrAdd =
8076 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
8077 SDValue AddrLo =
8078 DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
8079
8080 SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
8081 SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
8082 SDValue MNAdd =
8083 DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
8084 return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
8085}
8086
8087SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
8088 SelectionDAG &DAG) const {
8089 SDLoc DL(N);
8090 EVT Ty = getPointerTy(DAG.getDataLayout());
8091 IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
8092 const GlobalValue *GV = N->getGlobal();
8093
8094 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8095 // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
8096 // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
8097 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8098 SDValue Load =
8099 SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
8100
8101 // Prepare argument list to generate call.
8102 ArgListTy Args;
8103 ArgListEntry Entry;
8104 Entry.Node = Load;
8105 Entry.Ty = CallTy;
8106 Args.push_back(Entry);
8107
8108 // Setup call to __tls_get_addr.
8109 TargetLowering::CallLoweringInfo CLI(DAG);
8110 CLI.setDebugLoc(DL)
8111 .setChain(DAG.getEntryNode())
8112 .setLibCallee(CallingConv::C, CallTy,
8113 DAG.getExternalSymbol("__tls_get_addr", Ty),
8114 std::move(Args));
8115
8116 return LowerCallTo(CLI).first;
8117}
8118
8119SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
8120 SelectionDAG &DAG) const {
8121 SDLoc DL(N);
8122 EVT Ty = getPointerTy(DAG.getDataLayout());
8123 const GlobalValue *GV = N->getGlobal();
8124
8125 // Use a PC-relative addressing mode to access the global dynamic GOT address.
8126 // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
8127 //
8128 // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
8129 // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12(label)
8130 // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12(label)
8131 // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
8132 SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
8133 return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
8134}
8135
8136SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
8137 SelectionDAG &DAG) const {
8138 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
8139 assert(N->getOffset() == 0 && "unexpected offset in global node");
8140
8141 if (DAG.getTarget().useEmulatedTLS())
8142 return LowerToTLSEmulatedModel(N, DAG);
8143
8144 TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
8145
8146 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
8147 CallingConv::GHC)
8148 report_fatal_error("In GHC calling convention TLS is not supported");
8149
8150 SDValue Addr;
8151 switch (Model) {
8152 case TLSModel::LocalExec:
8153 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
8154 break;
8155 case TLSModel::InitialExec:
8156 Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
8157 break;
8158 case TLSModel::LocalDynamic:
8159 case TLSModel::GeneralDynamic:
8160 Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
8161 : getDynamicTLSAddr(N, DAG);
8162 break;
8163 }
8164
8165 return Addr;
8166}
8167
8168// Return true if Val is equal to (setcc LHS, RHS, CC).
8169// Return false if Val is the inverse of (setcc LHS, RHS, CC).
8170// Otherwise, return std::nullopt.
8171static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
8172 ISD::CondCode CC, SDValue Val) {
8173 assert(Val->getOpcode() == ISD::SETCC);
8174 SDValue LHS2 = Val.getOperand(0);
8175 SDValue RHS2 = Val.getOperand(1);
8176 ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
8177
8178 if (LHS == LHS2 && RHS == RHS2) {
8179 if (CC == CC2)
8180 return true;
8181 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8182 return false;
8183 } else if (LHS == RHS2 && RHS == LHS2) {
8184 CC2 = ISD::getSetCCSwappedOperands(CC2);
8185 if (CC == CC2)
8186 return true;
8187 if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
8188 return false;
8189 }
8190
8191 return std::nullopt;
8192}
8193
8194static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
8195 const RISCVSubtarget &Subtarget) {
8196 SDValue CondV = N->getOperand(0);
8197 SDValue TrueV = N->getOperand(1);
8198 SDValue FalseV = N->getOperand(2);
8199 MVT VT = N->getSimpleValueType(0);
8200 SDLoc DL(N);
8201
8202 if (!Subtarget.hasConditionalMoveFusion()) {
8203 // (select c, -1, y) -> -c | y
8204 if (isAllOnesConstant(TrueV)) {
8205 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8206 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(FalseV));
8207 }
8208 // (select c, y, -1) -> (c-1) | y
8209 if (isAllOnesConstant(FalseV)) {
8210 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8211 DAG.getAllOnesConstant(DL, VT));
8212 return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
8213 }
8214
8215 // (select c, 0, y) -> (c-1) & y
8216 if (isNullConstant(TrueV)) {
8217 SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
8218 DAG.getAllOnesConstant(DL, VT));
8219 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
8220 }
8221 // (select c, y, 0) -> -c & y
8222 if (isNullConstant(FalseV)) {
8223 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8224 return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
8225 }
8226 }
8227
8228 // select c, ~x, x --> xor -c, x
8229 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8230 const APInt &TrueVal = TrueV->getAsAPIntVal();
8231 const APInt &FalseVal = FalseV->getAsAPIntVal();
8232 if (~TrueVal == FalseVal) {
8233 SDValue Neg = DAG.getNegative(CondV, DL, VT);
8234 return DAG.getNode(ISD::XOR, DL, VT, Neg, FalseV);
8235 }
8236 }
8237
8238 // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
8239 // when both truev and falsev are also setcc.
8240 if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
8241 FalseV.getOpcode() == ISD::SETCC) {
8242 SDValue LHS = CondV.getOperand(0);
8243 SDValue RHS = CondV.getOperand(1);
8244 ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8245
8246 // (select x, x, y) -> x | y
8247 // (select !x, x, y) -> x & y
8248 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
8249 return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
8250 DAG.getFreeze(FalseV));
8251 }
8252 // (select x, y, x) -> x & y
8253 // (select !x, y, x) -> x | y
8254 if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
8255 return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT,
8256 DAG.getFreeze(TrueV), FalseV);
8257 }
8258 }
8259
8260 return SDValue();
8261}
8262
8263// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
8264// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
8265// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
8266// being `0` or `-1`. In such cases we can replace `select` with `and`.
8267// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
8268// than `c0`?
8269static SDValue
8270foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
8271 const RISCVSubtarget &Subtarget) {
8272 if (Subtarget.hasShortForwardBranchOpt())
8273 return SDValue();
8274
8275 unsigned SelOpNo = 0;
8276 SDValue Sel = BO->getOperand(0);
8277 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
8278 SelOpNo = 1;
8279 Sel = BO->getOperand(1);
8280 }
8281
8282 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
8283 return SDValue();
8284
8285 unsigned ConstSelOpNo = 1;
8286 unsigned OtherSelOpNo = 2;
8287 if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
8288 ConstSelOpNo = 2;
8289 OtherSelOpNo = 1;
8290 }
8291 SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
8292 ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
8293 if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
8294 return SDValue();
8295
8296 SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
8297 ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
8298 if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
8299 return SDValue();
8300
8301 SDLoc DL(Sel);
8302 EVT VT = BO->getValueType(0);
8303
8304 SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
8305 if (SelOpNo == 1)
8306 std::swap(NewConstOps[0], NewConstOps[1]);
8307
8308 SDValue NewConstOp =
8309 DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
8310 if (!NewConstOp)
8311 return SDValue();
8312
8313 const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
8314 if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
8315 return SDValue();
8316
8317 SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
8318 SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
8319 if (SelOpNo == 1)
8320 std::swap(NewNonConstOps[0], NewNonConstOps[1]);
8321 SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
8322
8323 SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
8324 SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
8325 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
8326}
8327
8328SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
8329 SDValue CondV = Op.getOperand(0);
8330 SDValue TrueV = Op.getOperand(1);
8331 SDValue FalseV = Op.getOperand(2);
8332 SDLoc DL(Op);
8333 MVT VT = Op.getSimpleValueType();
8334 MVT XLenVT = Subtarget.getXLenVT();
8335
8336 // Lower vector SELECTs to VSELECTs by splatting the condition.
8337 if (VT.isVector()) {
8338 MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
8339 SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
8340 return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
8341 }
8342
8343 // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
8344 // nodes to implement the SELECT. Performing the lowering here allows for
8345 // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
8346 // sequence or RISCVISD::SELECT_CC node (branch-based select).
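  // As a reference for the folds below: czero.eqz rd, rs1, rs2 writes 0 when
  // rs2 == 0 and rs1 otherwise, while czero.nez is the complementary form, so
  // a select whose untaken arm is 0 maps to a single conditional-zero
  // operation.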
8347 if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
8348 VT.isScalarInteger()) {
8349 // (select c, t, 0) -> (czero_eqz t, c)
8350 if (isNullConstant(FalseV))
8351 return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
8352 // (select c, 0, f) -> (czero_nez f, c)
8353 if (isNullConstant(TrueV))
8354 return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
8355
8356 // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
8357 if (TrueV.getOpcode() == ISD::AND &&
8358 (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
8359 return DAG.getNode(
8360 ISD::OR, DL, VT, TrueV,
8361 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8362 // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
8363 if (FalseV.getOpcode() == ISD::AND &&
8364 (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
8365 return DAG.getNode(
8366 ISD::OR, DL, VT, FalseV,
8367 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
8368
8369 // Try some other optimizations before falling back to generic lowering.
8370 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8371 return V;
8372
8373 // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
8374 // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
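    // Worked example for the CZERO_NEZ form: result = czero_nez(c2 - c1, c) + c1.
    // If c != 0 the czero produces 0 and the result is c1 (the true value); if
    // c == 0 it produces c2 - c1 and the result is c2 (the false value).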
8375 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
8376 const APInt &TrueVal = TrueV->getAsAPIntVal();
8377 const APInt &FalseVal = FalseV->getAsAPIntVal();
8378 const int TrueValCost = RISCVMatInt::getIntMatCost(
8379 TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8380 const int FalseValCost = RISCVMatInt::getIntMatCost(
8381 FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
8382 bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
8383 SDValue LHSVal = DAG.getConstant(
8384 IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
8385 SDValue RHSVal =
8386 DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
8387 SDValue CMOV =
8388           DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
8389                       DL, VT, LHSVal, CondV);
8390 return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
8391 }
8392
8393 // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
8394 // Unless we have the short forward branch optimization.
8395 if (!Subtarget.hasConditionalMoveFusion())
8396 return DAG.getNode(
8397 ISD::OR, DL, VT,
8398 DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
8399 DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
8400 }
8401
8402 if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
8403 return V;
8404
8405 if (Op.hasOneUse()) {
8406 unsigned UseOpc = Op->user_begin()->getOpcode();
8407 if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
8408 SDNode *BinOp = *Op->user_begin();
8409 if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->user_begin(),
8410 DAG, Subtarget)) {
8411 DAG.ReplaceAllUsesWith(BinOp, &NewSel);
8412 // Opcode check is necessary because foldBinOpIntoSelectIfProfitable
8413 // may return a constant node and cause crash in lowerSELECT.
8414 if (NewSel.getOpcode() == ISD::SELECT)
8415 return lowerSELECT(NewSel, DAG);
8416 return NewSel;
8417 }
8418 }
8419 }
8420
8421 // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
8422 // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
8423 const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
8424 const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
8425 if (FPTV && FPFV) {
8426 if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
8427 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
8428 if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
8429 SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
8430 DAG.getConstant(1, DL, XLenVT));
8431 return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
8432 }
8433 }
8434
8435 // If the condition is not an integer SETCC which operates on XLenVT, we need
8436 // to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
8437 // (select condv, truev, falsev)
8438 // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
8439 if (CondV.getOpcode() != ISD::SETCC ||
8440 CondV.getOperand(0).getSimpleValueType() != XLenVT) {
8441 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
8442 SDValue SetNE = DAG.getCondCode(ISD::SETNE);
8443
8444 SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
8445
8446 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8447 }
8448
8449 // If the CondV is the output of a SETCC node which operates on XLenVT inputs,
8450 // then merge the SETCC node into the lowered RISCVISD::SELECT_CC to take
8451 // advantage of the integer compare+branch instructions. i.e.:
8452 // (select (setcc lhs, rhs, cc), truev, falsev)
8453 // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
8454 SDValue LHS = CondV.getOperand(0);
8455 SDValue RHS = CondV.getOperand(1);
8456 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8457
8458   // Special case for a select of 2 constants that have a difference of 1.
8459 // Normally this is done by DAGCombine, but if the select is introduced by
8460 // type legalization or op legalization, we miss it. Restricting to SETLT
8461 // case for now because that is what signed saturating add/sub need.
8462 // FIXME: We don't need the condition to be SETLT or even a SETCC,
8463 // but we would probably want to swap the true/false values if the condition
8464 // is SETGE/SETLE to avoid an XORI.
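  // Illustration: a SETLT condition materializes as 0 or 1, so when
  // TrueVal == FalseVal + 1 the select equals FalseVal + CondV (the ADD below),
  // and when TrueVal + 1 == FalseVal it equals FalseVal - CondV (the SUB).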
8465 if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
8466 CCVal == ISD::SETLT) {
8467 const APInt &TrueVal = TrueV->getAsAPIntVal();
8468 const APInt &FalseVal = FalseV->getAsAPIntVal();
8469 if (TrueVal - 1 == FalseVal)
8470 return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
8471 if (TrueVal + 1 == FalseVal)
8472 return DAG.getNode(ISD::SUB, DL, VT, FalseV, CondV);
8473 }
8474
8475 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8476 // 1 < x ? x : 1 -> 0 < x ? x : 1
8477 if (isOneConstant(LHS) && (CCVal == ISD::SETLT || CCVal == ISD::SETULT) &&
8478 RHS == TrueV && LHS == FalseV) {
8479 LHS = DAG.getConstant(0, DL, VT);
8480 // 0 <u x is the same as x != 0.
8481 if (CCVal == ISD::SETULT) {
8482 std::swap(LHS, RHS);
8483 CCVal = ISD::SETNE;
8484 }
8485 }
8486
8487 // x <s -1 ? x : -1 -> x <s 0 ? x : -1
8488 if (isAllOnesConstant(RHS) && CCVal == ISD::SETLT && LHS == TrueV &&
8489 RHS == FalseV) {
8490 RHS = DAG.getConstant(0, DL, VT);
8491 }
8492
8493 SDValue TargetCC = DAG.getCondCode(CCVal);
8494
8495 if (isa<ConstantSDNode>(TrueV) && !isa<ConstantSDNode>(FalseV)) {
8496 // (select (setcc lhs, rhs, CC), constant, falsev)
8497 // -> (select (setcc lhs, rhs, InverseCC), falsev, constant)
8498 std::swap(TrueV, FalseV);
8499 TargetCC = DAG.getCondCode(ISD::getSetCCInverse(CCVal, LHS.getValueType()));
8500 }
8501
8502 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
8503 return DAG.getNode(RISCVISD::SELECT_CC, DL, VT, Ops);
8504}
8505
8506SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
8507 SDValue CondV = Op.getOperand(1);
8508 SDLoc DL(Op);
8509 MVT XLenVT = Subtarget.getXLenVT();
8510
8511 if (CondV.getOpcode() == ISD::SETCC &&
8512 CondV.getOperand(0).getValueType() == XLenVT) {
8513 SDValue LHS = CondV.getOperand(0);
8514 SDValue RHS = CondV.getOperand(1);
8515 ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
8516
8517 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
8518
8519 SDValue TargetCC = DAG.getCondCode(CCVal);
8520 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8521 LHS, RHS, TargetCC, Op.getOperand(2));
8522 }
8523
8524 return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
8525 CondV, DAG.getConstant(0, DL, XLenVT),
8526 DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
8527}
8528
8529SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
8530   MachineFunction &MF = DAG.getMachineFunction();
8531   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
8532
8533 SDLoc DL(Op);
8534 SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
8535                                  getPointerTy(MF.getDataLayout()));
8536
8537 // vastart just stores the address of the VarArgsFrameIndex slot into the
8538 // memory location argument.
8539 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
8540 return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
8541 MachinePointerInfo(SV));
8542}
8543
8544SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
8545 SelectionDAG &DAG) const {
8546 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8547   MachineFunction &MF = DAG.getMachineFunction();
8548   MachineFrameInfo &MFI = MF.getFrameInfo();
8549 MFI.setFrameAddressIsTaken(true);
8550 Register FrameReg = RI.getFrameRegister(MF);
8551 int XLenInBytes = Subtarget.getXLen() / 8;
8552
8553 EVT VT = Op.getValueType();
8554 SDLoc DL(Op);
8555 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
8556 unsigned Depth = Op.getConstantOperandVal(0);
8557 while (Depth--) {
8558 int Offset = -(XLenInBytes * 2);
8559 SDValue Ptr = DAG.getNode(
8560 ISD::ADD, DL, VT, FrameAddr,
8561         DAG.getSignedConstant(Offset, DL, VT));
8562     FrameAddr =
8563 DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
8564 }
8565 return FrameAddr;
8566}
8567
8568SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
8569 SelectionDAG &DAG) const {
8570 const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
8571   MachineFunction &MF = DAG.getMachineFunction();
8572   MachineFrameInfo &MFI = MF.getFrameInfo();
8573 MFI.setReturnAddressIsTaken(true);
8574 MVT XLenVT = Subtarget.getXLenVT();
8575 int XLenInBytes = Subtarget.getXLen() / 8;
8576
8577   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
8578     return SDValue();
8579
8580 EVT VT = Op.getValueType();
8581 SDLoc DL(Op);
8582 unsigned Depth = Op.getConstantOperandVal(0);
8583 if (Depth) {
8584 int Off = -XLenInBytes;
8585 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
8586 SDValue Offset = DAG.getSignedConstant(Off, DL, VT);
8587 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
8588 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
8589                        MachinePointerInfo());
8590   }
8591
8592 // Return the value of the return address register, marking it an implicit
8593 // live-in.
8594 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
8595 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
8596}
8597
8598SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
8599 SelectionDAG &DAG) const {
8600 SDLoc DL(Op);
8601 SDValue Lo = Op.getOperand(0);
8602 SDValue Hi = Op.getOperand(1);
8603 SDValue Shamt = Op.getOperand(2);
8604 EVT VT = Lo.getValueType();
8605
8606 // if Shamt-XLEN < 0: // Shamt < XLEN
8607 // Lo = Lo << Shamt
8608 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
8609 // else:
8610 // Lo = 0
8611 // Hi = Lo << (Shamt-XLEN)
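  // Illustration (assuming XLEN = 32): for Shamt = 8, Hi becomes
  // (Hi << 8) | (Lo >> 24), where Lo >> 24 is computed as (Lo >> 1) >> 23.
  // Splitting the right shift this way keeps each shift amount within
  // [0, XLEN-1] even when Shamt is 0. For Shamt = 40, Shamt-XLEN = 8 is
  // non-negative, so Lo becomes 0 and Hi becomes Lo << 8.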
8612
8613 SDValue Zero = DAG.getConstant(0, DL, VT);
8614 SDValue One = DAG.getConstant(1, DL, VT);
8615 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8616 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8617 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8618 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8619
8620 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
8621 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
8622 SDValue ShiftRightLo =
8623 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
8624 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
8625 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
8626 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
8627
8628 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8629
8630 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
8631 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8632
8633 SDValue Parts[2] = {Lo, Hi};
8634 return DAG.getMergeValues(Parts, DL);
8635}
8636
8637SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
8638 bool IsSRA) const {
8639 SDLoc DL(Op);
8640 SDValue Lo = Op.getOperand(0);
8641 SDValue Hi = Op.getOperand(1);
8642 SDValue Shamt = Op.getOperand(2);
8643 EVT VT = Lo.getValueType();
8644
8645 // SRA expansion:
8646 // if Shamt-XLEN < 0: // Shamt < XLEN
8647 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8648 // Hi = Hi >>s Shamt
8649 // else:
8650 // Lo = Hi >>s (Shamt-XLEN);
8651 // Hi = Hi >>s (XLEN-1)
8652 //
8653 // SRL expansion:
8654 // if Shamt-XLEN < 0: // Shamt < XLEN
8655 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - ShAmt))
8656 // Hi = Hi >>u Shamt
8657 // else:
8658 // Lo = Hi >>u (Shamt-XLEN);
8659 // Hi = 0;
8660
8661 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
8662
8663 SDValue Zero = DAG.getConstant(0, DL, VT);
8664 SDValue One = DAG.getConstant(1, DL, VT);
8665 SDValue MinusXLen = DAG.getSignedConstant(-(int)Subtarget.getXLen(), DL, VT);
8666 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
8667 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
8668 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
8669
8670 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
8671 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
8672 SDValue ShiftLeftHi =
8673 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
8674 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
8675 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
8676 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
8677 SDValue HiFalse =
8678 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
8679
8680 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
8681
8682 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
8683 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
8684
8685 SDValue Parts[2] = {Lo, Hi};
8686 return DAG.getMergeValues(Parts, DL);
8687}
8688
8689// Lower splats of i1 types to SETCC. For each mask vector type, we have a
8690// legal equivalently-sized i8 type, so we can use that as a go-between.
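// For example, splatting a scalar s over a mask type becomes: mask s down to
// its low bit with an AND, splat that value as an i8 vector, and compare the
// result against zero with SETNE so each lane yields s & 1.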
8691SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
8692 SelectionDAG &DAG) const {
8693 SDLoc DL(Op);
8694 MVT VT = Op.getSimpleValueType();
8695 SDValue SplatVal = Op.getOperand(0);
8696 // All-zeros or all-ones splats are handled specially.
8697 if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
8698 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8699 return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
8700 }
8701 if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
8702 SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
8703 return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
8704 }
8705 MVT InterVT = VT.changeVectorElementType(MVT::i8);
8706 SplatVal = DAG.getNode(ISD::AND, DL, SplatVal.getValueType(), SplatVal,
8707 DAG.getConstant(1, DL, SplatVal.getValueType()));
8708 SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
8709 SDValue Zero = DAG.getConstant(0, DL, InterVT);
8710 return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
8711}
8712
8713// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
8714// illegal (currently only vXi64 RV32).
8715// FIXME: We could also catch non-constant sign-extended i32 values and lower
8716// them to VMV_V_X_VL.
8717SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
8718 SelectionDAG &DAG) const {
8719 SDLoc DL(Op);
8720 MVT VecVT = Op.getSimpleValueType();
8721 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
8722 "Unexpected SPLAT_VECTOR_PARTS lowering");
8723
8724 assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
8725 SDValue Lo = Op.getOperand(0);
8726 SDValue Hi = Op.getOperand(1);
8727
8728 MVT ContainerVT = VecVT;
8729 if (VecVT.isFixedLengthVector())
8730 ContainerVT = getContainerForFixedLengthVector(VecVT);
8731
8732 auto VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8733
8734 SDValue Res =
8735 splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
8736
8737 if (VecVT.isFixedLengthVector())
8738 Res = convertFromScalableVector(VecVT, Res, DAG, Subtarget);
8739
8740 return Res;
8741}
8742
8743// Custom-lower extensions from mask vectors by using a vselect either with 1
8744// for zero/any-extension or -1 for sign-extension:
8745// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
8746// Note that any-extension is lowered identically to zero-extension.
8747SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
8748 int64_t ExtTrueVal) const {
8749 SDLoc DL(Op);
8750 MVT VecVT = Op.getSimpleValueType();
8751 SDValue Src = Op.getOperand(0);
8752 // Only custom-lower extensions from mask types
8753 assert(Src.getValueType().isVector() &&
8754 Src.getValueType().getVectorElementType() == MVT::i1);
8755
8756 if (VecVT.isScalableVector()) {
8757 SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
8758 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, VecVT);
8759 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
8760 }
8761
8762 MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
8763 MVT I1ContainerVT =
8764 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
8765
8766 SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
8767
8768 SDValue VL = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).second;
8769
8770 MVT XLenVT = Subtarget.getXLenVT();
8771 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
8772 SDValue SplatTrueVal = DAG.getSignedConstant(ExtTrueVal, DL, XLenVT);
8773
8774 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8775 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8776 SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8777 DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
8778 SDValue Select =
8779 DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, SplatTrueVal,
8780 SplatZero, DAG.getUNDEF(ContainerVT), VL);
8781
8782 return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
8783}
8784
8785SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
8786 SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
8787 MVT ExtVT = Op.getSimpleValueType();
8788 // Only custom-lower extensions from fixed-length vector types.
8789 if (!ExtVT.isFixedLengthVector())
8790 return Op;
8791 MVT VT = Op.getOperand(0).getSimpleValueType();
8792 // Grab the canonical container type for the extended type. Infer the smaller
8793 // type from that to ensure the same number of vector elements, as we know
8794 // the LMUL will be sufficient to hold the smaller type.
8795 MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
8796 // Get the extended container type manually to ensure the same number of
8797 // vector elements between source and dest.
8798 MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
8799 ContainerExtVT.getVectorElementCount());
8800
8801 SDValue Op1 =
8802 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
8803
8804 SDLoc DL(Op);
8805 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
8806
8807 SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
8808
8809 return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
8810}
8811
8812// Custom-lower truncations from vectors to mask vectors by using a mask and a
8813// setcc operation:
8814// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
8815SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
8816 SelectionDAG &DAG) const {
8817 bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
8818 SDLoc DL(Op);
8819 EVT MaskVT = Op.getValueType();
8820 // Only expect to custom-lower truncations to mask types
8821 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
8822 "Unexpected type for vector mask lowering");
8823 SDValue Src = Op.getOperand(0);
8824 MVT VecVT = Src.getSimpleValueType();
8825 SDValue Mask, VL;
8826 if (IsVPTrunc) {
8827 Mask = Op.getOperand(1);
8828 VL = Op.getOperand(2);
8829 }
8830 // If this is a fixed vector, we need to convert it to a scalable vector.
8831 MVT ContainerVT = VecVT;
8832
8833 if (VecVT.isFixedLengthVector()) {
8834 ContainerVT = getContainerForFixedLengthVector(VecVT);
8835 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8836 if (IsVPTrunc) {
8837 MVT MaskContainerVT =
8838 getContainerForFixedLengthVector(Mask.getSimpleValueType());
8839 Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
8840 }
8841 }
8842
8843 if (!IsVPTrunc) {
8844 std::tie(Mask, VL) =
8845 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
8846 }
8847
8848 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
8849 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
8850
8851 SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8852 DAG.getUNDEF(ContainerVT), SplatOne, VL);
8853 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
8854 DAG.getUNDEF(ContainerVT), SplatZero, VL);
8855
8856 MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
8857 SDValue Trunc = DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne,
8858 DAG.getUNDEF(ContainerVT), Mask, VL);
8859 Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT,
8860 {Trunc, SplatZero, DAG.getCondCode(ISD::SETNE),
8861 DAG.getUNDEF(MaskContainerVT), Mask, VL});
8862 if (MaskVT.isFixedLengthVector())
8863 Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
8864 return Trunc;
8865}
8866
8867SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
8868 SelectionDAG &DAG) const {
8869 unsigned Opc = Op.getOpcode();
8870 bool IsVPTrunc = Opc == ISD::VP_TRUNCATE;
8871 SDLoc DL(Op);
8872
8873 MVT VT = Op.getSimpleValueType();
8874 // Only custom-lower vector truncates
8875 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8876
8877 // Truncates to mask types are handled differently
8878 if (VT.getVectorElementType() == MVT::i1)
8879 return lowerVectorMaskTruncLike(Op, DAG);
8880
8881 // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
8882 // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
8883 // truncate by one power of two at a time.
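  // For example, truncating i64 elements to i8 is emitted as three such steps
  // (i64 -> i32 -> i16 -> i8), each typically selected as a narrowing
  // shift/clip on RVV.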
8884 MVT DstEltVT = VT.getVectorElementType();
8885
8886 SDValue Src = Op.getOperand(0);
8887 MVT SrcVT = Src.getSimpleValueType();
8888 MVT SrcEltVT = SrcVT.getVectorElementType();
8889
8890 assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
8891 isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
8892 "Unexpected vector truncate lowering");
8893
8894 MVT ContainerVT = SrcVT;
8895 SDValue Mask, VL;
8896 if (IsVPTrunc) {
8897 Mask = Op.getOperand(1);
8898 VL = Op.getOperand(2);
8899 }
8900 if (SrcVT.isFixedLengthVector()) {
8901 ContainerVT = getContainerForFixedLengthVector(SrcVT);
8902 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
8903 if (IsVPTrunc) {
8904 MVT MaskVT = getMaskTypeFor(ContainerVT);
8905 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
8906 }
8907 }
8908
8909 SDValue Result = Src;
8910 if (!IsVPTrunc) {
8911 std::tie(Mask, VL) =
8912 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8913 }
8914
8915 unsigned NewOpc;
8916   if (Opc == ISD::TRUNCATE_SSAT_S)
8917     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
8918   else if (Opc == ISD::TRUNCATE_USAT_U)
8919     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
8920   else
8921     NewOpc = RISCVISD::TRUNCATE_VECTOR_VL;
8922
8923 do {
8924 SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
8925 MVT ResultVT = ContainerVT.changeVectorElementType(SrcEltVT);
8926 Result = DAG.getNode(NewOpc, DL, ResultVT, Result, Mask, VL);
8927 } while (SrcEltVT != DstEltVT);
8928
8929 if (SrcVT.isFixedLengthVector())
8930 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
8931
8932 return Result;
8933}
8934
8935SDValue
8936RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
8937 SelectionDAG &DAG) const {
8938 SDLoc DL(Op);
8939 SDValue Chain = Op.getOperand(0);
8940 SDValue Src = Op.getOperand(1);
8941 MVT VT = Op.getSimpleValueType();
8942 MVT SrcVT = Src.getSimpleValueType();
8943 MVT ContainerVT = VT;
8944 if (VT.isFixedLengthVector()) {
8945 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
8946 ContainerVT =
8947 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
8948 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
8949 }
8950
8951 auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
8952
8953   // RVV can only widen/truncate fp to types double/half the size of the source.
8954 if ((VT.getVectorElementType() == MVT::f64 &&
8955 (SrcVT.getVectorElementType() == MVT::f16 ||
8956 SrcVT.getVectorElementType() == MVT::bf16)) ||
8957 ((VT.getVectorElementType() == MVT::f16 ||
8958 VT.getVectorElementType() == MVT::bf16) &&
8959 SrcVT.getVectorElementType() == MVT::f64)) {
8960 // For double rounding, the intermediate rounding should be round-to-odd.
8961     unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8962                                 ? RISCVISD::STRICT_FP_EXTEND_VL
8963                                 : RISCVISD::STRICT_VFNCVT_ROD_VL;
8964 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
8965 Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
8966 Chain, Src, Mask, VL);
8967 Chain = Src.getValue(1);
8968 }
8969
8970   unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
8971                          ? RISCVISD::STRICT_FP_EXTEND_VL
8972                          : RISCVISD::STRICT_FP_ROUND_VL;
8973 SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
8974 Chain, Src, Mask, VL);
8975 if (VT.isFixedLengthVector()) {
8976 // StrictFP operations have two result values. Their lowered result should
8977     // have the same result count.
8978 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
8979 Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
8980 }
8981 return Res;
8982}
8983
8984SDValue
8985RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
8986 SelectionDAG &DAG) const {
8987 bool IsVP =
8988 Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
8989 bool IsExtend =
8990 Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
8991   // RVV can only truncate fp to types half the size of the source. We
8992 // custom-lower f64->f16 rounds via RVV's round-to-odd float
8993 // conversion instruction.
8994 SDLoc DL(Op);
8995 MVT VT = Op.getSimpleValueType();
8996
8997 assert(VT.isVector() && "Unexpected type for vector truncate lowering");
8998
8999 SDValue Src = Op.getOperand(0);
9000 MVT SrcVT = Src.getSimpleValueType();
9001
9002 bool IsDirectExtend =
9003 IsExtend && (VT.getVectorElementType() != MVT::f64 ||
9004 (SrcVT.getVectorElementType() != MVT::f16 &&
9005 SrcVT.getVectorElementType() != MVT::bf16));
9006 bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
9007 VT.getVectorElementType() != MVT::bf16) ||
9008 SrcVT.getVectorElementType() != MVT::f64);
9009
9010 bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
9011
9012 // Prepare any fixed-length vector operands.
9013 MVT ContainerVT = VT;
9014 SDValue Mask, VL;
9015 if (IsVP) {
9016 Mask = Op.getOperand(1);
9017 VL = Op.getOperand(2);
9018 }
9019 if (VT.isFixedLengthVector()) {
9020 MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
9021 ContainerVT =
9022 SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
9023 Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
9024 if (IsVP) {
9025 MVT MaskVT = getMaskTypeFor(ContainerVT);
9026 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
9027 }
9028 }
9029
9030 if (!IsVP)
9031 std::tie(Mask, VL) =
9032 getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
9033
9034 unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
9035
9036 if (IsDirectConv) {
9037 Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
9038 if (VT.isFixedLengthVector())
9039 Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
9040 return Src;
9041 }
9042
9043   unsigned InterConvOpc =
9044       IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
9045
9046 MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
9047 SDValue IntermediateConv =
9048 DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
9049 SDValue Result =
9050 DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
9051 if (VT.isFixedLengthVector())
9052 return convertFromScalableVector(VT, Result, DAG, Subtarget);
9053 return Result;
9054}
9055
9056// Given a scalable vector type and an index into it, returns the type for the
9057// smallest subvector that the index fits in. This can be used to reduce LMUL
9058// for operations like vslidedown.
9059//
9060// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
9061static std::optional<MVT>
9062getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
9063 const RISCVSubtarget &Subtarget) {
9064 assert(VecVT.isScalableVector());
9065 const unsigned EltSize = VecVT.getScalarSizeInBits();
9066 const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
9067 const unsigned MinVLMAX = VectorBitsMin / EltSize;
9068 MVT SmallerVT;
9069 if (MaxIdx < MinVLMAX)
9070 SmallerVT = getLMUL1VT(VecVT);
9071 else if (MaxIdx < MinVLMAX * 2)
9072 SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
9073 else if (MaxIdx < MinVLMAX * 4)
9074     SmallerVT = getLMUL1VT(VecVT)
9075                     .getDoubleNumVectorElementsVT()
9076                     .getDoubleNumVectorElementsVT();
9077 if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
9078 return std::nullopt;
9079 return SmallerVT;
9080}
9081
9082// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
9083// first position of a vector, and that vector is slid up to the insert index.
9084// By limiting the active vector length to index+1 and merging with the
9085// original vector (with an undisturbed tail policy for elements >= VL), we
9086// achieve the desired result of leaving all elements untouched except the one
9087// at VL-1, which is replaced with the desired value.
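// For example, inserting at index 3 moves the scalar into element 0 of a
// vector, then slides it up by 3 with VL limited to 4; with a tail-undisturbed
// policy only element 3 of the original vector is overwritten.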
9088SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
9089 SelectionDAG &DAG) const {
9090 SDLoc DL(Op);
9091 MVT VecVT = Op.getSimpleValueType();
9092 MVT XLenVT = Subtarget.getXLenVT();
9093 SDValue Vec = Op.getOperand(0);
9094 SDValue Val = Op.getOperand(1);
9095 MVT ValVT = Val.getSimpleValueType();
9096 SDValue Idx = Op.getOperand(2);
9097
9098 if (VecVT.getVectorElementType() == MVT::i1) {
9099 // FIXME: For now we just promote to an i8 vector and insert into that,
9100 // but this is probably not optimal.
9101 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9102 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9103 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
9104 return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
9105 }
9106
9107 if ((ValVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9108 ValVT == MVT::bf16) {
9109 // If we don't have vfmv.s.f for f16/bf16, use fmv.x.h first.
9110 MVT IntVT = VecVT.changeTypeToInteger();
9111 SDValue IntInsert = DAG.getNode(
9112 ISD::INSERT_VECTOR_ELT, DL, IntVT, DAG.getBitcast(IntVT, Vec),
9113 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Val), Idx);
9114 return DAG.getBitcast(VecVT, IntInsert);
9115 }
9116
9117 MVT ContainerVT = VecVT;
9118 // If the operand is a fixed-length vector, convert to a scalable one.
9119 if (VecVT.isFixedLengthVector()) {
9120 ContainerVT = getContainerForFixedLengthVector(VecVT);
9121 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9122 }
9123
9124 // If we know the index we're going to insert at, we can shrink Vec so that
9125 // we're performing the scalar inserts and slideup on a smaller LMUL.
9126 MVT OrigContainerVT = ContainerVT;
9127 SDValue OrigVec = Vec;
9128 SDValue AlignedIdx;
9129 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
9130 const unsigned OrigIdx = IdxC->getZExtValue();
9131 // Do we know an upper bound on LMUL?
9132 if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, OrigIdx,
9133 DL, DAG, Subtarget)) {
9134 ContainerVT = *ShrunkVT;
9135 AlignedIdx = DAG.getVectorIdxConstant(0, DL);
9136 }
9137
9138 // If we're compiling for an exact VLEN value, we can always perform
9139 // the insert in m1 as we can determine the register corresponding to
9140 // the index in the register group.
9141 const MVT M1VT = getLMUL1VT(ContainerVT);
9142 if (auto VLEN = Subtarget.getRealVLen();
9143 VLEN && ContainerVT.bitsGT(M1VT)) {
9144 EVT ElemVT = VecVT.getVectorElementType();
9145 unsigned ElemsPerVReg = *VLEN / ElemVT.getFixedSizeInBits();
9146 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9147 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9148 unsigned ExtractIdx =
9149 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9150 AlignedIdx = DAG.getVectorIdxConstant(ExtractIdx, DL);
9151 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9152 ContainerVT = M1VT;
9153 }
9154
9155 if (AlignedIdx)
9156 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9157 AlignedIdx);
9158 }
9159
9160 bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
9161 // Even i64-element vectors on RV32 can be lowered without scalar
9162 // legalization if the most-significant 32 bits of the value are not affected
9163 // by the sign-extension of the lower 32 bits.
9164 // TODO: We could also catch sign extensions of a 32-bit value.
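  // For example, the i64 constant 0xFFFFFFFF80000000 is the sign-extension of
  // 0x80000000, so it satisfies isInt<32> and can be inserted as a 32-bit
  // scalar; a value like 0x0000000180000000 is not and takes the split lo/hi
  // vslide1down path below.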
9165 if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
9166 const auto *CVal = cast<ConstantSDNode>(Val);
9167 if (isInt<32>(CVal->getSExtValue())) {
9168 IsLegalInsert = true;
9169 Val = DAG.getSignedConstant(CVal->getSExtValue(), DL, MVT::i32);
9170 }
9171 }
9172
9173 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9174
9175 SDValue ValInVec;
9176
9177 if (IsLegalInsert) {
9178     unsigned Opc =
9179         VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
9180 if (isNullConstant(Idx)) {
9181 if (!VecVT.isFloatingPoint())
9182 Val = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Val);
9183 Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
9184
9185 if (AlignedIdx)
9186 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9187 Vec, AlignedIdx);
9188 if (!VecVT.isFixedLengthVector())
9189 return Vec;
9190 return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
9191 }
9192 ValInVec = lowerScalarInsert(Val, VL, ContainerVT, DL, DAG, Subtarget);
9193 } else {
9194 // On RV32, i64-element vectors must be specially handled to place the
9195 // value at element 0, by using two vslide1down instructions in sequence on
9196 // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
9197 // this.
9198 SDValue ValLo, ValHi;
9199 std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
9200 MVT I32ContainerVT =
9201 MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
9202 SDValue I32Mask =
9203 getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
9204 // Limit the active VL to two.
9205 SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
9206 // If the Idx is 0 we can insert directly into the vector.
9207 if (isNullConstant(Idx)) {
9208 // First slide in the lo value, then the hi in above it. We use slide1down
9209 // to avoid the register group overlap constraint of vslide1up.
9210 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9211 Vec, Vec, ValLo, I32Mask, InsertI64VL);
9212 // If the source vector is undef don't pass along the tail elements from
9213 // the previous slide1down.
9214 SDValue Tail = Vec.isUndef() ? Vec : ValInVec;
9215 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9216 Tail, ValInVec, ValHi, I32Mask, InsertI64VL);
9217 // Bitcast back to the right container type.
9218 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9219
9220 if (AlignedIdx)
9221 ValInVec =
9222 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9223 ValInVec, AlignedIdx);
9224 if (!VecVT.isFixedLengthVector())
9225 return ValInVec;
9226 return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
9227 }
9228
9229 // First slide in the lo value, then the hi in above it. We use slide1down
9230 // to avoid the register group overlap constraint of vslide1up.
9231 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9232 DAG.getUNDEF(I32ContainerVT),
9233 DAG.getUNDEF(I32ContainerVT), ValLo,
9234 I32Mask, InsertI64VL);
9235 ValInVec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32ContainerVT,
9236 DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
9237 I32Mask, InsertI64VL);
9238 // Bitcast back to the right container type.
9239 ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
9240 }
9241
9242 // Now that the value is in a vector, slide it into position.
9243 SDValue InsertVL =
9244 DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
9245
9246 // Use tail agnostic policy if Idx is the last index of Vec.
9247   unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
9248   if (VecVT.isFixedLengthVector() && isa<ConstantSDNode>(Idx) &&
9249 Idx->getAsZExtVal() + 1 == VecVT.getVectorNumElements())
9250 Policy = RISCVII::TAIL_AGNOSTIC;
9251 SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
9252 Idx, Mask, InsertVL, Policy);
9253
9254 if (AlignedIdx)
9255 Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
9256 Slideup, AlignedIdx);
9257 if (!VecVT.isFixedLengthVector())
9258 return Slideup;
9259 return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
9260}
9261
9262// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
9263// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
9264// types this is done using VMV_X_S to allow us to glean information about the
9265// sign bits of the result.
9266SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
9267 SelectionDAG &DAG) const {
9268 SDLoc DL(Op);
9269 SDValue Idx = Op.getOperand(1);
9270 SDValue Vec = Op.getOperand(0);
9271 EVT EltVT = Op.getValueType();
9272 MVT VecVT = Vec.getSimpleValueType();
9273 MVT XLenVT = Subtarget.getXLenVT();
9274
9275 if (VecVT.getVectorElementType() == MVT::i1) {
9276 // Use vfirst.m to extract the first bit.
9277 if (isNullConstant(Idx)) {
9278 MVT ContainerVT = VecVT;
9279 if (VecVT.isFixedLengthVector()) {
9280 ContainerVT = getContainerForFixedLengthVector(VecVT);
9281 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9282 }
9283 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
9284 SDValue Vfirst =
9285 DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Vec, Mask, VL);
9286 SDValue Res = DAG.getSetCC(DL, XLenVT, Vfirst,
9287 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
9288 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9289 }
9290 if (VecVT.isFixedLengthVector()) {
9291 unsigned NumElts = VecVT.getVectorNumElements();
9292 if (NumElts >= 8) {
9293 MVT WideEltVT;
9294 unsigned WidenVecLen;
9295 SDValue ExtractElementIdx;
9296 SDValue ExtractBitIdx;
9297 unsigned MaxEEW = Subtarget.getELen();
9298 MVT LargestEltVT = MVT::getIntegerVT(
9299 std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
9300 if (NumElts <= LargestEltVT.getSizeInBits()) {
9301 assert(isPowerOf2_32(NumElts) &&
9302 "the number of elements should be power of 2");
9303 WideEltVT = MVT::getIntegerVT(NumElts);
9304 WidenVecLen = 1;
9305 ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
9306 ExtractBitIdx = Idx;
9307 } else {
9308 WideEltVT = LargestEltVT;
9309 WidenVecLen = NumElts / WideEltVT.getSizeInBits();
9310 // extract element index = index / element width
9311 ExtractElementIdx = DAG.getNode(
9312 ISD::SRL, DL, XLenVT, Idx,
9313 DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
9314 // mask bit index = index % element width
9315 ExtractBitIdx = DAG.getNode(
9316 ISD::AND, DL, XLenVT, Idx,
9317 DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
9318 }
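        // For example (assuming XLEN = 64 and ELEN = 64), extracting bit 70 of
        // a v128i1 source uses WideEltVT = i64 and WidenVecLen = 2: the code
        // above selects element 70 >> 6 = 1 and bit 70 & 63 = 6 within it.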
9319 MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
9320 Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
9321 SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
9322 Vec, ExtractElementIdx);
9323 // Extract the bit from GPR.
9324 SDValue ShiftRight =
9325 DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
9326 SDValue Res = DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
9327 DAG.getConstant(1, DL, XLenVT));
9328 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Res);
9329 }
9330 }
9331 // Otherwise, promote to an i8 vector and extract from that.
9332 MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
9333 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
9334 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
9335 }
9336
9337 if ((EltVT == MVT::f16 && !Subtarget.hasVInstructionsF16()) ||
9338 EltVT == MVT::bf16) {
9339 // If we don't have vfmv.f.s for f16/bf16, extract to a gpr then use fmv.h.x
9340 MVT IntVT = VecVT.changeTypeToInteger();
9341 SDValue IntVec = DAG.getBitcast(IntVT, Vec);
9342 SDValue IntExtract =
9343 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT, IntVec, Idx);
9344 return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
9345 }
9346
9347 // If this is a fixed vector, we need to convert it to a scalable vector.
9348 MVT ContainerVT = VecVT;
9349 if (VecVT.isFixedLengthVector()) {
9350 ContainerVT = getContainerForFixedLengthVector(VecVT);
9351 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
9352 }
9353
9354 // If we're compiling for an exact VLEN value and we have a known
9355 // constant index, we can always perform the extract in m1 (or
9356 // smaller) as we can determine the register corresponding to
9357 // the index in the register group.
9358 const auto VLen = Subtarget.getRealVLen();
9359 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx);
9360 IdxC && VLen && VecVT.getSizeInBits().getKnownMinValue() > *VLen) {
9361 MVT M1VT = getLMUL1VT(ContainerVT);
9362 unsigned OrigIdx = IdxC->getZExtValue();
9363 EVT ElemVT = VecVT.getVectorElementType();
9364 unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
9365 unsigned RemIdx = OrigIdx % ElemsPerVReg;
9366 unsigned SubRegIdx = OrigIdx / ElemsPerVReg;
9367 unsigned ExtractIdx =
9368 SubRegIdx * M1VT.getVectorElementCount().getKnownMinValue();
9369 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
9370 DAG.getVectorIdxConstant(ExtractIdx, DL));
9371 Idx = DAG.getVectorIdxConstant(RemIdx, DL);
9372 ContainerVT = M1VT;
9373 }
9374
9375 // Reduce the LMUL of our slidedown and vmv.x.s to the smallest LMUL which
9376 // contains our index.
9377 std::optional<uint64_t> MaxIdx;
9378 if (VecVT.isFixedLengthVector())
9379 MaxIdx = VecVT.getVectorNumElements() - 1;
9380 if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx))
9381 MaxIdx = IdxC->getZExtValue();
9382 if (MaxIdx) {
9383 if (auto SmallerVT =
9384 getSmallestVTForIndex(ContainerVT, *MaxIdx, DL, DAG, Subtarget)) {
9385 ContainerVT = *SmallerVT;
9386 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
9387 DAG.getConstant(0, DL, XLenVT));
9388 }
9389 }
9390
9391 // If after narrowing, the required slide is still greater than LMUL2,
9392 // fallback to generic expansion and go through the stack. This is done
9393 // for a subtle reason: extracting *all* elements out of a vector is
9394 // widely expected to be linear in vector size, but because vslidedown
9395 // is linear in LMUL, performing N extracts using vslidedown becomes
9396 // O(n^2) / (VLEN/ETYPE) work. On the surface, going through the stack
9397 // seems to have the same problem (the store is linear in LMUL), but the
9398 // generic expansion *memoizes* the store, and thus for many extracts of
9399 // the same vector we end up with one store and a bunch of loads.
9400 // TODO: We don't have the same code for insert_vector_elt because we
9401 // have BUILD_VECTOR and handle the degenerate case there. Should we
9402 // consider adding an inverse BUILD_VECTOR node?
9403 MVT LMUL2VT = getLMUL1VT(ContainerVT).getDoubleNumVectorElementsVT();
9404 if (ContainerVT.bitsGT(LMUL2VT) && VecVT.isFixedLengthVector())
9405 return SDValue();
9406
9407 // If the index is 0, the vector is already in the right position.
9408 if (!isNullConstant(Idx)) {
9409 // Use a VL of 1 to avoid processing more elements than we need.
9410 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
9411 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
9412 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
9413 }
9414
9415 if (!EltVT.isInteger()) {
9416 // Floating-point extracts are handled in TableGen.
9417 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
9418 DAG.getVectorIdxConstant(0, DL));
9419 }
9420
9421 SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
9422 return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
9423}
9424
9425// Some RVV intrinsics may claim that they want an integer operand to be
9426// promoted or expanded.
9427 static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
9428                                            const RISCVSubtarget &Subtarget) {
9429 assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
9430 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
9431 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
9432 "Unexpected opcode");
9433
9434 if (!Subtarget.hasVInstructions())
9435 return SDValue();
9436
9437 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9438 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9439 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9440
9441 SDLoc DL(Op);
9442
9444 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9445 if (!II || !II->hasScalarOperand())
9446 return SDValue();
9447
9448 unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
9449 assert(SplatOp < Op.getNumOperands());
9450
9452 SDValue &ScalarOp = Operands[SplatOp];
9453 MVT OpVT = ScalarOp.getSimpleValueType();
9454 MVT XLenVT = Subtarget.getXLenVT();
9455
9456 // If this isn't a scalar, or its type is XLenVT we're done.
9457 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9458 return SDValue();
9459
9460 // Simplest case is that the operand needs to be promoted to XLenVT.
9461 if (OpVT.bitsLT(XLenVT)) {
9462 // If the operand is a constant, sign extend to increase our chances
9463     // of being able to use a .vi instruction. ANY_EXTEND would become a
9464     // zero extend and the simm5 check in isel would fail.
9465 // FIXME: Should we ignore the upper bits in isel instead?
9466 unsigned ExtOpc =
9467 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9468 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9469 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9470 }
9471
9472 // Use the previous operand to get the vXi64 VT. The result might be a mask
9473 // VT for compares. Using the previous operand assumes that the previous
9474 // operand will never have a smaller element size than a scalar operand and
9475 // that a widening operation never uses SEW=64.
9476 // NOTE: If this fails the below assert, we can probably just find the
9477 // element count from any operand or result and use it to construct the VT.
9478 assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
9479 MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
9480
9481 // The more complex case is when the scalar is larger than XLenVT.
9482 assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
9483 VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
9484
9485 // If this is a sign-extended 32-bit value, we can truncate it and rely on the
9486 // instruction to sign-extend since SEW>XLEN.
9487 if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
9488 ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
9489 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9490 }
9491
9492 switch (IntNo) {
9493 case Intrinsic::riscv_vslide1up:
9494 case Intrinsic::riscv_vslide1down:
9495 case Intrinsic::riscv_vslide1up_mask:
9496 case Intrinsic::riscv_vslide1down_mask: {
9497 // We need to special case these when the scalar is larger than XLen.
9498 unsigned NumOps = Op.getNumOperands();
9499 bool IsMasked = NumOps == 7;
9500
9501 // Convert the vector source to the equivalent nxvXi32 vector.
9502 MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
9503 SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
9504 SDValue ScalarLo, ScalarHi;
9505 std::tie(ScalarLo, ScalarHi) =
9506 DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
9507
9508 // Double the VL since we halved SEW.
9509 SDValue AVL = getVLOperand(Op);
9510 SDValue I32VL;
9511
9512 // Optimize for constant AVL
9513 if (isa<ConstantSDNode>(AVL)) {
9514       const auto [MinVLMAX, MaxVLMAX] =
9515           RISCVTargetLowering::computeVLMAXBounds(VT, Subtarget);
9516
9517 uint64_t AVLInt = AVL->getAsZExtVal();
9518 if (AVLInt <= MinVLMAX) {
9519 I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
9520 } else if (AVLInt >= 2 * MaxVLMAX) {
9521 // Just set vl to VLMAX in this situation
9522 I32VL = DAG.getRegister(RISCV::X0, XLenVT);
9523 } else {
9524 // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
9525 // is related to the hardware implementation.
9526         // So let the following code handle it.
9527 }
9528 }
9529 if (!I32VL) {
9530       RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
9531       SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
9532 unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
9533 SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
9534 SDValue SETVL =
9535 DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
9536     // Use the vsetvli instruction to get the actually-used length, which is
9537     // related to the hardware implementation.
9538 SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
9539 SEW, LMUL);
9540 I32VL =
9541 DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
9542 }
9543
9544 SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
9545
9546 // Shift the two scalar parts in using SEW=32 slide1up/slide1down
9547 // instructions.
9548 SDValue Passthru;
9549 if (IsMasked)
9550 Passthru = DAG.getUNDEF(I32VT);
9551 else
9552 Passthru = DAG.getBitcast(I32VT, Operands[1]);
9553
9554 if (IntNo == Intrinsic::riscv_vslide1up ||
9555 IntNo == Intrinsic::riscv_vslide1up_mask) {
9556 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9557 ScalarHi, I32Mask, I32VL);
9558 Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
9559 ScalarLo, I32Mask, I32VL);
9560 } else {
9561 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9562 ScalarLo, I32Mask, I32VL);
9563 Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
9564 ScalarHi, I32Mask, I32VL);
9565 }
9566
9567 // Convert back to nxvXi64.
9568 Vec = DAG.getBitcast(VT, Vec);
9569
9570 if (!IsMasked)
9571 return Vec;
9572 // Apply mask after the operation.
9573 SDValue Mask = Operands[NumOps - 3];
9574 SDValue MaskedOff = Operands[1];
9575 // Assume Policy operand is the last operand.
9576 uint64_t Policy = Operands[NumOps - 1]->getAsZExtVal();
9577 // We don't need to select maskedoff if it's undef.
9578 if (MaskedOff.isUndef())
9579 return Vec;
9580 // TAMU
9581 if (Policy == RISCVII::TAIL_AGNOSTIC)
9582 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9583 DAG.getUNDEF(VT), AVL);
9584 // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
9585   // This is fine because vmerge does not care about the mask policy.
9586 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, Mask, Vec, MaskedOff,
9587 MaskedOff, AVL);
9588 }
9589 }
9590
9591 // We need to convert the scalar to a splat vector.
9592 SDValue VL = getVLOperand(Op);
9593 assert(VL.getValueType() == XLenVT);
9594 ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
9595 return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
9596}
9597
9598// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
9599// scalable vector llvm.get.vector.length for now.
9600//
9601// We need to convert from a scalable VF to a vsetvli with VLMax equal to
9602// (vscale * VF). The vscale and VF are independent of element width. We use
9603// SEW=8 for the vsetvli because it is the only element width that supports all
9604 // fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
9605 // (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
9606// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
9607// SEW and LMUL are better for the surrounding vector instructions.
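// For example, with RVVBitsPerBlock = 64 the LMUL=1 VF at SEW=8 is 8, so a
// request for VF = 2 selects the fractional LMUL 1/4: VLMax at SEW=8, LMUL=1/4
// is (VLEN/8)/4 = vscale * 2, matching the requested scalable VF.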
9608 static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
9609                                     const RISCVSubtarget &Subtarget) {
9610 MVT XLenVT = Subtarget.getXLenVT();
9611
9612 // The smallest LMUL is only valid for the smallest element width.
9613 const unsigned ElementWidth = 8;
9614
9615 // Determine the VF that corresponds to LMUL 1 for ElementWidth.
9616 unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
9617 // We don't support VF==1 with ELEN==32.
9618 [[maybe_unused]] unsigned MinVF =
9619 RISCV::RVVBitsPerBlock / Subtarget.getELen();
9620
9621 [[maybe_unused]] unsigned VF = N->getConstantOperandVal(2);
9622 assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
9623 "Unexpected VF");
9624
9625 bool Fractional = VF < LMul1VF;
9626 unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
9627 unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
9628 unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
9629
9630 SDLoc DL(N);
9631
9632 SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
9633 SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
9634
9635 SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
9636
9637 SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
9638 SDValue Res =
9639 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
9640 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
9641}
9642
9643 static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG,
9644                              const RISCVSubtarget &Subtarget) {
9645 SDValue Op0 = N->getOperand(1);
9646 MVT OpVT = Op0.getSimpleValueType();
9647 MVT ContainerVT = OpVT;
9648 if (OpVT.isFixedLengthVector()) {
9649 ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget);
9650 Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
9651 }
9652 MVT XLenVT = Subtarget.getXLenVT();
9653 SDLoc DL(N);
9654 auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget);
9655 SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL);
9656 if (isOneConstant(N->getOperand(2)))
9657 return Res;
9658
9659 // Convert -1 to VL.
9660 SDValue Setcc =
9661 DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT);
9662 VL = DAG.getElementCount(DL, XLenVT, OpVT.getVectorElementCount());
9663 return DAG.getSelect(DL, XLenVT, Setcc, VL, Res);
9664}
9665
9666static inline void promoteVCIXScalar(const SDValue &Op,
9667                                      SmallVectorImpl<SDValue> &Operands,
9668                                      SelectionDAG &DAG) {
9669 const RISCVSubtarget &Subtarget =
9671
9672 bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
9673 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
9674 unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
9675 SDLoc DL(Op);
9676
9677 const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
9678 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
9679 if (!II || !II->hasScalarOperand())
9680 return;
9681
9682 unsigned SplatOp = II->ScalarOperand + 1;
9683 assert(SplatOp < Op.getNumOperands());
9684
9685 SDValue &ScalarOp = Operands[SplatOp];
9686 MVT OpVT = ScalarOp.getSimpleValueType();
9687 MVT XLenVT = Subtarget.getXLenVT();
9688
9689 // The code below is partially copied from lowerVectorIntrinsicScalars.
9690 // If this isn't a scalar, or its type is XLenVT we're done.
9691 if (!OpVT.isScalarInteger() || OpVT == XLenVT)
9692 return;
9693
9694 // Manually emit promote operation for scalar operation.
9695 if (OpVT.bitsLT(XLenVT)) {
9696 unsigned ExtOpc =
9697 isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
9698 ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
9699 }
9700}
9701
9702static void processVCIXOperands(SDValue &OrigOp,
9703 SmallVectorImpl<SDValue> &Operands,
9704 SelectionDAG &DAG) {
9705 promoteVCIXScalar(OrigOp, Operands, DAG);
9706 const RISCVSubtarget &Subtarget =
9707 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9708 for (SDValue &V : Operands) {
9709 EVT ValType = V.getValueType();
9710 if (ValType.isVector() && ValType.isFloatingPoint()) {
9711 MVT InterimIVT =
9712 MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
9713 ValType.getVectorElementCount());
9714 V = DAG.getBitcast(InterimIVT, V);
9715 }
9716 if (ValType.isFixedLengthVector()) {
9717 MVT OpContainerVT = getContainerForFixedLengthVector(
9718 DAG, V.getSimpleValueType(), Subtarget);
9719 V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
9720 }
9721 }
9722}
9723
9724// LMUL * VLEN should be greater than or equal to EGS * SEW
9725static inline bool isValidEGW(int EGS, EVT VT,
9726 const RISCVSubtarget &Subtarget) {
9727 return (Subtarget.getRealMinVLen() *
9728 VT.getSizeInBits().getKnownMinValue()) / RISCV::RVVBitsPerBlock >=
9729 EGS * VT.getScalarSizeInBits();
9730}
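// For example, with a minimum VLEN of 128 and EGS = 4: nxv4i32 has LMUL = 2, so
// LMUL * VLEN = 256 >= 4 * 32 and it is accepted, whereas nxv1i32 has
// LMUL = 1/2, so LMUL * VLEN = 64 < 128 and it is rejected.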
9731
9732SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9733 SelectionDAG &DAG) const {
9734 unsigned IntNo = Op.getConstantOperandVal(0);
9735 SDLoc DL(Op);
9736 MVT XLenVT = Subtarget.getXLenVT();
9737
9738 switch (IntNo) {
9739 default:
9740 break; // Don't custom lower most intrinsics.
9741 case Intrinsic::riscv_tuple_insert: {
9742 SDValue Vec = Op.getOperand(1);
9743 SDValue SubVec = Op.getOperand(2);
9744 SDValue Index = Op.getOperand(3);
9745
9746 return DAG.getNode(RISCVISD::TUPLE_INSERT, DL, Op.getValueType(), Vec,
9747 SubVec, Index);
9748 }
9749 case Intrinsic::riscv_tuple_extract: {
9750 SDValue Vec = Op.getOperand(1);
9751 SDValue Index = Op.getOperand(2);
9752
9753 return DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, Op.getValueType(), Vec,
9754 Index);
9755 }
9756 case Intrinsic::thread_pointer: {
9757 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9758 return DAG.getRegister(RISCV::X4, PtrVT);
9759 }
9760 case Intrinsic::riscv_orc_b:
9761 case Intrinsic::riscv_brev8:
9762 case Intrinsic::riscv_sha256sig0:
9763 case Intrinsic::riscv_sha256sig1:
9764 case Intrinsic::riscv_sha256sum0:
9765 case Intrinsic::riscv_sha256sum1:
9766 case Intrinsic::riscv_sm3p0:
9767 case Intrinsic::riscv_sm3p1: {
9768 unsigned Opc;
9769 switch (IntNo) {
9770 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
9771 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
9772 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
9773 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
9774 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
9775 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
9776 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
9777 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
9778 }
9779
9780 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9781 }
9782 case Intrinsic::riscv_sm4ks:
9783 case Intrinsic::riscv_sm4ed: {
9784 unsigned Opc =
9785 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
9786
9787 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
9788 Op.getOperand(3));
9789 }
9790 case Intrinsic::riscv_zip:
9791 case Intrinsic::riscv_unzip: {
9792 unsigned Opc =
9793 IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
9794 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
9795 }
9796 case Intrinsic::riscv_mopr:
9797 return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
9798 Op.getOperand(2));
9799
9800 case Intrinsic::riscv_moprr: {
9801 return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
9802 Op.getOperand(2), Op.getOperand(3));
9803 }
9804 case Intrinsic::riscv_clmul:
9805 return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
9806 Op.getOperand(2));
9807 case Intrinsic::riscv_clmulh:
9808 case Intrinsic::riscv_clmulr: {
9809 unsigned Opc =
9810 IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH : RISCVISD::CLMULR;
9811 return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
9812 }
9813 case Intrinsic::experimental_get_vector_length:
9814 return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
9815 case Intrinsic::experimental_cttz_elts:
9816 return lowerCttzElts(Op.getNode(), DAG, Subtarget);
9817 case Intrinsic::riscv_vmv_x_s: {
9818 SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1));
9819 return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res);
9820 }
9821 case Intrinsic::riscv_vfmv_f_s:
9822 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
9823 Op.getOperand(1), DAG.getVectorIdxConstant(0, DL));
9824 case Intrinsic::riscv_vmv_v_x:
9825 return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
9826 Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
9827 Subtarget);
9828 case Intrinsic::riscv_vfmv_v_f:
9829 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
9830 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9831 case Intrinsic::riscv_vmv_s_x: {
9832 SDValue Scalar = Op.getOperand(2);
9833
9834 if (Scalar.getValueType().bitsLE(XLenVT)) {
9835 Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
9836 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
9837 Op.getOperand(1), Scalar, Op.getOperand(3));
9838 }
9839
9840 assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
9841
9842 // This is an i64 value that lives in two scalar registers. We have to
9843 // insert this in a convoluted way. First we build vXi64 splat containing
9844 // the two values that we assemble using some bit math. Next we'll use
9845 // vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
9846 // to merge element 0 from our splat into the source vector.
9847 // FIXME: This is probably not the best way to do this, but it is
9848 // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
9849 // point.
9850 // sw lo, (a0)
9851 // sw hi, 4(a0)
9852 // vlse vX, (a0)
9853 //
9854 // vid.v vVid
9855 // vmseq.vx mMask, vVid, 0
9856 // vmerge.vvm vDest, vSrc, vVal, mMask
9857 MVT VT = Op.getSimpleValueType();
9858 SDValue Vec = Op.getOperand(1);
9859 SDValue VL = getVLOperand(Op);
9860
9861 SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
9862 if (Op.getOperand(1).isUndef())
9863 return SplattedVal;
9864 SDValue SplattedIdx =
9865 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
9866 DAG.getConstant(0, DL, MVT::i32), VL);
9867
9868 MVT MaskVT = getMaskTypeFor(VT);
9869 SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
9870 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
9871 SDValue SelectCond =
9872 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
9873 {VID, SplattedIdx, DAG.getCondCode(ISD::SETEQ),
9874 DAG.getUNDEF(MaskVT), Mask, VL});
9875 return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
9876 Vec, DAG.getUNDEF(VT), VL);
9877 }
9878 case Intrinsic::riscv_vfmv_s_f:
9879 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
9880 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
9881 // EGS * EEW >= 128 bits
9882 case Intrinsic::riscv_vaesdf_vv:
9883 case Intrinsic::riscv_vaesdf_vs:
9884 case Intrinsic::riscv_vaesdm_vv:
9885 case Intrinsic::riscv_vaesdm_vs:
9886 case Intrinsic::riscv_vaesef_vv:
9887 case Intrinsic::riscv_vaesef_vs:
9888 case Intrinsic::riscv_vaesem_vv:
9889 case Intrinsic::riscv_vaesem_vs:
9890 case Intrinsic::riscv_vaeskf1:
9891 case Intrinsic::riscv_vaeskf2:
9892 case Intrinsic::riscv_vaesz_vs:
9893 case Intrinsic::riscv_vsm4k:
9894 case Intrinsic::riscv_vsm4r_vv:
9895 case Intrinsic::riscv_vsm4r_vs: {
9896 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9897 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9898 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9899 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9900 return Op;
9901 }
9902 // EGS * EEW >= 256 bits
9903 case Intrinsic::riscv_vsm3c:
9904 case Intrinsic::riscv_vsm3me: {
9905 if (!isValidEGW(8, Op.getSimpleValueType(), Subtarget) ||
9906 !isValidEGW(8, Op->getOperand(1).getSimpleValueType(), Subtarget))
9907 report_fatal_error("EGW should be greater than or equal to 8 * SEW.");
9908 return Op;
9909 }
9910 // zvknha(SEW=32)/zvknhb(SEW=[32|64])
9911 case Intrinsic::riscv_vsha2ch:
9912 case Intrinsic::riscv_vsha2cl:
9913 case Intrinsic::riscv_vsha2ms: {
9914 if (Op->getSimpleValueType(0).getScalarSizeInBits() == 64 &&
9915 !Subtarget.hasStdExtZvknhb())
9916 report_fatal_error("SEW=64 needs Zvknhb to be enabled.");
9917 if (!isValidEGW(4, Op.getSimpleValueType(), Subtarget) ||
9918 !isValidEGW(4, Op->getOperand(1).getSimpleValueType(), Subtarget) ||
9919 !isValidEGW(4, Op->getOperand(2).getSimpleValueType(), Subtarget))
9920 report_fatal_error("EGW should be greater than or equal to 4 * SEW.");
9921 return Op;
9922 }
9923 case Intrinsic::riscv_sf_vc_v_x:
9924 case Intrinsic::riscv_sf_vc_v_i:
9925 case Intrinsic::riscv_sf_vc_v_xv:
9926 case Intrinsic::riscv_sf_vc_v_iv:
9927 case Intrinsic::riscv_sf_vc_v_vv:
9928 case Intrinsic::riscv_sf_vc_v_fv:
9929 case Intrinsic::riscv_sf_vc_v_xvv:
9930 case Intrinsic::riscv_sf_vc_v_ivv:
9931 case Intrinsic::riscv_sf_vc_v_vvv:
9932 case Intrinsic::riscv_sf_vc_v_fvv:
9933 case Intrinsic::riscv_sf_vc_v_xvw:
9934 case Intrinsic::riscv_sf_vc_v_ivw:
9935 case Intrinsic::riscv_sf_vc_v_vvw:
9936 case Intrinsic::riscv_sf_vc_v_fvw: {
9937 MVT VT = Op.getSimpleValueType();
9938
9939 SmallVector<SDValue> Operands{Op->op_values()};
9940 processVCIXOperands(Op, Operands, DAG);
9941
9942 MVT RetVT = VT;
9943 if (VT.isFixedLengthVector())
9944 RetVT = getContainerForFixedLengthVector(VT);
9945 else if (VT.isFloatingPoint())
9946 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9947 VT.getVectorElementCount());
9948
9949 SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
9950
9951 if (VT.isFixedLengthVector())
9952 NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
9953 else if (VT.isFloatingPoint())
9954 NewNode = DAG.getBitcast(VT, NewNode);
9955
9956 if (Op == NewNode)
9957 break;
9958
9959 return NewNode;
9960 }
9961 }
9962
9963 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
9964}
9965
9966 static SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
9967 unsigned Type) {
9968 SDLoc DL(Op);
9969 SmallVector<SDValue> Operands{Op->op_values()};
9970 Operands.erase(Operands.begin() + 1);
9971
9972 const RISCVSubtarget &Subtarget =
9973 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
9974 MVT VT = Op.getSimpleValueType();
9975 MVT RetVT = VT;
9976 MVT FloatVT = VT;
9977
9978 if (VT.isFloatingPoint()) {
9979 RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
9980 VT.getVectorElementCount());
9981 FloatVT = RetVT;
9982 }
9983 if (VT.isFixedLengthVector())
9984 RetVT = getContainerForFixedLengthVector(DAG, RetVT,
9985 Subtarget);
9986
9987 processVCIXOperands(Op, Operands, DAG);
9988
9989 SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
9990 SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
9991 SDValue Chain = NewNode.getValue(1);
9992
9993 if (VT.isFixedLengthVector())
9994 NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
9995 if (VT.isFloatingPoint())
9996 NewNode = DAG.getBitcast(VT, NewNode);
9997
9998 NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
9999
10000 return NewNode;
10001}
10002
10003 static SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
10004 unsigned Type) {
10005 SmallVector<SDValue> Operands{Op->op_values()};
10006 Operands.erase(Operands.begin() + 1);
10007 processVCIXOperands(Op, Operands, DAG);
10008
10009 return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
10010}
10011
10012SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
10013 SelectionDAG &DAG) const {
10014 unsigned IntNo = Op.getConstantOperandVal(1);
10015 switch (IntNo) {
10016 default:
10017 break;
10018 case Intrinsic::riscv_seg2_load:
10019 case Intrinsic::riscv_seg3_load:
10020 case Intrinsic::riscv_seg4_load:
10021 case Intrinsic::riscv_seg5_load:
10022 case Intrinsic::riscv_seg6_load:
10023 case Intrinsic::riscv_seg7_load:
10024 case Intrinsic::riscv_seg8_load: {
10025 SDLoc DL(Op);
10026 static const Intrinsic::ID VlsegInts[7] = {
10027 Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
10028 Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
10029 Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
10030 Intrinsic::riscv_vlseg8};
10031 unsigned NF = Op->getNumValues() - 1;
10032 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10033 MVT XLenVT = Subtarget.getXLenVT();
10034 MVT VT = Op->getSimpleValueType(0);
10035 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10036 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10037 ContainerVT.getScalarSizeInBits();
10038 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10039
10040 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10041 SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
10042 auto *Load = cast<MemIntrinsicSDNode>(Op);
10043
10044 SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other});
10045 SDValue Ops[] = {
10046 Load->getChain(),
10047 IntID,
10048 DAG.getUNDEF(VecTupTy),
10049 Op.getOperand(2),
10050 VL,
10051 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10052 SDValue Result =
10053 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
10054 Load->getMemoryVT(), Load->getMemOperand());
10055 SmallVector<SDValue, 9> Results;
10056 for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) {
10057 SDValue SubVec =
10058 DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT,
10059 Result.getValue(0), DAG.getVectorIdxConstant(RetIdx, DL));
10060 Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget));
10061 }
10062 Results.push_back(Result.getValue(1));
10063 return DAG.getMergeValues(Results, DL);
10064 }
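// For example, a riscv.seg2.load returning two v4i32 values is rewritten to a
// riscv_vlseg2 (vlseg2e32.v) on the scalable container type with VL = 4,
// followed by TUPLE_EXTRACTs that recover the two fixed-length results.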
10065 case Intrinsic::riscv_sf_vc_v_x_se:
10066 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
10067 case Intrinsic::riscv_sf_vc_v_i_se:
10068 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
10069 case Intrinsic::riscv_sf_vc_v_xv_se:
10070 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
10071 case Intrinsic::riscv_sf_vc_v_iv_se:
10072 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
10073 case Intrinsic::riscv_sf_vc_v_vv_se:
10074 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
10075 case Intrinsic::riscv_sf_vc_v_fv_se:
10076 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
10077 case Intrinsic::riscv_sf_vc_v_xvv_se:
10078 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
10079 case Intrinsic::riscv_sf_vc_v_ivv_se:
10080 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
10081 case Intrinsic::riscv_sf_vc_v_vvv_se:
10082 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
10083 case Intrinsic::riscv_sf_vc_v_fvv_se:
10084 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
10085 case Intrinsic::riscv_sf_vc_v_xvw_se:
10086 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
10087 case Intrinsic::riscv_sf_vc_v_ivw_se:
10088 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
10089 case Intrinsic::riscv_sf_vc_v_vvw_se:
10090 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
10091 case Intrinsic::riscv_sf_vc_v_fvw_se:
10092 return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
10093 }
10094
10095 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10096}
10097
10098SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10099 SelectionDAG &DAG) const {
10100 unsigned IntNo = Op.getConstantOperandVal(1);
10101 switch (IntNo) {
10102 default:
10103 break;
10104 case Intrinsic::riscv_seg2_store:
10105 case Intrinsic::riscv_seg3_store:
10106 case Intrinsic::riscv_seg4_store:
10107 case Intrinsic::riscv_seg5_store:
10108 case Intrinsic::riscv_seg6_store:
10109 case Intrinsic::riscv_seg7_store:
10110 case Intrinsic::riscv_seg8_store: {
10111 SDLoc DL(Op);
10112 static const Intrinsic::ID VssegInts[] = {
10113 Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
10114 Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
10115 Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
10116 Intrinsic::riscv_vsseg8};
10117 // Operands are (chain, int_id, vec*, ptr, vl)
10118 unsigned NF = Op->getNumOperands() - 4;
10119 assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
10120 MVT XLenVT = Subtarget.getXLenVT();
10121 MVT VT = Op->getOperand(2).getSimpleValueType();
10122 MVT ContainerVT = getContainerForFixedLengthVector(VT);
10123 unsigned Sz = NF * ContainerVT.getVectorMinNumElements() *
10124 ContainerVT.getScalarSizeInBits();
10125 EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF);
10126
10127 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
10128 SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
10129 SDValue Ptr = Op->getOperand(NF + 2);
10130
10131 auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
10132
10133 SDValue StoredVal = DAG.getUNDEF(VecTupTy);
10134 for (unsigned i = 0; i < NF; i++)
10135 StoredVal = DAG.getNode(
10136 RISCVISD::TUPLE_INSERT, DL, VecTupTy, StoredVal,
10137 convertToScalableVector(
10138 ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget),
10139 DAG.getVectorIdxConstant(i, DL));
10140
10141 SDValue Ops[] = {
10142 FixedIntrinsic->getChain(),
10143 IntID,
10144 StoredVal,
10145 Ptr,
10146 VL,
10147 DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)};
10148
10149 return DAG.getMemIntrinsicNode(
10150 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
10151 FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
10152 }
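// Likewise, a riscv.seg2.store of two v4i32 values assembles its operands into
// a vector tuple with TUPLE_INSERT and emits a riscv_vsseg2 (vsseg2e32.v) with
// VL = 4.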
10153 case Intrinsic::riscv_sf_vc_xv_se:
10154 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
10155 case Intrinsic::riscv_sf_vc_iv_se:
10156 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
10157 case Intrinsic::riscv_sf_vc_vv_se:
10158 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
10159 case Intrinsic::riscv_sf_vc_fv_se:
10160 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
10161 case Intrinsic::riscv_sf_vc_xvv_se:
10162 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
10163 case Intrinsic::riscv_sf_vc_ivv_se:
10164 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
10165 case Intrinsic::riscv_sf_vc_vvv_se:
10166 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
10167 case Intrinsic::riscv_sf_vc_fvv_se:
10168 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
10169 case Intrinsic::riscv_sf_vc_xvw_se:
10170 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
10171 case Intrinsic::riscv_sf_vc_ivw_se:
10172 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
10173 case Intrinsic::riscv_sf_vc_vvw_se:
10174 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
10175 case Intrinsic::riscv_sf_vc_fvw_se:
10176 return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
10177 }
10178
10179 return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
10180}
10181
10182static unsigned getRVVReductionOp(unsigned ISDOpcode) {
10183 switch (ISDOpcode) {
10184 default:
10185 llvm_unreachable("Unhandled reduction");
10186 case ISD::VP_REDUCE_ADD:
10187 case ISD::VECREDUCE_ADD:
10188 return RISCVISD::VECREDUCE_ADD_VL;
10189 case ISD::VP_REDUCE_UMAX:
10190 case ISD::VECREDUCE_UMAX:
10191 return RISCVISD::VECREDUCE_UMAX_VL;
10192 case ISD::VP_REDUCE_SMAX:
10193 case ISD::VECREDUCE_SMAX:
10194 return RISCVISD::VECREDUCE_SMAX_VL;
10195 case ISD::VP_REDUCE_UMIN:
10196 case ISD::VECREDUCE_UMIN:
10197 return RISCVISD::VECREDUCE_UMIN_VL;
10198 case ISD::VP_REDUCE_SMIN:
10199 case ISD::VECREDUCE_SMIN:
10200 return RISCVISD::VECREDUCE_SMIN_VL;
10201 case ISD::VP_REDUCE_AND:
10202 case ISD::VECREDUCE_AND:
10203 return RISCVISD::VECREDUCE_AND_VL;
10204 case ISD::VP_REDUCE_OR:
10205 case ISD::VECREDUCE_OR:
10206 return RISCVISD::VECREDUCE_OR_VL;
10207 case ISD::VP_REDUCE_XOR:
10208 case ISD::VECREDUCE_XOR:
10209 return RISCVISD::VECREDUCE_XOR_VL;
10210 case ISD::VP_REDUCE_FADD:
10211 return RISCVISD::VECREDUCE_FADD_VL;
10212 case ISD::VP_REDUCE_SEQ_FADD:
10213 return RISCVISD::VECREDUCE_SEQ_FADD_VL;
10214 case ISD::VP_REDUCE_FMAX:
10215 case ISD::VP_REDUCE_FMAXIMUM:
10216 return RISCVISD::VECREDUCE_FMAX_VL;
10217 case ISD::VP_REDUCE_FMIN:
10218 case ISD::VP_REDUCE_FMINIMUM:
10219 return RISCVISD::VECREDUCE_FMIN_VL;
10220 }
10221
10222}
10223
10224SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
10225 SelectionDAG &DAG,
10226 bool IsVP) const {
10227 SDLoc DL(Op);
10228 SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
10229 MVT VecVT = Vec.getSimpleValueType();
10230 assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
10231 Op.getOpcode() == ISD::VECREDUCE_OR ||
10232 Op.getOpcode() == ISD::VECREDUCE_XOR ||
10233 Op.getOpcode() == ISD::VP_REDUCE_AND ||
10234 Op.getOpcode() == ISD::VP_REDUCE_OR ||
10235 Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
10236 "Unexpected reduction lowering");
10237
10238 MVT XLenVT = Subtarget.getXLenVT();
10239
10240 MVT ContainerVT = VecVT;
10241 if (VecVT.isFixedLengthVector()) {
10242 ContainerVT = getContainerForFixedLengthVector(VecVT);
10243 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10244 }
10245
10246 SDValue Mask, VL;
10247 if (IsVP) {
10248 Mask = Op.getOperand(2);
10249 VL = Op.getOperand(3);
10250 } else {
10251 std::tie(Mask, VL) =
10252 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10253 }
10254
10255 ISD::CondCode CC;
10256 switch (Op.getOpcode()) {
10257 default:
10258 llvm_unreachable("Unhandled reduction");
10259 case ISD::VECREDUCE_AND:
10260 case ISD::VP_REDUCE_AND: {
10261 // vcpop ~x == 0
10262 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
10263 if (IsVP || VecVT.isFixedLengthVector())
10264 Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
10265 else
10266 Vec = DAG.getNode(ISD::XOR, DL, ContainerVT, Vec, TrueMask);
10267 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10268 CC = ISD::SETEQ;
10269 break;
10270 }
10271 case ISD::VECREDUCE_OR:
10272 case ISD::VP_REDUCE_OR:
10273 // vcpop x != 0
10274 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10275 CC = ISD::SETNE;
10276 break;
10277 case ISD::VECREDUCE_XOR:
10278 case ISD::VP_REDUCE_XOR: {
10279 // ((vcpop x) & 1) != 0
10280 SDValue One = DAG.getConstant(1, DL, XLenVT);
10281 Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
10282 Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
10283 CC = ISD::SETNE;
10284 break;
10285 }
10286 }
10287
10288 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
10289 SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
10290 SetCC = DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), SetCC);
10291
10292 if (!IsVP)
10293 return SetCC;
10294
10295 // Now include the start value in the operation.
10296 // Note that we must return the start value when no elements are operated
10297 // upon. The vcpop instructions we've emitted in each case above will return
10298 // 0 for an inactive vector, and so we've already received the neutral value:
10299 // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
10300 // can simply include the start value.
10301 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10302 return DAG.getNode(BaseOpc, DL, Op.getValueType(), SetCC, Op.getOperand(0));
10303}
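// Roughly, vecreduce.or over a mask register lowers to "vcpop.m a0, v0; snez
// a0, a0", vecreduce.and to "vmnot.m v8, v0; vcpop.m a0, v8; seqz a0, a0", and
// vecreduce.xor to "vcpop.m a0, v0; andi a0, a0, 1"; the exact instruction
// sequence and register choices may differ after instruction selection.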
10304
10305static bool isNonZeroAVL(SDValue AVL) {
10306 auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
10307 auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
10308 return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
10309 (ImmAVL && ImmAVL->getZExtValue() >= 1);
10310}
10311
10312/// Helper to lower a reduction sequence of the form:
10313/// scalar = reduce_op vec, scalar_start
10314static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
10315 SDValue StartValue, SDValue Vec, SDValue Mask,
10316 SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
10317 const RISCVSubtarget &Subtarget) {
10318 const MVT VecVT = Vec.getSimpleValueType();
10319 const MVT M1VT = getLMUL1VT(VecVT);
10320 const MVT XLenVT = Subtarget.getXLenVT();
10321 const bool NonZeroAVL = isNonZeroAVL(VL);
10322
10323 // The reduction needs an LMUL1 input; do the splat at either LMUL1
10324 // or the original VT if fractional.
10325 auto InnerVT = VecVT.bitsLE(M1VT) ? VecVT : M1VT;
10326 // We reuse the VL of the reduction to reduce vsetvli toggles if we can
10327 // prove it is non-zero. For the AVL=0 case, we need the scalar to
10328 // be the result of the reduction operation.
10329 auto InnerVL = NonZeroAVL ? VL : DAG.getConstant(1, DL, XLenVT);
10330 SDValue InitialValue = lowerScalarInsert(StartValue, InnerVL, InnerVT, DL,
10331 DAG, Subtarget);
10332 if (M1VT != InnerVT)
10333 InitialValue =
10334 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, M1VT, DAG.getUNDEF(M1VT),
10335 InitialValue, DAG.getVectorIdxConstant(0, DL));
10336 SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
10337 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
10338 SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
10339 SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
10340 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
10341 DAG.getVectorIdxConstant(0, DL));
10342}
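// As a sketch, an integer vecreduce.add with its scalar start value in a0 ends
// up as something like "vmv.s.x v9, a0; vredsum.vs v9, v8, v9; vmv.x.s a0, v9",
// with the scalar inserted into an LMUL=1 (or smaller) vector as done above;
// register choices are illustrative.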
10343
10344SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
10345 SelectionDAG &DAG) const {
10346 SDLoc DL(Op);
10347 SDValue Vec = Op.getOperand(0);
10348 EVT VecEVT = Vec.getValueType();
10349
10350 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
10351
10352 // Due to ordering in legalize types we may have a vector type that needs to
10353 // be split. Do that manually so we can get down to a legal type.
10354 while (getTypeAction(*DAG.getContext(), VecEVT) ==
10355 TargetLowering::TypeSplitVector) {
10356 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
10357 VecEVT = Lo.getValueType();
10358 Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
10359 }
10360
10361 // TODO: The type may need to be widened rather than split. Or widened before
10362 // it can be split.
10363 if (!isTypeLegal(VecEVT))
10364 return SDValue();
10365
10366 MVT VecVT = VecEVT.getSimpleVT();
10367 MVT VecEltVT = VecVT.getVectorElementType();
10368 unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
10369
10370 MVT ContainerVT = VecVT;
10371 if (VecVT.isFixedLengthVector()) {
10372 ContainerVT = getContainerForFixedLengthVector(VecVT);
10373 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10374 }
10375
10376 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10377
10378 SDValue StartV = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
10379 switch (BaseOpc) {
10380 case ISD::AND:
10381 case ISD::OR:
10382 case ISD::UMAX:
10383 case ISD::UMIN:
10384 case ISD::SMAX:
10385 case ISD::SMIN:
10386 StartV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Vec,
10387 DAG.getVectorIdxConstant(0, DL));
10388 }
10389 return lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), StartV, Vec,
10390 Mask, VL, DL, DAG, Subtarget);
10391}
10392
10393// Given a reduction op, this function returns the matching reduction opcode,
10394// the vector SDValue and the scalar SDValue required to lower this to a
10395// RISCVISD node.
10396static std::tuple<unsigned, SDValue, SDValue>
10398 const RISCVSubtarget &Subtarget) {
10399 SDLoc DL(Op);
10400 auto Flags = Op->getFlags();
10401 unsigned Opcode = Op.getOpcode();
10402 switch (Opcode) {
10403 default:
10404 llvm_unreachable("Unhandled reduction");
10405 case ISD::VECREDUCE_FADD: {
10406 // Use positive zero if we can. It is cheaper to materialize.
10407 SDValue Zero =
10408 DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
10409 return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
10410 }
10411 case ISD::VECREDUCE_SEQ_FADD:
10412 return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
10413 Op.getOperand(0));
10414 case ISD::VECREDUCE_FMINIMUM:
10415 case ISD::VECREDUCE_FMAXIMUM:
10416 case ISD::VECREDUCE_FMIN:
10417 case ISD::VECREDUCE_FMAX: {
10418 SDValue Front =
10419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op.getOperand(0),
10420 DAG.getVectorIdxConstant(0, DL));
10421 unsigned RVVOpc =
10422 (Opcode == ISD::VECREDUCE_FMIN || Opcode == ISD::VECREDUCE_FMINIMUM)
10423 ? RISCVISD::VECREDUCE_FMIN_VL
10424 : RISCVISD::VECREDUCE_FMAX_VL;
10425 return std::make_tuple(RVVOpc, Op.getOperand(0), Front);
10426 }
10427 }
10428}
10429
10430SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
10431 SelectionDAG &DAG) const {
10432 SDLoc DL(Op);
10433 MVT VecEltVT = Op.getSimpleValueType();
10434
10435 unsigned RVVOpcode;
10436 SDValue VectorVal, ScalarVal;
10437 std::tie(RVVOpcode, VectorVal, ScalarVal) =
10438 getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT, Subtarget);
10439 MVT VecVT = VectorVal.getSimpleValueType();
10440
10441 MVT ContainerVT = VecVT;
10442 if (VecVT.isFixedLengthVector()) {
10443 ContainerVT = getContainerForFixedLengthVector(VecVT);
10444 VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
10445 }
10446
10447 MVT ResVT = Op.getSimpleValueType();
10448 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
10449 SDValue Res = lowerReductionSeq(RVVOpcode, ResVT, ScalarVal, VectorVal, Mask,
10450 VL, DL, DAG, Subtarget);
10451 if (Op.getOpcode() != ISD::VECREDUCE_FMINIMUM &&
10452 Op.getOpcode() != ISD::VECREDUCE_FMAXIMUM)
10453 return Res;
10454
10455 if (Op->getFlags().hasNoNaNs())
10456 return Res;
10457
10458 // Force output to NaN if any element is Nan.
10459 SDValue IsNan =
10460 DAG.getNode(RISCVISD::SETCC_VL, DL, Mask.getValueType(),
10461 {VectorVal, VectorVal, DAG.getCondCode(ISD::SETNE),
10462 DAG.getUNDEF(Mask.getValueType()), Mask, VL});
10463 MVT XLenVT = Subtarget.getXLenVT();
10464 SDValue CPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNan, Mask, VL);
10465 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, CPop,
10466 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10467 return DAG.getSelect(
10468 DL, ResVT, NoNaNs, Res,
10469 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10470}
10471
10472SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
10473 SelectionDAG &DAG) const {
10474 SDLoc DL(Op);
10475 unsigned Opc = Op.getOpcode();
10476 SDValue Start = Op.getOperand(0);
10477 SDValue Vec = Op.getOperand(1);
10478 EVT VecEVT = Vec.getValueType();
10479 MVT XLenVT = Subtarget.getXLenVT();
10480
10481 // TODO: The type may need to be widened rather than split. Or widened before
10482 // it can be split.
10483 if (!isTypeLegal(VecEVT))
10484 return SDValue();
10485
10486 MVT VecVT = VecEVT.getSimpleVT();
10487 unsigned RVVOpcode = getRVVReductionOp(Opc);
10488
10489 if (VecVT.isFixedLengthVector()) {
10490 auto ContainerVT = getContainerForFixedLengthVector(VecVT);
10491 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10492 }
10493
10494 SDValue VL = Op.getOperand(3);
10495 SDValue Mask = Op.getOperand(2);
10496 SDValue Res =
10497 lowerReductionSeq(RVVOpcode, Op.getSimpleValueType(), Op.getOperand(0),
10498 Vec, Mask, VL, DL, DAG, Subtarget);
10499 if ((Opc != ISD::VP_REDUCE_FMINIMUM && Opc != ISD::VP_REDUCE_FMAXIMUM) ||
10500 Op->getFlags().hasNoNaNs())
10501 return Res;
10502
10503 // Propagate NaNs.
10504 MVT PredVT = getMaskTypeFor(Vec.getSimpleValueType());
10505 // Check if any of the elements in Vec is NaN.
10506 SDValue IsNaN = DAG.getNode(
10507 RISCVISD::SETCC_VL, DL, PredVT,
10508 {Vec, Vec, DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(PredVT), Mask, VL});
10509 SDValue VCPop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, IsNaN, Mask, VL);
10510 // Check if the start value is NaN.
10511 SDValue StartIsNaN = DAG.getSetCC(DL, XLenVT, Start, Start, ISD::SETUO);
10512 VCPop = DAG.getNode(ISD::OR, DL, XLenVT, VCPop, StartIsNaN);
10513 SDValue NoNaNs = DAG.getSetCC(DL, XLenVT, VCPop,
10514 DAG.getConstant(0, DL, XLenVT), ISD::SETEQ);
10515 MVT ResVT = Res.getSimpleValueType();
10516 return DAG.getSelect(
10517 DL, ResVT, NoNaNs, Res,
10518 DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
10519}
10520
10521SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
10522 SelectionDAG &DAG) const {
10523 SDValue Vec = Op.getOperand(0);
10524 SDValue SubVec = Op.getOperand(1);
10525 MVT VecVT = Vec.getSimpleValueType();
10526 MVT SubVecVT = SubVec.getSimpleValueType();
10527
10528 SDLoc DL(Op);
10529 MVT XLenVT = Subtarget.getXLenVT();
10530 unsigned OrigIdx = Op.getConstantOperandVal(2);
10531 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10532
10533 if (OrigIdx == 0 && Vec.isUndef())
10534 return Op;
10535
10536 // We don't have the ability to slide mask vectors up indexed by their i1
10537 // elements; the smallest we can do is i8. Often we are able to bitcast to
10538 // equivalent i8 vectors. Note that when inserting a fixed-length vector
10539 // into a scalable one, we might not necessarily have enough scalable
10540 // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
10541 if (SubVecVT.getVectorElementType() == MVT::i1) {
10542 if (VecVT.getVectorMinNumElements() >= 8 &&
10543 SubVecVT.getVectorMinNumElements() >= 8) {
10544 assert(OrigIdx % 8 == 0 && "Invalid index");
10545 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10546 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10547 "Unexpected mask vector lowering");
10548 OrigIdx /= 8;
10549 SubVecVT =
10550 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10551 SubVecVT.isScalableVector());
10552 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10553 VecVT.isScalableVector());
10554 Vec = DAG.getBitcast(VecVT, Vec);
10555 SubVec = DAG.getBitcast(SubVecVT, SubVec);
10556 } else {
10557 // We can't slide this mask vector up indexed by its i1 elements.
10558 // This poses a problem when we wish to insert a scalable vector which
10559 // can't be re-expressed as a larger type. Just choose the slow path and
10560 // extend to a larger type, then truncate back down.
10561 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10562 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10563 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10564 SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
10565 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
10566 Op.getOperand(2));
10567 SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
10568 return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
10569 }
10570 }
10571
10572 // If the subvector vector is a fixed-length type and we don't know VLEN
10573 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10574 // don't know which register of a LMUL group contains the specific subvector
10575 // as we only know the minimum register size. Therefore we must slide the
10576 // vector group up the full amount.
10577 const auto VLen = Subtarget.getRealVLen();
10578 if (SubVecVT.isFixedLengthVector() && !VLen) {
10579 MVT ContainerVT = VecVT;
10580 if (VecVT.isFixedLengthVector()) {
10581 ContainerVT = getContainerForFixedLengthVector(VecVT);
10582 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10583 }
10584
10585 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
10586 DAG.getUNDEF(ContainerVT), SubVec,
10587 DAG.getVectorIdxConstant(0, DL));
10588
10589 SDValue Mask =
10590 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10591 // Set the vector length to only the number of elements we care about. Note
10592 // that for slideup this includes the offset.
10593 unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
10594 SDValue VL = DAG.getConstant(EndIndex, DL, XLenVT);
10595
10596 // Use tail agnostic policy if we're inserting over Vec's tail.
10597 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10598 if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
10599 Policy = RISCVII::TAIL_AGNOSTIC;
10600
10601 // If we're inserting into the lowest elements, use a tail undisturbed
10602 // vmv.v.v.
10603 if (OrigIdx == 0) {
10604 SubVec =
10605 DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
10606 } else {
10607 SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10608 SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
10609 SlideupAmt, Mask, VL, Policy);
10610 }
10611
10612 if (VecVT.isFixedLengthVector())
10613 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10614 return DAG.getBitcast(Op.getValueType(), SubVec);
10615 }
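// For instance, inserting a fixed v2i32 into v8i32 at index 4 without a known
// VLEN uses a vslideup by 4 with VL = 6 (the end index) and a tail-undisturbed
// policy, so the remaining elements of the destination are preserved.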
10616
10617 MVT ContainerVecVT = VecVT;
10618 if (VecVT.isFixedLengthVector()) {
10619 ContainerVecVT = getContainerForFixedLengthVector(VecVT);
10620 Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
10621 }
10622
10623 MVT ContainerSubVecVT = SubVecVT;
10624 if (SubVecVT.isFixedLengthVector()) {
10625 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10626 SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
10627 }
10628
10629 unsigned SubRegIdx;
10630 ElementCount RemIdx;
10631 // insert_subvector scales the index by vscale if the subvector is scalable,
10632 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10633 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10634 if (SubVecVT.isFixedLengthVector()) {
10635 assert(VLen);
10636 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10637 auto Decompose =
10638 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10639 ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10640 SubRegIdx = Decompose.first;
10641 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10642 (OrigIdx % Vscale));
10643 } else {
10644 auto Decompose =
10645 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10646 ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
10647 SubRegIdx = Decompose.first;
10648 RemIdx = ElementCount::getScalable(Decompose.second);
10649 }
10650
10651 TypeSize VecRegSize = TypeSize::getScalable(RISCV::RVVBitsPerBlock);
10652 assert(isPowerOf2_64(
10653 Subtarget.expandVScale(SubVecVT.getSizeInBits()).getKnownMinValue()));
10654 bool ExactlyVecRegSized =
10655 Subtarget.expandVScale(SubVecVT.getSizeInBits())
10656 .isKnownMultipleOf(Subtarget.expandVScale(VecRegSize));
10657
10658 // 1. If the Idx has been completely eliminated and this subvector's size is
10659 // a vector register or a multiple thereof, or the surrounding elements are
10660 // undef, then this is a subvector insert which naturally aligns to a vector
10661 // register. These can easily be handled using subregister manipulation.
10662 // 2. If the subvector isn't an exact multiple of a valid register group size,
10663 // then the insertion must preserve the undisturbed elements of the register.
10664 // We do this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1
10665 // vector type (which resolves to a subregister copy), performing a VSLIDEUP
10666 // to place the subvector within the vector register, and an INSERT_SUBVECTOR
10667 // of that LMUL=1 type back into the larger vector (resolving to another
10668 // subregister operation). See below for how our VSLIDEUP works. We go via a
10669 // LMUL=1 type to avoid allocating a large register group to hold our
10670 // subvector.
10671 if (RemIdx.isZero() && (ExactlyVecRegSized || Vec.isUndef())) {
10672 if (SubVecVT.isFixedLengthVector()) {
10673 // We may get NoSubRegister if inserting at index 0 and the subvec
10674 // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
10675 if (SubRegIdx == RISCV::NoSubRegister) {
10676 assert(OrigIdx == 0);
10677 return Op;
10678 }
10679
10680 // Use a insert_subvector that will resolve to an insert subreg.
10681 assert(VLen);
10682 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10683 SDValue Insert =
10684 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10685 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10686 if (VecVT.isFixedLengthVector())
10687 Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
10688 return Insert;
10689 }
10690 return Op;
10691 }
10692
10693 // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
10694 // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
10695 // (in our case undisturbed). This means we can set up a subvector insertion
10696 // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
10697 // size of the subvector.
10698 MVT InterSubVT = ContainerVecVT;
10699 SDValue AlignedExtract = Vec;
10700 unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
10701 if (SubVecVT.isFixedLengthVector()) {
10702 assert(VLen);
10703 AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
10704 }
10705 if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
10706 InterSubVT = getLMUL1VT(ContainerVecVT);
10707 // Extract a subvector equal to the nearest full vector register type. This
10708 // should resolve to a EXTRACT_SUBREG instruction.
10709 AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10710 DAG.getVectorIdxConstant(AlignedIdx, DL));
10711 }
10712
10713 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
10714 DAG.getUNDEF(InterSubVT), SubVec,
10715 DAG.getVectorIdxConstant(0, DL));
10716
10717 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
10718
10719 ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
10720 VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
10721
10722 // Use tail agnostic policy if we're inserting over InterSubVT's tail.
10723 unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
10724 if (Subtarget.expandVScale(EndIndex) ==
10725 Subtarget.expandVScale(InterSubVT.getVectorElementCount()))
10726 Policy = RISCVII::TAIL_AGNOSTIC;
10727
10728 // If we're inserting into the lowest elements, use a tail undisturbed
10729 // vmv.v.v.
10730 if (RemIdx.isZero()) {
10731 SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
10732 SubVec, VL);
10733 } else {
10734 SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10735
10736 // Construct the vector length corresponding to RemIdx + length(SubVecVT).
10737 VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
10738
10739 SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
10740 SlideupAmt, Mask, VL, Policy);
10741 }
10742
10743 // If required, insert this subvector back into the correct vector register.
10744 // This should resolve to an INSERT_SUBREG instruction.
10745 if (ContainerVecVT.bitsGT(InterSubVT))
10746 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
10747 DAG.getVectorIdxConstant(AlignedIdx, DL));
10748
10749 if (VecVT.isFixedLengthVector())
10750 SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
10751
10752 // We might have bitcast from a mask type: cast back to the original type if
10753 // required.
10754 return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
10755}
10756
10757SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
10758 SelectionDAG &DAG) const {
10759 SDValue Vec = Op.getOperand(0);
10760 MVT SubVecVT = Op.getSimpleValueType();
10761 MVT VecVT = Vec.getSimpleValueType();
10762
10763 SDLoc DL(Op);
10764 MVT XLenVT = Subtarget.getXLenVT();
10765 unsigned OrigIdx = Op.getConstantOperandVal(1);
10766 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
10767
10768 // With an index of 0 this is a cast-like subvector, which can be performed
10769 // with subregister operations.
10770 if (OrigIdx == 0)
10771 return Op;
10772
10773 // We don't have the ability to slide mask vectors down indexed by their i1
10774 // elements; the smallest we can do is i8. Often we are able to bitcast to
10775 // equivalent i8 vectors. Note that when extracting a fixed-length vector
10776 // from a scalable one, we might not necessarily have enough scalable
10777 // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
10778 if (SubVecVT.getVectorElementType() == MVT::i1) {
10779 if (VecVT.getVectorMinNumElements() >= 8 &&
10780 SubVecVT.getVectorMinNumElements() >= 8) {
10781 assert(OrigIdx % 8 == 0 && "Invalid index");
10782 assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
10783 SubVecVT.getVectorMinNumElements() % 8 == 0 &&
10784 "Unexpected mask vector lowering");
10785 OrigIdx /= 8;
10786 SubVecVT =
10787 MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
10788 SubVecVT.isScalableVector());
10789 VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
10790 VecVT.isScalableVector());
10791 Vec = DAG.getBitcast(VecVT, Vec);
10792 } else {
10793 // We can't slide this mask vector down, indexed by its i1 elements.
10794 // This poses a problem when we wish to extract a scalable vector which
10795 // can't be re-expressed as a larger type. Just choose the slow path and
10796 // extend to a larger type, then truncate back down.
10797 // TODO: We could probably improve this when extracting certain fixed
10798 // from fixed, where we can extract as i8 and shift the correct element
10799 // right to reach the desired subvector?
10800 MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
10801 MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
10802 Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
10803 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
10804 Op.getOperand(1));
10805 SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
10806 return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
10807 }
10808 }
10809
10810 const auto VLen = Subtarget.getRealVLen();
10811
10812 // If the subvector vector is a fixed-length type and we don't know VLEN
10813 // exactly, we cannot use subregister manipulation to simplify the codegen; we
10814 // don't know which register of a LMUL group contains the specific subvector
10815 // as we only know the minimum register size. Therefore we must slide the
10816 // vector group down the full amount.
10817 if (SubVecVT.isFixedLengthVector() && !VLen) {
10818 MVT ContainerVT = VecVT;
10819 if (VecVT.isFixedLengthVector()) {
10820 ContainerVT = getContainerForFixedLengthVector(VecVT);
10821 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
10822 }
10823
10824 // Shrink down Vec so we're performing the slidedown on a smaller LMUL.
10825 unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
10826 if (auto ShrunkVT =
10827 getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
10828 ContainerVT = *ShrunkVT;
10829 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
10830 DAG.getVectorIdxConstant(0, DL));
10831 }
10832
10833 SDValue Mask =
10834 getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
10835 // Set the vector length to only the number of elements we care about. This
10836 // avoids sliding down elements we're going to discard straight away.
10837 SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10838 SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
10839 SDValue Slidedown =
10840 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
10841 DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
10842 // Now we can use a cast-like subvector extract to get the result.
10843 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10844 DAG.getVectorIdxConstant(0, DL));
10845 return DAG.getBitcast(Op.getValueType(), Slidedown);
10846 }
10847
10848 if (VecVT.isFixedLengthVector()) {
10849 VecVT = getContainerForFixedLengthVector(VecVT);
10850 Vec = convertToScalableVector(VecVT, Vec, DAG, Subtarget);
10851 }
10852
10853 MVT ContainerSubVecVT = SubVecVT;
10854 if (SubVecVT.isFixedLengthVector())
10855 ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
10856
10857 unsigned SubRegIdx;
10858 ElementCount RemIdx;
10859 // extract_subvector scales the index by vscale if the subvector is scalable,
10860 // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
10861 // we have a fixed length subvector, we need to adjust the index by 1/vscale.
10862 if (SubVecVT.isFixedLengthVector()) {
10863 assert(VLen);
10864 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10865 auto Decompose =
10866 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10867 VecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
10868 SubRegIdx = Decompose.first;
10869 RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
10870 (OrigIdx % Vscale));
10871 } else {
10872 auto Decompose =
10873 RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
10874 VecVT, ContainerSubVecVT, OrigIdx, TRI);
10875 SubRegIdx = Decompose.first;
10876 RemIdx = ElementCount::getScalable(Decompose.second);
10877 }
10878
10879 // If the Idx has been completely eliminated then this is a subvector extract
10880 // which naturally aligns to a vector register. These can easily be handled
10881 // using subregister manipulation. We use an extract_subvector that will
10882 // resolve to an extract subreg.
10883 if (RemIdx.isZero()) {
10884 if (SubVecVT.isFixedLengthVector()) {
10885 assert(VLen);
10886 unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
10887 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerSubVecVT, Vec,
10888 DAG.getConstant(OrigIdx / Vscale, DL, XLenVT));
10889 return convertFromScalableVector(SubVecVT, Vec, DAG, Subtarget);
10890 }
10891 return Op;
10892 }
10893
10894 // Else SubVecVT is M1 or smaller and may need to be slid down: if SubVecVT
10895 // was > M1 then the index would need to be a multiple of VLMAX, and so would
10896 // divide exactly.
10897 assert(RISCVVType::decodeVLMUL(getLMUL(ContainerSubVecVT)).second ||
10898 getLMUL(ContainerSubVecVT) == RISCVII::VLMUL::LMUL_1);
10899
10900 // If the vector type is an LMUL-group type, extract a subvector equal to the
10901 // nearest full vector register type.
10902 MVT InterSubVT = VecVT;
10903 if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
10904 // If VecVT has an LMUL > 1, then SubVecVT should have a smaller LMUL, and
10905 // we should have successfully decomposed the extract into a subregister.
10906 // We use an extract_subvector that will resolve to a subreg extract.
10907 assert(SubRegIdx != RISCV::NoSubRegister);
10908 (void)SubRegIdx;
10909 unsigned Idx = OrigIdx - RemIdx.getKnownMinValue();
10910 if (SubVecVT.isFixedLengthVector()) {
10911 assert(VLen);
10912 Idx /= *VLen / RISCV::RVVBitsPerBlock;
10913 }
10914 InterSubVT = getLMUL1VT(VecVT);
10915 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
10916 DAG.getConstant(Idx, DL, XLenVT));
10917 }
10918
10919 // Slide this vector register down by the desired number of elements in order
10920 // to place the desired subvector starting at element 0.
10921 SDValue SlidedownAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
10922 auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
10923 if (SubVecVT.isFixedLengthVector())
10924 VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
10925 SDValue Slidedown =
10926 getVSlidedown(DAG, Subtarget, DL, InterSubVT, DAG.getUNDEF(InterSubVT),
10927 Vec, SlidedownAmt, Mask, VL);
10928
10929 // Now the vector is in the right position, extract our final subvector. This
10930 // should resolve to a COPY.
10931 Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
10932 DAG.getVectorIdxConstant(0, DL));
10933
10934 // We might have bitcast from a mask type: cast back to the original type if
10935 // required.
10936 return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
10937}
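// For example, extracting nxv1i32 from nxv2i32 at index 1 cannot be a plain
// subregister copy, so it becomes a vslidedown.vx by (vscale * 1) elements
// followed by a cast-like EXTRACT_SUBVECTOR at index 0.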
10938
10939// Widen a vector's operands to i8, then truncate its results back to the
10940// original type, typically i1. All operand and result types must be the same.
10941 static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
10942 SelectionDAG &DAG) {
10943 MVT VT = N.getSimpleValueType();
10944 MVT WideVT = VT.changeVectorElementType(MVT::i8);
10945 SmallVector<SDValue, 4> WideOps;
10946 for (SDValue Op : N->ops()) {
10947 assert(Op.getSimpleValueType() == VT &&
10948 "Operands and result must be same type");
10949 WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
10950 }
10951
10952 unsigned NumVals = N->getNumValues();
10953
10954 SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
10955 NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
10956 SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
10957 SmallVector<SDValue, 4> TruncVals;
10958 for (unsigned I = 0; I < NumVals; I++) {
10959 TruncVals.push_back(
10960 DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
10961 DAG.getConstant(0, DL, WideVT), ISD::SETNE));
10962 }
10963
10964 if (TruncVals.size() > 1)
10965 return DAG.getMergeValues(TruncVals, DL);
10966 return TruncVals.front();
10967}
10968
10969SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
10970 SelectionDAG &DAG) const {
10971 SDLoc DL(Op);
10972 MVT VecVT = Op.getSimpleValueType();
10973
10974 assert(VecVT.isScalableVector() &&
10975 "vector_interleave on non-scalable vector!");
10976
10977 // 1 bit element vectors need to be widened to e8
10978 if (VecVT.getVectorElementType() == MVT::i1)
10979 return widenVectorOpsToi8(Op, DL, DAG);
10980
10981 // If the VT is LMUL=8, we need to split and reassemble.
10982 if (VecVT.getSizeInBits().getKnownMinValue() ==
10983 (8 * RISCV::RVVBitsPerBlock)) {
10984 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
10985 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
10986 EVT SplitVT = Op0Lo.getValueType();
10987
10988 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10989 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
10990 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
10991 DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
10992
10993 SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
10994 ResLo.getValue(0), ResHi.getValue(0));
10995 SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
10996 ResHi.getValue(1));
10997 return DAG.getMergeValues({Even, Odd}, DL);
10998 }
10999
11000 // Concatenate the two vectors as one vector to deinterleave
11001 MVT ConcatVT =
11003 MVT::getVectorVT(VecVT.getVectorElementType(), VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11004 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11005 Op.getOperand(0), Op.getOperand(1));
11006
11007 // We can deinterleave through vnsrl.wi if the element type is smaller than
11008 // ELEN
11009 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11010 SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
11011 SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
11012 return DAG.getMergeValues({Even, Odd}, DL);
11013 }
11014
11015 // For the indices, use the vmv.v.x of an i8 constant to fill the largest
11016 // possibly mask vector, then extract the required subvector. Doing this
11017 // (instead of a vid, vmsne sequence) reduces LMUL, and allows the mask
11018 // creation to be rematerialized during register allocation to reduce
11019 // register pressure if needed.
11020
11021 MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
11022
11023 SDValue EvenSplat = DAG.getConstant(0b01010101, DL, MVT::nxv8i8);
11024 EvenSplat = DAG.getBitcast(MVT::nxv64i1, EvenSplat);
11025 SDValue EvenMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, EvenSplat,
11026 DAG.getVectorIdxConstant(0, DL));
11027
11028 SDValue OddSplat = DAG.getConstant(0b10101010, DL, MVT::nxv8i8);
11029 OddSplat = DAG.getBitcast(MVT::nxv64i1, OddSplat);
11030 SDValue OddMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskVT, OddSplat,
11031 DAG.getVectorIdxConstant(0, DL));
11032
11033 // vcompress the even and odd elements into two separate vectors
11034 SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11035 EvenMask, DAG.getUNDEF(ConcatVT));
11036 SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
11037 OddMask, DAG.getUNDEF(ConcatVT));
11038
11039 // Extract the result half of the gather for even and odd
11040 SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
11041 DAG.getVectorIdxConstant(0, DL));
11042 SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
11043 DAG.getVectorIdxConstant(0, DL));
11044
11045 return DAG.getMergeValues({Even, Odd}, DL);
11046}
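// When the element type is narrower than ELEN, the deinterleave above reduces
// to two narrowing shifts, e.g. for i32 elements roughly "vnsrl.wi vEven, vSrc,
// 0" for the even lanes and a vnsrl by 32 (via a register operand, since the
// immediate form only covers shifts up to 31) for the odd lanes.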
11047
11048SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
11049 SelectionDAG &DAG) const {
11050 SDLoc DL(Op);
11051 MVT VecVT = Op.getSimpleValueType();
11052
11053 assert(VecVT.isScalableVector() &&
11054 "vector_interleave on non-scalable vector!");
11055
11056 // i1 vectors need to be widened to i8
11057 if (VecVT.getVectorElementType() == MVT::i1)
11058 return widenVectorOpsToi8(Op, DL, DAG);
11059
11060 MVT XLenVT = Subtarget.getXLenVT();
11061 SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
11062
11063 // If the VT is LMUL=8, we need to split and reassemble.
11064 if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
11065 auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11066 auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
11067 EVT SplitVT = Op0Lo.getValueType();
11068
11069 SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11070 DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
11071 SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
11072 DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
11073
11074 SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11075 ResLo.getValue(0), ResLo.getValue(1));
11076 SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
11077 ResHi.getValue(0), ResHi.getValue(1));
11078 return DAG.getMergeValues({Lo, Hi}, DL);
11079 }
11080
11081 SDValue Interleaved;
11082
11083 // If the element type is smaller than ELEN, then we can interleave with
11084 // vwaddu.vv and vwmaccu.vx
11085 if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
11086 Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
11087 DAG, Subtarget);
11088 } else {
11089 // Otherwise, fallback to using vrgathere16.vv
11090 MVT ConcatVT =
11091 MVT::getVectorVT(VecVT.getVectorElementType(),
11092 VecVT.getVectorElementCount().multiplyCoefficientBy(2));
11093 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
11094 Op.getOperand(0), Op.getOperand(1));
11095
11096 MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
11097
11098 // 0 1 2 3 4 5 6 7 ...
11099 SDValue StepVec = DAG.getStepVector(DL, IdxVT);
11100
11101 // 1 1 1 1 1 1 1 1 ...
11102 SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
11103
11104 // 1 0 1 0 1 0 1 0 ...
11105 SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
11106 OddMask = DAG.getSetCC(
11107 DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
11108 DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
11109 ISD::CondCode::SETNE);
11110
11111 SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
11112
11113 // Build up the index vector for interleaving the concatenated vector
11114 // 0 0 1 1 2 2 3 3 ...
11115 SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
11116 // 0 n 1 n+1 2 n+2 3 n+3 ...
11117 Idx =
11118 DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
11119
11120 // Then perform the interleave
11121 // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
11122 SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
11123 Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
11124 Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
11125 }
11126
11127 // Extract the two halves from the interleaved result
11128 SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11129 DAG.getVectorIdxConstant(0, DL));
11130 SDValue Hi = DAG.getNode(
11131 ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
11132 DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
11133
11134 return DAG.getMergeValues({Lo, Hi}, DL);
11135}
11136
11137// Lower step_vector to the vid instruction. Any non-identity step value must
11138// be accounted for by manual expansion.
11139SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
11140 SelectionDAG &DAG) const {
11141 SDLoc DL(Op);
11142 MVT VT = Op.getSimpleValueType();
11143 assert(VT.isScalableVector() && "Expected scalable vector");
11144 MVT XLenVT = Subtarget.getXLenVT();
11145 auto [Mask, VL] = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
11146 SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
11147 uint64_t StepValImm = Op.getConstantOperandVal(0);
11148 if (StepValImm != 1) {
11149 if (isPowerOf2_64(StepValImm)) {
11150 SDValue StepVal =
11151 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
11152 DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
11153 StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
11154 } else {
11155 SDValue StepVal = lowerScalarSplat(
11156 SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
11157 VL, VT, DL, DAG, Subtarget);
11158 StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
11159 }
11160 }
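// Illustrative examples of the expansion above (resulting element values):
//   step_vector(4): vid -> 0 1 2 3 ..., shifted left by 2 -> 0 4 8 12 ...
//   step_vector(6): vid -> 0 1 2 3 ..., multiplied by 6   -> 0 6 12 18 ...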
11161 return StepVec;
11162}
11163
11164// Implement vector_reverse using vrgather.vv with indices determined by
11165// subtracting the id of each element from (VLMAX-1). This will convert
11166// the indices like so:
11167// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
11168// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
11169SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
11170 SelectionDAG &DAG) const {
11171 SDLoc DL(Op);
11172 MVT VecVT = Op.getSimpleValueType();
11173 if (VecVT.getVectorElementType() == MVT::i1) {
11174 MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
11175 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
11176 SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
11177 return DAG.getSetCC(DL, VecVT, Op2,
11178 DAG.getConstant(0, DL, Op2.getValueType()), ISD::SETNE);
11179 }
11180
11181 MVT ContainerVT = VecVT;
11182 SDValue Vec = Op.getOperand(0);
11183 if (VecVT.isFixedLengthVector()) {
11184 ContainerVT = getContainerForFixedLengthVector(VecVT);
11185 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
11186 }
11187
11188 MVT XLenVT = Subtarget.getXLenVT();
11189 auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
11190
11191 // On some uarchs vrgather.vv will read from every input register for each
11192 // output register, regardless of the indices. However, to reverse a vector
11193 // each output register only needs to read from one register. So decompose it
11194 // into LMUL * M1 vrgather.vvs, so we get O(LMUL) performance instead of
11195 // O(LMUL^2).
11196 //
11197 // vsetvli a1, zero, e64, m4, ta, ma
11198 // vrgatherei16.vv v12, v8, v16
11199 // ->
11200 // vsetvli a1, zero, e64, m1, ta, ma
11201 // vrgather.vv v15, v8, v16
11202 // vrgather.vv v14, v9, v16
11203 // vrgather.vv v13, v10, v16
11204 // vrgather.vv v12, v11, v16
11205 if (ContainerVT.bitsGT(getLMUL1VT(ContainerVT)) &&
11206 ContainerVT.getVectorElementCount().isKnownMultipleOf(2)) {
11207 auto [Lo, Hi] = DAG.SplitVector(Vec, DL);
11208 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, Lo.getSimpleValueType(), Lo);
11209 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, Hi.getSimpleValueType(), Hi);
11210 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ContainerVT, Hi, Lo);
11211
11212 // Fixed length vectors might not fit exactly into their container, and so
11213 // leave a gap in the front of the vector after being reversed. Slide this
11214 // away.
11215 //
11216 // x x x x 3 2 1 0 <- v4i16 @ vlen=128
11217 // 0 1 2 3 x x x x <- reverse
11218 // x x x x 0 1 2 3 <- vslidedown.vx
11219 if (VecVT.isFixedLengthVector()) {
11220 SDValue Offset = DAG.getNode(
11221 ISD::SUB, DL, XLenVT,
11222 DAG.getElementCount(DL, XLenVT, ContainerVT.getVectorElementCount()),
11223 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()));
11224 Concat =
11225 getVSlidedown(DAG, Subtarget, DL, ContainerVT,
11226 DAG.getUNDEF(ContainerVT), Concat, Offset, Mask, VL);
11227 Concat = convertFromScalableVector(VecVT, Concat, DAG, Subtarget);
11228 }
11229 return Concat;
11230 }
11231
11232 unsigned EltSize = ContainerVT.getScalarSizeInBits();
11233 unsigned MinSize = ContainerVT.getSizeInBits().getKnownMinValue();
11234 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
11235 unsigned MaxVLMAX =
11236 VecVT.isFixedLengthVector()
11237 ? VecVT.getVectorNumElements()
11238 : RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
11239
11240 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
11241 MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
11242
11243 // If this is SEW=8 and VLMAX is potentially more than 256, we need
11244 // to use vrgatherei16.vv.
11245 if (MaxVLMAX > 256 && EltSize == 8) {
11246 // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
11247 // Reverse each half, then reassemble them in reverse order.
11248 // NOTE: It's also possible that, after splitting, VLMAX no longer
11249 // requires vrgatherei16.vv.
11250 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
11251 auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
11252 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
11253 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
11254 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
11255 // Reassemble the low and high pieces reversed.
11256 // FIXME: This is a CONCAT_VECTORS.
11257 SDValue Res =
11258 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
11259 DAG.getVectorIdxConstant(0, DL));
11260 return DAG.getNode(
11261 ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
11262 DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
11263 }
11264
11265 // Just promote the int type to i16 which will double the LMUL.
11266 IntVT = MVT::getVectorVT(MVT::i16, ContainerVT.getVectorElementCount());
11267 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11268 }
11269
11270 // At LMUL > 1, do the index computation in 16 bits to reduce register
11271 // pressure.
11272 if (IntVT.getScalarType().bitsGT(MVT::i16) &&
11273 IntVT.bitsGT(getLMUL1VT(IntVT))) {
11274 assert(isUInt<16>(MaxVLMAX - 1)); // Largest VLMAX is 65536 @ zvl65536b
11275 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
11276 IntVT = IntVT.changeVectorElementType(MVT::i16);
11277 }
11278
11279 // Calculate VLMAX-1 for the desired SEW.
11280 SDValue VLMinus1 = DAG.getNode(
11281 ISD::SUB, DL, XLenVT,
11282 DAG.getElementCount(DL, XLenVT, VecVT.getVectorElementCount()),
11283 DAG.getConstant(1, DL, XLenVT));
11284
11285 // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
11286 bool IsRV32E64 =
11287 !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
11288 SDValue SplatVL;
11289 if (!IsRV32E64)
11290 SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
11291 else
11292 SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
11293 VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
11294
11295 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
11296 SDValue Indices = DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID,
11297 DAG.getUNDEF(IntVT), Mask, VL);
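// Worked example (illustrative, VLMAX = 8):
//   VID     = 0 1 2 3 4 5 6 7
//   SplatVL = 7 7 7 7 7 7 7 7   (VLMAX - 1)
//   Indices = 7 6 5 4 3 2 1 0
// so the gather below writes output element i from input element VLMAX-1-i.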
11298
11299 SDValue Gather = DAG.getNode(GatherOpc, DL, ContainerVT, Vec, Indices,
11300 DAG.getUNDEF(ContainerVT), Mask, VL);
11301 if (VecVT.isFixedLengthVector())
11302 Gather = convertFromScalableVector(VecVT, Gather, DAG, Subtarget);
11303 return Gather;
11304}
11305
11306SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
11307 SelectionDAG &DAG) const {
11308 SDLoc DL(Op);
11309 SDValue V1 = Op.getOperand(0);
11310 SDValue V2 = Op.getOperand(1);
11311 MVT XLenVT = Subtarget.getXLenVT();
11312 MVT VecVT = Op.getSimpleValueType();
11313
11314 SDValue VLMax = computeVLMax(VecVT, DL, DAG);
11315
11316 int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
11317 SDValue DownOffset, UpOffset;
11318 if (ImmValue >= 0) {
11319 // The operand is a TargetConstant, so we need to rebuild it as a regular
11320 // constant.
11321 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
11322 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
11323 } else {
11324 // The operand is a TargetConstant, so we need to rebuild it as a regular
11325 // constant rather than negating the original operand.
11326 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
11327 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
11328 }
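// Worked example (illustrative, VLMAX = 8, immediate = 2), with V1 = a0..a7
// and V2 = b0..b7, so DownOffset = 2 and UpOffset = 6:
//   vslidedown V1 by 2 -> a2 a3 a4 a5 a6 a7 x  x
//   vslideup   V2 at 6 -> a2 a3 a4 a5 a6 a7 b0 b1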
11329
11330 SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
11331
11332 SDValue SlideDown =
11333 getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
11334 DownOffset, TrueMask, UpOffset);
11335 return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
11336 TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
11337 RISCVII::TAIL_AGNOSTIC);
11338}
11339
11340SDValue
11341RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
11342 SelectionDAG &DAG) const {
11343 SDLoc DL(Op);
11344 auto *Load = cast<LoadSDNode>(Op);
11345
11347 Load->getMemoryVT(),
11348 *Load->getMemOperand()) &&
11349 "Expecting a correctly-aligned load");
11350
11351 MVT VT = Op.getSimpleValueType();
11352 MVT XLenVT = Subtarget.getXLenVT();
11353 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11354
11355 // If we know the exact VLEN and our fixed length vector completely fills
11356 // the container, use a whole register load instead.
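// For example (illustrative): if the minimum and maximum VLEN are both known
// to be 128, a v4i32 value completely fills its LMUL=1 container, so the
// plain DAG.getLoad below can later be selected as a whole-register load
// that does not depend on a separately computed VL.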
11357 const auto [MinVLMAX, MaxVLMAX] =
11358 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11359 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11360 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11361 MachineMemOperand *MMO = Load->getMemOperand();
11362 SDValue NewLoad =
11363 DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
11364 MMO->getPointerInfo(), MMO->getBaseAlign(), MMO->getFlags(),
11365 MMO->getAAInfo(), MMO->getRanges());
11366 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11367 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11368 }
11369
11370 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11371
11372 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11373 SDValue IntID = DAG.getTargetConstant(
11374 IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
11375 SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
11376 if (!IsMaskOp)
11377 Ops.push_back(DAG.getUNDEF(ContainerVT));
11378 Ops.push_back(Load->getBasePtr());
11379 Ops.push_back(VL);
11380 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11381 SDValue NewLoad =
11382 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
11383 Load->getMemoryVT(), Load->getMemOperand());
11384
11385 SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
11386 return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
11387}
11388
11389SDValue
11390RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
11391 SelectionDAG &DAG) const {
11392 SDLoc DL(Op);
11393 auto *Store = cast<StoreSDNode>(Op);
11394
11395 assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
11396 Store->getMemoryVT(),
11397 *Store->getMemOperand()) &&
11398 "Expecting a correctly-aligned store");
11399
11400 SDValue StoreVal = Store->getValue();
11401 MVT VT = StoreVal.getSimpleValueType();
11402 MVT XLenVT = Subtarget.getXLenVT();
11403
11404 // If the size is less than a byte, we need to pad with zeros to make a byte.
11405 if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
11406 VT = MVT::v8i1;
11407 StoreVal =
11408 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getConstant(0, DL, VT),
11409 StoreVal, DAG.getVectorIdxConstant(0, DL));
11410 }
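// For example (illustrative): storing a v4i1 mask first inserts it at
// position 0 of an all-zero v8i1 value, so the mask-store intrinsic chosen
// below writes one full byte whose unused upper bits are known to be zero.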
11411
11412 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11413
11414 SDValue NewValue =
11415 convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
11416
11417 // If we know the exact VLEN and our fixed length vector completely fills
11418 // the container, use a whole register store instead.
11419 const auto [MinVLMAX, MaxVLMAX] =
11420 RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
11421 if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
11422 getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
11423 MachineMemOperand *MMO = Store->getMemOperand();
11424 return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
11425 MMO->getPointerInfo(), MMO->getBaseAlign(),
11426 MMO->getFlags(), MMO->getAAInfo());
11427 }
11428
11429 SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
11430
11431 bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
11432 SDValue IntID = DAG.getTargetConstant(
11433 IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
11434 return DAG.getMemIntrinsicNode(
11435 ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
11436 {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
11437 Store->getMemoryVT(), Store->getMemOperand());
11438}
11439
11440SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
11441 SelectionDAG &DAG) const {
11442 SDLoc DL(Op);
11443 MVT VT = Op.getSimpleValueType();
11444
11445 const auto *MemSD = cast<MemSDNode>(Op);
11446 EVT MemVT = MemSD->getMemoryVT();
11447 MachineMemOperand *MMO = MemSD->getMemOperand();
11448 SDValue Chain = MemSD->getChain();
11449 SDValue BasePtr = MemSD->getBasePtr();
11450
11451 SDValue Mask, PassThru, VL;
11452 bool IsExpandingLoad = false;
11453 if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
11454 Mask = VPLoad->getMask();
11455 PassThru = DAG.getUNDEF(VT);
11456 VL = VPLoad->getVectorLength();
11457 } else {
11458 const auto *MLoad = cast<MaskedLoadSDNode>(Op);
11459 Mask = MLoad->getMask();
11460 PassThru = MLoad->getPassThru();
11461 IsExpandingLoad = MLoad->isExpandingLoad();
11462 }
11463
11464 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
11465
11466 MVT XLenVT = Subtarget.getXLenVT();
11467
11468 MVT ContainerVT = VT;
11469 if (VT.isFixedLengthVector()) {
11470 ContainerVT = getContainerForFixedLengthVector(VT);
11471 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
11472 if (!IsUnmasked) {
11473 MVT MaskVT = getMaskTypeFor(ContainerVT);
11474 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11475 }
11476 }
11477
11478 if (!VL)
11479 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11480
11481 SDValue ExpandingVL;
11482 if (!IsUnmasked && IsExpandingLoad) {
11483 ExpandingVL = VL;
11484 VL =
11485 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11486 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11487 }
11488
11489 unsigned IntID = IsUnmasked || IsExpandingLoad ? Intrinsic::riscv_vle
11490 : Intrinsic::riscv_vle_mask;
11491 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11492 if (IntID == Intrinsic::riscv_vle)
11493 Ops.push_back(DAG.getUNDEF(ContainerVT));
11494 else
11495 Ops.push_back(PassThru);
11496 Ops.push_back(BasePtr);
11497 if (IntID == Intrinsic::riscv_vle_mask)
11498 Ops.push_back(Mask);
11499 Ops.push_back(VL);
11500 if (IntID == Intrinsic::riscv_vle_mask)
11501 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
11502
11503 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
11504
11505 SDValue Result =
11506 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
11507 Chain = Result.getValue(1);
11508 if (ExpandingVL) {
11509 MVT IndexVT = ContainerVT;
11510 if (ContainerVT.isFloatingPoint())
11511 IndexVT = ContainerVT.changeVectorElementTypeToInteger();
11512
11513 MVT IndexEltVT = IndexVT.getVectorElementType();
11514 bool UseVRGATHEREI16 = false;
11515 // If the index vector is an i8 vector and the element count exceeds 256,
11516 // we should change the element type of the index vector to i16 to avoid
11517 // overflow.
11518 if (IndexEltVT == MVT::i8 && VT.getVectorNumElements() > 256) {
11519 // FIXME: We need to do vector splitting manually for LMUL=8 cases.
11520 assert(getLMUL(IndexVT) != RISCVII::LMUL_8);
11521 IndexVT = IndexVT.changeVectorElementType(MVT::i16);
11522 UseVRGATHEREI16 = true;
11523 }
11524
11525 SDValue Iota =
11526 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
11527 DAG.getConstant(Intrinsic::riscv_viota, DL, XLenVT),
11528 DAG.getUNDEF(IndexVT), Mask, ExpandingVL);
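// Worked example (illustrative, 4 lanes, Mask = 1 0 1 1): the contiguous
// load above reads vcpop(Mask) = 3 elements e0 e1 e2, and viota(Mask) gives
// 0 1 1 2; the gather below then places e0 in lane 0, e1 in lane 2 and e2 in
// lane 3, while the masked-off lane 1 keeps its PassThru value, which is the
// expand semantics of an expanding masked load.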
11529 Result =
11530 DAG.getNode(UseVRGATHEREI16 ? RISCVISD::VRGATHEREI16_VV_VL
11531 : RISCVISD::VRGATHER_VV_VL,
11532 DL, ContainerVT, Result, Iota, PassThru, Mask, ExpandingVL);
11533 }
11534
11535 if (VT.isFixedLengthVector())
11536 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
11537
11538 return DAG.getMergeValues({Result, Chain}, DL);
11539}
11540
11541SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
11542 SelectionDAG &DAG) const {
11543 SDLoc DL(Op);
11544
11545 const auto *MemSD = cast<MemSDNode>(Op);
11546 EVT MemVT = MemSD->getMemoryVT();
11547 MachineMemOperand *MMO = MemSD->getMemOperand();
11548 SDValue Chain = MemSD->getChain();
11549 SDValue BasePtr = MemSD->getBasePtr();
11550 SDValue Val, Mask, VL;
11551
11552 bool IsCompressingStore = false;
11553 if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
11554 Val = VPStore->getValue();
11555 Mask = VPStore->getMask();
11556 VL = VPStore->getVectorLength();
11557 } else {
11558 const auto *MStore = cast<MaskedStoreSDNode>(Op);
11559 Val = MStore->getValue();
11560 Mask = MStore->getMask();
11561 IsCompressingStore = MStore->isCompressingStore();
11562 }
11563
11564 bool IsUnmasked =
11565 ISD::isConstantSplatVectorAllOnes(Mask.getNode()) || IsCompressingStore;
11566
11567 MVT VT = Val.getSimpleValueType();
11568 MVT XLenVT = Subtarget.getXLenVT();
11569
11570 MVT ContainerVT = VT;
11571 if (VT.isFixedLengthVector()) {
11572 ContainerVT = getContainerForFixedLengthVector(VT);
11573
11574 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11575 if (!IsUnmasked || IsCompressingStore) {
11576 MVT MaskVT = getMaskTypeFor(ContainerVT);
11577 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11578 }
11579 }
11580
11581 if (!VL)
11582 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11583
11584 if (IsCompressingStore) {
11585 Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11586 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11587 DAG.getUNDEF(ContainerVT), Val, Mask, VL);
11588 VL =
11589 DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Mask,
11590 getAllOnesMask(Mask.getSimpleValueType(), VL, DL, DAG), VL);
11591 }
11592
11593 unsigned IntID =
11594 IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
11595 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
11596 Ops.push_back(Val);
11597 Ops.push_back(BasePtr);
11598 if (!IsUnmasked)
11599 Ops.push_back(Mask);
11600 Ops.push_back(VL);
11601
11602 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
11603 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
11604}
11605
11606SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
11607 SelectionDAG &DAG) const {
11608 SDLoc DL(Op);
11609 SDValue Val = Op.getOperand(0);
11610 SDValue Mask = Op.getOperand(1);
11611 SDValue Passthru = Op.getOperand(2);
11612
11613 MVT VT = Val.getSimpleValueType();
11614 MVT XLenVT = Subtarget.getXLenVT();
11615 MVT ContainerVT = VT;
11616 if (VT.isFixedLengthVector()) {
11617 ContainerVT = getContainerForFixedLengthVector(VT);
11618 MVT MaskVT = getMaskTypeFor(ContainerVT);
11619 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
11620 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
11621 Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
11622 }
11623
11624 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11625 SDValue Res =
11626 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
11627 DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
11628 Passthru, Val, Mask, VL);
11629
11630 if (VT.isFixedLengthVector())
11631 Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
11632
11633 return Res;
11634}
11635
11636SDValue
11637RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
11638 SelectionDAG &DAG) const {
11639 MVT InVT = Op.getOperand(0).getSimpleValueType();
11640 MVT ContainerVT = getContainerForFixedLengthVector(InVT);
11641
11642 MVT VT = Op.getSimpleValueType();
11643
11644 SDValue Op1 =
11645 convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
11646 SDValue Op2 =
11647 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11648
11649 SDLoc DL(Op);
11650 auto [Mask, VL] = getDefaultVLOps(VT.getVectorNumElements(), ContainerVT, DL,
11651 DAG, Subtarget);
11652 MVT MaskVT = getMaskTypeFor(ContainerVT);
11653
11654 SDValue Cmp =
11655 DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT,
11656 {Op1, Op2, Op.getOperand(2), DAG.getUNDEF(MaskVT), Mask, VL});
11657
11658 return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
11659}
11660
11661SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
11662 SelectionDAG &DAG) const {
11663 unsigned Opc = Op.getOpcode();
11664 SDLoc DL(Op);
11665 SDValue Chain = Op.getOperand(0);
11666 SDValue Op1 = Op.getOperand(1);
11667 SDValue Op2 = Op.getOperand(2);
11668 SDValue CC = Op.getOperand(3);
11669 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
11670 MVT VT = Op.getSimpleValueType();
11671 MVT InVT = Op1.getSimpleValueType();
11672
11673 // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
11674 // condition codes.
11675 if (Opc == ISD::STRICT_FSETCCS) {
11676 // Expand strict_fsetccs(x, oeq) to
11677 // (and strict_fsetccs(x, y, oge), strict_fsetccs(x, y, ole))
11678 SDVTList VTList = Op->getVTList();
11679 if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
11680 SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
11681 SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11682 Op2, OLECCVal);
11683 SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
11684 Op1, OLECCVal);
11685 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11686 Tmp1.getValue(1), Tmp2.getValue(1));
11687 // Tmp1 and Tmp2 might be the same node.
11688 if (Tmp1 != Tmp2)
11689 Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
11690 return DAG.getMergeValues({Tmp1, OutChain}, DL);
11691 }
11692
11693 // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
11694 if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
11695 SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
11696 SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
11697 Op2, OEQCCVal);
11698 SDValue Res = DAG.getNOT(DL, OEQ, VT);
11699 return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
11700 }
11701 }
11702
11703 MVT ContainerInVT = InVT;
11704 if (InVT.isFixedLengthVector()) {
11705 ContainerInVT = getContainerForFixedLengthVector(InVT);
11706 Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
11707 Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
11708 }
11709 MVT MaskVT = getMaskTypeFor(ContainerInVT);
11710
11711 auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
11712
11713 SDValue Res;
11714 if (Opc == ISD::STRICT_FSETCC &&
11715 (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
11716 CCVal == ISD::SETOLE)) {
11717 // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask
11718 // that is only active when both input elements are ordered.
11719 SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
11720 SDValue OrderMask1 = DAG.getNode(
11721 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11722 {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11723 True, VL});
11724 SDValue OrderMask2 = DAG.getNode(
11725 RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
11726 {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
11727 True, VL});
11728 Mask =
11729 DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
11730 // Use Mask as the passthru operand to let the result be 0 if either of the
11731 // inputs is unordered.
11732 Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
11733 DAG.getVTList(MaskVT, MVT::Other),
11734 {Chain, Op1, Op2, CC, Mask, Mask, VL});
11735 } else {
11736 unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
11737 : RISCVISD::STRICT_FSETCCS_VL;
11738 Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
11739 {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
11740 }
11741
11742 if (VT.isFixedLengthVector()) {
11743 SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
11744 return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
11745 }
11746 return Res;
11747}
11748
11749// Lower vector ABS to smax(X, sub(0, X)).
11750SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
11751 SDLoc DL(Op);
11752 MVT VT = Op.getSimpleValueType();
11753 SDValue X = Op.getOperand(0);
11754
11755 assert((Op.getOpcode() == ISD::VP_ABS || VT.isFixedLengthVector()) &&
11756 "Unexpected type for ISD::ABS");
11757
11758 MVT ContainerVT = VT;
11759 if (VT.isFixedLengthVector()) {
11760 ContainerVT = getContainerForFixedLengthVector(VT);
11761 X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
11762 }
11763
11764 SDValue Mask, VL;
11765 if (Op->getOpcode() == ISD::VP_ABS) {
11766 Mask = Op->getOperand(1);
11767 if (VT.isFixedLengthVector())
11768 Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
11769 Subtarget);
11770 VL = Op->getOperand(2);
11771 } else
11772 std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11773
11774 SDValue SplatZero = DAG.getNode(
11775 RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
11776 DAG.getConstant(0, DL, Subtarget.getXLenVT()), VL);
11777 SDValue NegX = DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X,
11778 DAG.getUNDEF(ContainerVT), Mask, VL);
11779 SDValue Max = DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX,
11780 DAG.getUNDEF(ContainerVT), Mask, VL);
11781
11782 if (VT.isFixedLengthVector())
11783 Max = convertFromScalableVector(VT, Max, DAG, Subtarget);
11784 return Max;
11785}
11786
11787SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
11788 SDValue Op, SelectionDAG &DAG) const {
11789 SDLoc DL(Op);
11790 MVT VT = Op.getSimpleValueType();
11791 SDValue Mag = Op.getOperand(0);
11792 SDValue Sign = Op.getOperand(1);
11793 assert(Mag.getValueType() == Sign.getValueType() &&
11794 "Can only handle COPYSIGN with matching types.");
11795
11796 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11797 Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
11798 Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
11799
11800 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11801
11802 SDValue CopySign = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag,
11803 Sign, DAG.getUNDEF(ContainerVT), Mask, VL);
11804
11805 return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
11806}
11807
11808SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
11809 SDValue Op, SelectionDAG &DAG) const {
11810 MVT VT = Op.getSimpleValueType();
11811 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11812
11813 MVT I1ContainerVT =
11814 MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11815
11816 SDValue CC =
11817 convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
11818 SDValue Op1 =
11819 convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
11820 SDValue Op2 =
11821 convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
11822
11823 SDLoc DL(Op);
11824 SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
11825
11826 SDValue Select = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, CC, Op1,
11827 Op2, DAG.getUNDEF(ContainerVT), VL);
11828
11829 return convertFromScalableVector(VT, Select, DAG, Subtarget);
11830}
11831
11832SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
11833 SelectionDAG &DAG) const {
11834 unsigned NewOpc = getRISCVVLOp(Op);
11835 bool HasPassthruOp = hasPassthruOp(NewOpc);
11836 bool HasMask = hasMaskOp(NewOpc);
11837
11838 MVT VT = Op.getSimpleValueType();
11839 MVT ContainerVT = getContainerForFixedLengthVector(VT);
11840
11841 // Create list of operands by converting existing ones to scalable types.
11842 SmallVector<SDValue, 6> Ops;
11843 for (const SDValue &V : Op->op_values()) {
11844 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11845
11846 // Pass through non-vector operands.
11847 if (!V.getValueType().isVector()) {
11848 Ops.push_back(V);
11849 continue;
11850 }
11851
11852 // "cast" fixed length vector to a scalable vector.
11853 assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
11854 "Only fixed length vectors are supported!");
11855 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11856 }
11857
11858 SDLoc DL(Op);
11859 auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
11860 if (HasPassthruOp)
11861 Ops.push_back(DAG.getUNDEF(ContainerVT));
11862 if (HasMask)
11863 Ops.push_back(Mask);
11864 Ops.push_back(VL);
11865
11866 // StrictFP operations have two result values. Their lowered result should
11867 // have the same result count.
11868 if (Op->isStrictFPOpcode()) {
11869 SDValue ScalableRes =
11870 DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
11871 Op->getFlags());
11872 SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11873 return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
11874 }
11875
11876 SDValue ScalableRes =
11877 DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
11878 return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
11879}
11880
11881// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
11882// * Operands of each node are assumed to be in the same order.
11883// * The EVL operand is promoted from i32 to i64 on RV64.
11884// * Fixed-length vectors are converted to their scalable-vector container
11885// types.
11886SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG) const {
11887 unsigned RISCVISDOpc = getRISCVVLOp(Op);
11888 bool HasPassthruOp = hasPassthruOp(RISCVISDOpc);
11889
11890 SDLoc DL(Op);
11891 MVT VT = Op.getSimpleValueType();
11892 SmallVector<SDValue, 4> Ops;
11893
11894 MVT ContainerVT = VT;
11895 if (VT.isFixedLengthVector())
11896 ContainerVT = getContainerForFixedLengthVector(VT);
11897
11898 for (const auto &OpIdx : enumerate(Op->ops())) {
11899 SDValue V = OpIdx.value();
11900 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
11901 // Add dummy passthru value before the mask. Or if there isn't a mask,
11902 // before EVL.
11903 if (HasPassthruOp) {
11904 auto MaskIdx = ISD::getVPMaskIdx(Op.getOpcode());
11905 if (MaskIdx) {
11906 if (*MaskIdx == OpIdx.index())
11907 Ops.push_back(DAG.getUNDEF(ContainerVT));
11908 } else if (ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) ==
11909 OpIdx.index()) {
11910 if (Op.getOpcode() == ISD::VP_MERGE) {
11911 // For VP_MERGE, copy the false operand instead of an undef value.
11912 Ops.push_back(Ops.back());
11913 } else {
11914 assert(Op.getOpcode() == ISD::VP_SELECT);
11915 // For VP_SELECT, add an undef value.
11916 Ops.push_back(DAG.getUNDEF(ContainerVT));
11917 }
11918 }
11919 }
11920 // VFCVT_RM_X_F_VL requires a rounding mode to be injected before the VL.
11921 if (RISCVISDOpc == RISCVISD::VFCVT_RM_X_F_VL &&
11922 ISD::getVPExplicitVectorLengthIdx(Op.getOpcode()) == OpIdx.index())
11923 Ops.push_back(DAG.getTargetConstant(RISCVFPRndMode::DYN, DL,
11924 Subtarget.getXLenVT()));
11925 // Pass through operands which aren't fixed-length vectors.
11926 if (!V.getValueType().isFixedLengthVector()) {
11927 Ops.push_back(V);
11928 continue;
11929 }
11930 // "cast" fixed length vector to a scalable vector.
11931 MVT OpVT = V.getSimpleValueType();
11932 MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
11933 assert(useRVVForFixedLengthVectorVT(OpVT) &&
11934 "Only fixed length vectors are supported!");
11935 Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
11936 }
11937
11938 if (!VT.isFixedLengthVector())
11939 return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
11940
11941 SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
11942
11943 return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
11944}
11945
11946SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
11947 SelectionDAG &DAG) const {
11948 SDLoc DL(Op);
11949 MVT VT = Op.getSimpleValueType();
11950
11951 SDValue Src = Op.getOperand(0);
11952 // NOTE: Mask is dropped.
11953 SDValue VL = Op.getOperand(2);
11954
11955 MVT ContainerVT = VT;
11956 if (VT.isFixedLengthVector()) {
11957 ContainerVT = getContainerForFixedLengthVector(VT);
11958 MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
11959 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
11960 }
11961
11962 MVT XLenVT = Subtarget.getXLenVT();
11963 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
11964 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11965 DAG.getUNDEF(ContainerVT), Zero, VL);
11966
11967 SDValue SplatValue = DAG.getSignedConstant(
11968 Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
11969 SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
11970 DAG.getUNDEF(ContainerVT), SplatValue, VL);
11971
11972 SDValue Result = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Src, Splat,
11973 ZeroSplat, DAG.getUNDEF(ContainerVT), VL);
11974 if (!VT.isFixedLengthVector())
11975 return Result;
11976 return convertFromScalableVector(VT, Result, DAG, Subtarget);
11977}
11978
11979SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
11980 SelectionDAG &DAG) const {
11981 SDLoc DL(Op);
11982 MVT VT = Op.getSimpleValueType();
11983
11984 SDValue Op1 = Op.getOperand(0);
11985 SDValue Op2 = Op.getOperand(1);
11986 ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11987 // NOTE: Mask is dropped.
11988 SDValue VL = Op.getOperand(4);
11989
11990 MVT ContainerVT = VT;
11991 if (VT.isFixedLengthVector()) {
11992 ContainerVT = getContainerForFixedLengthVector(VT);
11993 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
11994 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
11995 }
11996
11997 SDValue Result;
11998 SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
11999
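// For i1 vectors "true" is -1 under a signed interpretation, so e.g.
// X >s Y holds exactly when X == 0 and Y == true, i.e. (~X) & Y; the cases
// below build these forms from VMXOR (xor with the all-ones mask acts as
// NOT) and VMAND.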
12000 switch (Condition) {
12001 default:
12002 break;
12003 // X != Y --> (X^Y)
12004 case ISD::SETNE:
12005 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12006 break;
12007 // X == Y --> ~(X^Y)
12008 case ISD::SETEQ: {
12009 SDValue Temp =
12010 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
12011 Result =
12012 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
12013 break;
12014 }
12015 // X >s Y --> X == 0 & Y == 1 --> ~X & Y
12016 // X <u Y --> X == 0 & Y == 1 --> ~X & Y
12017 case ISD::SETGT:
12018 case ISD::SETULT: {
12019 SDValue Temp =
12020 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12021 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
12022 break;
12023 }
12024 // X <s Y --> X == 1 & Y == 0 --> ~Y & X
12025 // X >u Y --> X == 1 & Y == 0 --> ~Y & X
12026 case ISD::SETLT:
12027 case ISD::SETUGT: {
12028 SDValue Temp =
12029 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12030 Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
12031 break;
12032 }
12033 // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
12034 // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
12035 case ISD::SETGE:
12036 case ISD::SETULE: {
12037 SDValue Temp =
12038 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
12039 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op2, VL);
12040 break;
12041 }
12042 // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
12043 // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
12044 case ISD::SETLE:
12045 case ISD::SETUGE: {
12046 SDValue Temp =
12047 DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
12048 Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, Op1, VL);
12049 break;
12050 }
12051 }
12052
12053 if (!VT.isFixedLengthVector())
12054 return Result;
12055 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12056}
12057
12058// Lower Floating-Point/Integer Type-Convert VP SDNodes
12059SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op,
12060 SelectionDAG &DAG) const {
12061 SDLoc DL(Op);
12062
12063 SDValue Src = Op.getOperand(0);
12064 SDValue Mask = Op.getOperand(1);
12065 SDValue VL = Op.getOperand(2);
12066 unsigned RISCVISDOpc = getRISCVVLOp(Op);
12067
12068 MVT DstVT = Op.getSimpleValueType();
12069 MVT SrcVT = Src.getSimpleValueType();
12070 if (DstVT.isFixedLengthVector()) {
12071 DstVT = getContainerForFixedLengthVector(DstVT);
12072 SrcVT = getContainerForFixedLengthVector(SrcVT);
12073 Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
12074 MVT MaskVT = getMaskTypeFor(DstVT);
12075 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12076 }
12077
12078 unsigned DstEltSize = DstVT.getScalarSizeInBits();
12079 unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
12080
12081 SDValue Result;
12082 if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
12083 if (SrcVT.isInteger()) {
12084 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12085
12086 unsigned RISCVISDExtOpc = RISCVISDOpc == RISCVISD::SINT_TO_FP_VL
12087 ? RISCVISD::VSEXT_VL
12088 : RISCVISD::VZEXT_VL;
12089
12090 // Do we need to do any pre-widening before converting?
12091 if (SrcEltSize == 1) {
12092 MVT IntVT = DstVT.changeVectorElementTypeToInteger();
12093 MVT XLenVT = Subtarget.getXLenVT();
12094 SDValue Zero = DAG.getConstant(0, DL, XLenVT);
12095 SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12096 DAG.getUNDEF(IntVT), Zero, VL);
12097 SDValue One = DAG.getSignedConstant(
12098 RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
12099 SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
12100 DAG.getUNDEF(IntVT), One, VL);
12101 Src = DAG.getNode(RISCVISD::VMERGE_VL, DL, IntVT, Src, OneSplat,
12102 ZeroSplat, DAG.getUNDEF(IntVT), VL);
12103 } else if (DstEltSize > (2 * SrcEltSize)) {
12104 // Widen before converting.
12105 MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
12106 DstVT.getVectorElementCount());
12107 Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
12108 }
12109
12110 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12111 } else {
12112 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12113 "Wrong input/output vector types");
12114
12115 // Convert f16 to f32 then convert f32 to i64.
12116 if (DstEltSize > (2 * SrcEltSize)) {
12117 assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12118 MVT InterimFVT =
12119 MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12120 Src =
12121 DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
12122 }
12123
12124 Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
12125 }
12126 } else { // Narrowing + Conversion
12127 if (SrcVT.isInteger()) {
12128 assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
12129 // First do a narrowing conversion to an FP type half the size, then
12130 // round the result to a smaller FP type if needed.
12131
12132 MVT InterimFVT = DstVT;
12133 if (SrcEltSize > (2 * DstEltSize)) {
12134 assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
12135 assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
12136 InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
12137 }
12138
12139 Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
12140
12141 if (InterimFVT != DstVT) {
12142 Src = Result;
12143 Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
12144 }
12145 } else {
12146 assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
12147 "Wrong input/output vector types");
12148 // First do a narrowing conversion to an integer half the size, then
12149 // truncate if needed.
12150
12151 if (DstEltSize == 1) {
12152 // First convert to the same size integer, then convert to mask using
12153 // setcc.
12154 assert(SrcEltSize >= 16 && "Unexpected FP type!");
12155 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
12156 DstVT.getVectorElementCount());
12157 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12158
12159 // Compare the integer result to 0. The integer should be 0 or 1/-1,
12160 // otherwise the conversion was undefined.
12161 MVT XLenVT = Subtarget.getXLenVT();
12162 SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
12163 SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
12164 DAG.getUNDEF(InterimIVT), SplatZero, VL);
12165 Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT,
12166 {Result, SplatZero, DAG.getCondCode(ISD::SETNE),
12167 DAG.getUNDEF(DstVT), Mask, VL});
12168 } else {
12169 MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12170 DstVT.getVectorElementCount());
12171
12172 Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
12173
12174 while (InterimIVT != DstVT) {
12175 SrcEltSize /= 2;
12176 Src = Result;
12177 InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
12178 DstVT.getVectorElementCount());
12179 Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
12180 Src, Mask, VL);
12181 }
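// Worked example (illustrative): an f64 -> i8 conversion on this path first
// converts to i32 (half the source width), then the loop above narrows
// i32 -> i16 -> i8 with one element-halving TRUNCATE_VECTOR_VL per step.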
12182 }
12183 }
12184 }
12185
12186 MVT VT = Op.getSimpleValueType();
12187 if (!VT.isFixedLengthVector())
12188 return Result;
12189 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12190}
12191
12192SDValue RISCVTargetLowering::lowerVPMergeMask(SDValue Op,
12193 SelectionDAG &DAG) const {
12194 SDLoc DL(Op);
12195 MVT VT = Op.getSimpleValueType();
12196 MVT XLenVT = Subtarget.getXLenVT();
12197
12198 SDValue Mask = Op.getOperand(0);
12199 SDValue TrueVal = Op.getOperand(1);
12200 SDValue FalseVal = Op.getOperand(2);
12201 SDValue VL = Op.getOperand(3);
12202
12203 // Use default legalization if a vector of EVL type would be legal.
12204 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), VL.getValueType(),
12205 VT.getVectorElementCount());
12206 if (isTypeLegal(EVLVecVT))
12207 return SDValue();
12208
12209 MVT ContainerVT = VT;
12210 if (VT.isFixedLengthVector()) {
12211 ContainerVT = getContainerForFixedLengthVector(VT);
12212 Mask = convertToScalableVector(ContainerVT, Mask, DAG, Subtarget);
12213 TrueVal = convertToScalableVector(ContainerVT, TrueVal, DAG, Subtarget);
12214 FalseVal = convertToScalableVector(ContainerVT, FalseVal, DAG, Subtarget);
12215 }
12216
12217 // Promote to a vector of i8.
12218 MVT PromotedVT = ContainerVT.changeVectorElementType(MVT::i8);
12219
12220 // Promote TrueVal and FalseVal using VLMax.
12221 // FIXME: Is there a better way to do this?
12222 SDValue VLMax = DAG.getRegister(RISCV::X0, XLenVT);
12223 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12224 DAG.getUNDEF(PromotedVT),
12225 DAG.getConstant(1, DL, XLenVT), VLMax);
12226 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, PromotedVT,
12227 DAG.getUNDEF(PromotedVT),
12228 DAG.getConstant(0, DL, XLenVT), VLMax);
12229 TrueVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, TrueVal, SplatOne,
12230 SplatZero, DAG.getUNDEF(PromotedVT), VL);
12231 // Any element past VL uses FalseVal, so use VLMax
12232 FalseVal = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, FalseVal,
12233 SplatOne, SplatZero, DAG.getUNDEF(PromotedVT), VLMax);
12234
12235 // VP_MERGE the two promoted values.
12236 SDValue VPMerge = DAG.getNode(RISCVISD::VMERGE_VL, DL, PromotedVT, Mask,
12237 TrueVal, FalseVal, FalseVal, VL);
12238
12239 // Convert back to mask.
12240 SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
12241 SDValue Result = DAG.getNode(
12242 RISCVISD::SETCC_VL, DL, ContainerVT,
12243 {VPMerge, DAG.getConstant(0, DL, PromotedVT), DAG.getCondCode(ISD::SETNE),
12244 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), TrueMask, VLMax});
12245
12246 if (VT.isFixedLengthVector())
12247 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12248 return Result;
12249}
12250
12251SDValue
12252RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
12253 SelectionDAG &DAG) const {
12254 SDLoc DL(Op);
12255
12256 SDValue Op1 = Op.getOperand(0);
12257 SDValue Op2 = Op.getOperand(1);
12258 SDValue Offset = Op.getOperand(2);
12259 SDValue Mask = Op.getOperand(3);
12260 SDValue EVL1 = Op.getOperand(4);
12261 SDValue EVL2 = Op.getOperand(5);
12262
12263 const MVT XLenVT = Subtarget.getXLenVT();
12264 MVT VT = Op.getSimpleValueType();
12265 MVT ContainerVT = VT;
12266 if (VT.isFixedLengthVector()) {
12267 ContainerVT = getContainerForFixedLengthVector(VT);
12268 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12269 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12270 MVT MaskVT = getMaskTypeFor(ContainerVT);
12271 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12272 }
12273
12274 bool IsMaskVector = VT.getVectorElementType() == MVT::i1;
12275 if (IsMaskVector) {
12276 ContainerVT = ContainerVT.changeVectorElementType(MVT::i8);
12277
12278 // Expand input operands
12279 SDValue SplatOneOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12280 DAG.getUNDEF(ContainerVT),
12281 DAG.getConstant(1, DL, XLenVT), EVL1);
12282 SDValue SplatZeroOp1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12283 DAG.getUNDEF(ContainerVT),
12284 DAG.getConstant(0, DL, XLenVT), EVL1);
12285 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op1, SplatOneOp1,
12286 SplatZeroOp1, DAG.getUNDEF(ContainerVT), EVL1);
12287
12288 SDValue SplatOneOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12289 DAG.getUNDEF(ContainerVT),
12290 DAG.getConstant(1, DL, XLenVT), EVL2);
12291 SDValue SplatZeroOp2 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
12292 DAG.getUNDEF(ContainerVT),
12293 DAG.getConstant(0, DL, XLenVT), EVL2);
12294 Op2 = DAG.getNode(RISCVISD::VMERGE_VL, DL, ContainerVT, Op2, SplatOneOp2,
12295 SplatZeroOp2, DAG.getUNDEF(ContainerVT), EVL2);
12296 }
12297
12298 int64_t ImmValue = cast<ConstantSDNode>(Offset)->getSExtValue();
12299 SDValue DownOffset, UpOffset;
12300 if (ImmValue >= 0) {
12301 // The operand is a TargetConstant, so we need to rebuild it as a regular
12302 // constant.
12303 DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
12304 UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, DownOffset);
12305 } else {
12306 // The operand is a TargetConstant, so we need to rebuild it as a regular
12307 // constant rather than negating the original operand.
12308 UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
12309 DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, EVL1, UpOffset);
12310 }
12311
12312 SDValue SlideDown =
12313 getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
12314 Op1, DownOffset, Mask, UpOffset);
12315 SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, SlideDown, Op2,
12316 UpOffset, Mask, EVL2, RISCVII::TAIL_AGNOSTIC);
12317
12318 if (IsMaskVector) {
12319 // Truncate Result back to a mask vector (Result has same EVL as Op2)
12320 Result = DAG.getNode(
12321 RISCVISD::SETCC_VL, DL, ContainerVT.changeVectorElementType(MVT::i1),
12322 {Result, DAG.getConstant(0, DL, ContainerVT),
12323 DAG.getCondCode(ISD::SETNE), DAG.getUNDEF(getMaskTypeFor(ContainerVT)),
12324 Mask, EVL2});
12325 }
12326
12327 if (!VT.isFixedLengthVector())
12328 return Result;
12329 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12330}
12331
12332SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
12333 SelectionDAG &DAG) const {
12334 SDLoc DL(Op);
12335 SDValue Val = Op.getOperand(0);
12336 SDValue Mask = Op.getOperand(1);
12337 SDValue VL = Op.getOperand(2);
12338 MVT VT = Op.getSimpleValueType();
12339
12340 MVT ContainerVT = VT;
12341 if (VT.isFixedLengthVector()) {
12342 ContainerVT = getContainerForFixedLengthVector(VT);
12343 MVT MaskVT = getMaskTypeFor(ContainerVT);
12344 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12345 }
12346
12347 SDValue Result =
12348 lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
12349
12350 if (!VT.isFixedLengthVector())
12351 return Result;
12352 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12353}
12354
12355SDValue
12356RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op,
12357 SelectionDAG &DAG) const {
12358 SDLoc DL(Op);
12359 MVT VT = Op.getSimpleValueType();
12360 MVT XLenVT = Subtarget.getXLenVT();
12361
12362 SDValue Op1 = Op.getOperand(0);
12363 SDValue Mask = Op.getOperand(1);
12364 SDValue EVL = Op.getOperand(2);
12365
12366 MVT ContainerVT = VT;
12367 if (VT.isFixedLengthVector()) {
12368 ContainerVT = getContainerForFixedLengthVector(VT);
12369 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12370 MVT MaskVT = getMaskTypeFor(ContainerVT);
12371 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12372 }
12373
12374 MVT GatherVT = ContainerVT;
12375 MVT IndicesVT = ContainerVT.changeVectorElementTypeToInteger();
12376 // Check if we are working with mask vectors
12377 bool IsMaskVector = ContainerVT.getVectorElementType() == MVT::i1;
12378 if (IsMaskVector) {
12379 GatherVT = IndicesVT = ContainerVT.changeVectorElementType(MVT::i8);
12380
12381 // Expand input operand
12382 SDValue SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12383 DAG.getUNDEF(IndicesVT),
12384 DAG.getConstant(1, DL, XLenVT), EVL);
12385 SDValue SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12386 DAG.getUNDEF(IndicesVT),
12387 DAG.getConstant(0, DL, XLenVT), EVL);
12388 Op1 = DAG.getNode(RISCVISD::VMERGE_VL, DL, IndicesVT, Op1, SplatOne,
12389 SplatZero, DAG.getUNDEF(IndicesVT), EVL);
12390 }
12391
12392 unsigned EltSize = GatherVT.getScalarSizeInBits();
12393 unsigned MinSize = GatherVT.getSizeInBits().getKnownMinValue();
12394 unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
12395 unsigned MaxVLMAX =
12396 RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
12397
12398 unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
12399 // If this is SEW=8 and VLMAX is unknown or more than 256, we need
12400 // to use vrgatherei16.vv.
12401 // TODO: It's also possible to use vrgatherei16.vv for other types to
12402 // decrease register width for the index calculation.
12403 // NOTE: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
12404 if (MaxVLMAX > 256 && EltSize == 8) {
12405 // If this is LMUL=8, we have to split before using vrgatherei16.vv.
12406 // Split the vector in half and reverse each half using a full register
12407 // reverse.
12408 // Swap the halves and concatenate them.
12409 // Slide the concatenated result by (VLMax - VL).
12410 if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
12411 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(GatherVT);
12412 auto [Lo, Hi] = DAG.SplitVector(Op1, DL);
12413
12414 SDValue LoRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
12415 SDValue HiRev = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
12416
12417 // Reassemble the low and high pieces reversed.
12418 // NOTE: this Result is unmasked (because we do not need masks for
12419 // shuffles). If in the future this has to change, we can use a SELECT_VL
12420 // between Result and UNDEF using the mask originally passed to VP_REVERSE
12421 SDValue Result =
12422 DAG.getNode(ISD::CONCAT_VECTORS, DL, GatherVT, HiRev, LoRev);
12423
12424 // Slide off any elements from past EVL that were reversed into the low
12425 // elements.
12426 unsigned MinElts = GatherVT.getVectorMinNumElements();
12427 SDValue VLMax =
12428 DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), MinElts));
12429 SDValue Diff = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, EVL);
12430
12431 Result = getVSlidedown(DAG, Subtarget, DL, GatherVT,
12432 DAG.getUNDEF(GatherVT), Result, Diff, Mask, EVL);
12433
12434 if (IsMaskVector) {
12435 // Truncate Result back to a mask vector
12436 Result =
12437 DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
12438 {Result, DAG.getConstant(0, DL, GatherVT),
12439 DAG.getCondCode(ISD::SETNE),
12440 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12441 }
12442
12443 if (!VT.isFixedLengthVector())
12444 return Result;
12445 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12446 }
12447
12448 // Just promote the int type to i16 which will double the LMUL.
12449 IndicesVT = MVT::getVectorVT(MVT::i16, IndicesVT.getVectorElementCount());
12450 GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
12451 }
12452
12453 SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL);
12454 SDValue VecLen =
12455 DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT));
12456 SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT,
12457 DAG.getUNDEF(IndicesVT), VecLen, EVL);
12458 SDValue VRSUB = DAG.getNode(RISCVISD::SUB_VL, DL, IndicesVT, VecLenSplat, VID,
12459 DAG.getUNDEF(IndicesVT), Mask, EVL);
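// Worked example (illustrative, EVL = 5): VID = 0 1 2 3 4 ..., VecLenSplat
// holds EVL-1 = 4, so VRSUB = 4 3 2 1 0 ... and the gather below reverses
// exactly the first EVL elements; lanes at or beyond EVL read from an undef
// passthru and are not meaningful.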
12460 SDValue Result = DAG.getNode(GatherOpc, DL, GatherVT, Op1, VRSUB,
12461 DAG.getUNDEF(GatherVT), Mask, EVL);
12462
12463 if (IsMaskVector) {
12464 // Truncate Result back to a mask vector
12465 Result = DAG.getNode(
12466 RISCVISD::SETCC_VL, DL, ContainerVT,
12467 {Result, DAG.getConstant(0, DL, GatherVT), DAG.getCondCode(ISD::SETNE),
12468 DAG.getUNDEF(getMaskTypeFor(ContainerVT)), Mask, EVL});
12469 }
12470
12471 if (!VT.isFixedLengthVector())
12472 return Result;
12473 return convertFromScalableVector(VT, Result, DAG, Subtarget);
12474}
12475
12476SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op,
12477 SelectionDAG &DAG) const {
12478 MVT VT = Op.getSimpleValueType();
12479 if (VT.getVectorElementType() != MVT::i1)
12480 return lowerVPOp(Op, DAG);
12481
12482 // It is safe to drop mask parameter as masked-off elements are undef.
12483 SDValue Op1 = Op->getOperand(0);
12484 SDValue Op2 = Op->getOperand(1);
12485 SDValue VL = Op->getOperand(3);
12486
12487 MVT ContainerVT = VT;
12488 const bool IsFixed = VT.isFixedLengthVector();
12489 if (IsFixed) {
12490 ContainerVT = getContainerForFixedLengthVector(VT);
12491 Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
12492 Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
12493 }
12494
12495 SDLoc DL(Op);
12496 SDValue Val = DAG.getNode(getRISCVVLOp(Op), DL, ContainerVT, Op1, Op2, VL);
12497 if (!IsFixed)
12498 return Val;
12499 return convertFromScalableVector(VT, Val, DAG, Subtarget);
12500}
12501
12502SDValue RISCVTargetLowering::lowerVPStridedLoad(SDValue Op,
12503 SelectionDAG &DAG) const {
12504 SDLoc DL(Op);
12505 MVT XLenVT = Subtarget.getXLenVT();
12506 MVT VT = Op.getSimpleValueType();
12507 MVT ContainerVT = VT;
12508 if (VT.isFixedLengthVector())
12509 ContainerVT = getContainerForFixedLengthVector(VT);
12510
12511 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12512
12513 auto *VPNode = cast<VPStridedLoadSDNode>(Op);
12514 // Check if the mask is known to be all ones
12515 SDValue Mask = VPNode->getMask();
12516 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12517
12518 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vlse
12519 : Intrinsic::riscv_vlse_mask,
12520 DL, XLenVT);
12521 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID,
12522 DAG.getUNDEF(ContainerVT), VPNode->getBasePtr(),
12523 VPNode->getStride()};
12524 if (!IsUnmasked) {
12525 if (VT.isFixedLengthVector()) {
12526 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12527 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12528 }
12529 Ops.push_back(Mask);
12530 }
12531 Ops.push_back(VPNode->getVectorLength());
12532 if (!IsUnmasked) {
12533 SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
12534 Ops.push_back(Policy);
12535 }
12536
12537 SDValue Result =
12538 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
12539 VPNode->getMemoryVT(), VPNode->getMemOperand());
12540 SDValue Chain = Result.getValue(1);
12541
12542 if (VT.isFixedLengthVector())
12543 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12544
12545 return DAG.getMergeValues({Result, Chain}, DL);
12546}
12547
12548SDValue RISCVTargetLowering::lowerVPStridedStore(SDValue Op,
12549 SelectionDAG &DAG) const {
12550 SDLoc DL(Op);
12551 MVT XLenVT = Subtarget.getXLenVT();
12552
12553 auto *VPNode = cast<VPStridedStoreSDNode>(Op);
12554 SDValue StoreVal = VPNode->getValue();
12555 MVT VT = StoreVal.getSimpleValueType();
12556 MVT ContainerVT = VT;
12557 if (VT.isFixedLengthVector()) {
12558 ContainerVT = getContainerForFixedLengthVector(VT);
12559 StoreVal = convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
12560 }
12561
12562 // Check if the mask is known to be all ones
12563 SDValue Mask = VPNode->getMask();
12564 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12565
12566 SDValue IntID = DAG.getTargetConstant(IsUnmasked ? Intrinsic::riscv_vsse
12567 : Intrinsic::riscv_vsse_mask,
12568 DL, XLenVT);
12569 SmallVector<SDValue, 8> Ops{VPNode->getChain(), IntID, StoreVal,
12570 VPNode->getBasePtr(), VPNode->getStride()};
12571 if (!IsUnmasked) {
12572 if (VT.isFixedLengthVector()) {
12573 MVT MaskVT = ContainerVT.changeVectorElementType(MVT::i1);
12574 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12575 }
12576 Ops.push_back(Mask);
12577 }
12578 Ops.push_back(VPNode->getVectorLength());
12579
12580 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, VPNode->getVTList(),
12581 Ops, VPNode->getMemoryVT(),
12582 VPNode->getMemOperand());
12583}
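// [Editor's note: illustrative sketch, not part of the LLVM source.] The two
// lowerings above only marshal operands for the vlse/vsse intrinsics; the
// memory behaviour they encode is easiest to see as a scalar loop. A minimal
// standalone C++ model (compile separately; all names are hypothetical) of a
// masked strided load over i32 elements with a byte stride:
#include <cstddef>
#include <cstdint>
#include <cstring>

void stridedLoadModel(const uint8_t *Base, ptrdiff_t Stride, const bool *Mask,
                      size_t EVL, uint32_t *Result) {
  for (size_t I = 0; I < EVL; ++I) {
    if (!Mask[I])
      continue; // masked-off lanes keep their previous (pass-through) value
    uint32_t Elt;
    std::memcpy(&Elt, Base + I * Stride, sizeof(Elt)); // load from Base + I*Stride
    Result[I] = Elt;
  }
}
// A strided store is the mirror image: active lanes write their value to
// Base + I * Stride, and masked-off memory locations are left untouched.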
12584
12585// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
12586// matched to a RVV indexed load. The RVV indexed load instructions only
12587// support the "unsigned unscaled" addressing mode; indices are implicitly
12588// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12589// signed or scaled indexing is extended to the XLEN value type and scaled
12590// accordingly.
12591SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
12592 SelectionDAG &DAG) const {
12593 SDLoc DL(Op);
12594 MVT VT = Op.getSimpleValueType();
12595
12596 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12597 EVT MemVT = MemSD->getMemoryVT();
12598 MachineMemOperand *MMO = MemSD->getMemOperand();
12599 SDValue Chain = MemSD->getChain();
12600 SDValue BasePtr = MemSD->getBasePtr();
12601
12602 [[maybe_unused]] ISD::LoadExtType LoadExtType;
12603 SDValue Index, Mask, PassThru, VL;
12604
12605 if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
12606 Index = VPGN->getIndex();
12607 Mask = VPGN->getMask();
12608 PassThru = DAG.getUNDEF(VT);
12609 VL = VPGN->getVectorLength();
12610 // VP doesn't support extending loads.
12611 LoadExtType = ISD::NON_EXTLOAD;
12612 } else {
12613 // Else it must be a MGATHER.
12614 auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
12615 Index = MGN->getIndex();
12616 Mask = MGN->getMask();
12617 PassThru = MGN->getPassThru();
12618 LoadExtType = MGN->getExtensionType();
12619 }
12620
12621 MVT IndexVT = Index.getSimpleValueType();
12622 MVT XLenVT = Subtarget.getXLenVT();
12623
12624 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12625 "Unexpected VTs!");
12626 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12627 // Targets have to explicitly opt-in for extending vector loads.
12628 assert(LoadExtType == ISD::NON_EXTLOAD &&
12629 "Unexpected extending MGATHER/VP_GATHER");
12630
12631 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12632 // the selection of the masked intrinsics doesn't do this for us.
12633 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12634
12635 MVT ContainerVT = VT;
12636 if (VT.isFixedLengthVector()) {
12637 ContainerVT = getContainerForFixedLengthVector(VT);
12638 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12639 ContainerVT.getVectorElementCount());
12640
12641 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12642
12643 if (!IsUnmasked) {
12644 MVT MaskVT = getMaskTypeFor(ContainerVT);
12645 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12646 PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
12647 }
12648 }
12649
12650 if (!VL)
12651 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12652
12653 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12654 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12655 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12656 }
12657
12658 unsigned IntID =
12659 IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
12660 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12661 if (IsUnmasked)
12662 Ops.push_back(DAG.getUNDEF(ContainerVT));
12663 else
12664 Ops.push_back(PassThru);
12665 Ops.push_back(BasePtr);
12666 Ops.push_back(Index);
12667 if (!IsUnmasked)
12668 Ops.push_back(Mask);
12669 Ops.push_back(VL);
12670 if (!IsUnmasked)
12671 Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
12672
12673 SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
12674 SDValue Result =
12675 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
12676 Chain = Result.getValue(1);
12677
12678 if (VT.isFixedLengthVector())
12679 Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
12680
12681 return DAG.getMergeValues({Result, Chain}, DL);
12682}
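// [Editor's note: illustrative sketch, not part of the LLVM source.] The
// "unsigned unscaled" addressing mode described above treats each index as a
// zero-extended byte offset from the base pointer, so indices that originally
// counted elements must already have been scaled to byte offsets by the time
// the indexed load is selected. A standalone C++ model (compile separately;
// names are hypothetical) over i32 elements:
#include <cstddef>
#include <cstdint>
#include <cstring>

void indexedGatherModel(const uint8_t *Base, const uint64_t *ByteOffsets,
                        const bool *Mask, size_t VL, uint32_t *Result) {
  for (size_t I = 0; I < VL; ++I) {
    if (!Mask[I])
      continue; // masked-off lanes keep the pass-through value
    uint32_t Elt;
    std::memcpy(&Elt, Base + ByteOffsets[I], sizeof(Elt)); // Base + zext(index)
    Result[I] = Elt;
  }
}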
12683
12684// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
12685// matched to a RVV indexed store. The RVV indexed store instructions only
12686// support the "unsigned unscaled" addressing mode; indices are implicitly
12687// zero-extended or truncated to XLEN and are treated as byte offsets. Any
12688// signed or scaled indexing is extended to the XLEN value type and scaled
12689// accordingly.
12690SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
12691 SelectionDAG &DAG) const {
12692 SDLoc DL(Op);
12693 const auto *MemSD = cast<MemSDNode>(Op.getNode());
12694 EVT MemVT = MemSD->getMemoryVT();
12695 MachineMemOperand *MMO = MemSD->getMemOperand();
12696 SDValue Chain = MemSD->getChain();
12697 SDValue BasePtr = MemSD->getBasePtr();
12698
12699 [[maybe_unused]] bool IsTruncatingStore = false;
12700 SDValue Index, Mask, Val, VL;
12701
12702 if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
12703 Index = VPSN->getIndex();
12704 Mask = VPSN->getMask();
12705 Val = VPSN->getValue();
12706 VL = VPSN->getVectorLength();
12707 // VP doesn't support truncating stores.
12708 IsTruncatingStore = false;
12709 } else {
12710 // Else it must be a MSCATTER.
12711 auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
12712 Index = MSN->getIndex();
12713 Mask = MSN->getMask();
12714 Val = MSN->getValue();
12715 IsTruncatingStore = MSN->isTruncatingStore();
12716 }
12717
12718 MVT VT = Val.getSimpleValueType();
12719 MVT IndexVT = Index.getSimpleValueType();
12720 MVT XLenVT = Subtarget.getXLenVT();
12721
12722 assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
12723 "Unexpected VTs!");
12724 assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
12725 // Targets have to explicitly opt-in for extending vector loads and
12726 // truncating vector stores.
12727 assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
12728
12729 // If the mask is known to be all ones, optimize to an unmasked intrinsic;
12730 // the selection of the masked intrinsics doesn't do this for us.
12731 bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
12732
12733 MVT ContainerVT = VT;
12734 if (VT.isFixedLengthVector()) {
12735 ContainerVT = getContainerForFixedLengthVector(VT);
12736 IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
12737 ContainerVT.getVectorElementCount());
12738
12739 Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
12740 Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
12741
12742 if (!IsUnmasked) {
12743 MVT MaskVT = getMaskTypeFor(ContainerVT);
12744 Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
12745 }
12746 }
12747
12748 if (!VL)
12749 VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
12750
12751 if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
12752 IndexVT = IndexVT.changeVectorElementType(XLenVT);
12753 Index = DAG.getNode(ISD::TRUNCATE, DL, IndexVT, Index);
12754 }
12755
12756 unsigned IntID =
12757 IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
12758 SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
12759 Ops.push_back(Val);
12760 Ops.push_back(BasePtr);
12761 Ops.push_back(Index);
12762 if (!IsUnmasked)
12763 Ops.push_back(Mask);
12764 Ops.push_back(VL);
12765
12766 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
12767 DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
12768}
12769
12770SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
12771 SelectionDAG &DAG) const {
12772 const MVT XLenVT = Subtarget.getXLenVT();
12773 SDLoc DL(Op);
12774 SDValue Chain = Op->getOperand(0);
12775 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12776 SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
12777 SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
12778
12779 // The encoding used for the rounding mode in RISC-V differs from the one used
12780 // by FLT_ROUNDS. To convert between them, the RISC-V rounding mode is used as
12781 // an index into a table consisting of a sequence of 4-bit fields, each holding
12782 // the corresponding FLT_ROUNDS mode.
12783 static const int Table =
12784 (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
12785 (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
12786 (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
12787 (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
12788 (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
12789
12790 SDValue Shift =
12791 DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
12792 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12793 DAG.getConstant(Table, DL, XLenVT), Shift);
12794 SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12795 DAG.getConstant(7, DL, XLenVT));
12796
12797 return DAG.getMergeValues({Masked, Chain}, DL);
12798}
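// [Editor's note: illustrative sketch, not part of the LLVM source.] The
// shift-and-mask above is a 5-entry lookup table packed into one integer, one
// 4-bit field per RISC-V frm value. A standalone C++ program (compile
// separately) performing the same lookup; the frm and FLT_ROUNDS encodings
// below are the standard ones and are stated here as an assumption rather
// than taken from this file:
#include <cassert>

int main() {
  enum Frm { RNE = 0, RTZ = 1, RDN = 2, RUP = 3, RMM = 4 }; // RISC-V frm
  // FLT_ROUNDS values: 0 toward zero, 1 nearest-even, 2 toward +inf,
  // 3 toward -inf, 4 nearest ties-away.
  const unsigned Table = (1u << 4 * RNE) | (0u << 4 * RTZ) | (3u << 4 * RDN) |
                         (2u << 4 * RUP) | (4u << 4 * RMM);
  // The SHL-by-2 in the lowering is the same "index * 4" used here.
  auto FltRounds = [&](unsigned Mode) { return (Table >> (4 * Mode)) & 7; };
  assert(FltRounds(RNE) == 1 && FltRounds(RTZ) == 0 && FltRounds(RDN) == 3 &&
         FltRounds(RUP) == 2 && FltRounds(RMM) == 4);
  return 0;
}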
12799
12800SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
12801 SelectionDAG &DAG) const {
12802 const MVT XLenVT = Subtarget.getXLenVT();
12803 SDLoc DL(Op);
12804 SDValue Chain = Op->getOperand(0);
12805 SDValue RMValue = Op->getOperand(1);
12806 SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::frm, DL, XLenVT);
12807
12808 // The encoding used for the rounding mode in RISC-V differs from the one used
12809 // by FLT_ROUNDS. To convert between them, the C rounding mode is used as an
12810 // index into a table consisting of a sequence of 4-bit fields, each holding
12811 // the corresponding RISC-V mode.
12812 static const unsigned Table =
12813 (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
12814 (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
12815 (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
12816 (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
12817 (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
12818
12819 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, RMValue);
12820
12821 SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
12822 DAG.getConstant(2, DL, XLenVT));
12823 SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
12824 DAG.getConstant(Table, DL, XLenVT), Shift);
12825 RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
12826 DAG.getConstant(0x7, DL, XLenVT));
12827 return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
12828 RMValue);
12829}
12830
12831SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
12832 SelectionDAG &DAG) const {
12833 MachineFunction &MF = DAG.getMachineFunction();
12834
12835 bool isRISCV64 = Subtarget.is64Bit();
12836 EVT PtrVT = getPointerTy(DAG.getDataLayout());
12837
12838 int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
12839 return DAG.getFrameIndex(FI, PtrVT);
12840}
12841
12842// Returns the opcode of the target-specific SDNode that implements the 32-bit
12843// form of the given Opcode.
12844static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
12845 switch (Opcode) {
12846 default:
12847 llvm_unreachable("Unexpected opcode");
12848 case ISD::SHL:
12849 return RISCVISD::SLLW;
12850 case ISD::SRA:
12851 return RISCVISD::SRAW;
12852 case ISD::SRL:
12853 return RISCVISD::SRLW;
12854 case ISD::SDIV:
12855 return RISCVISD::DIVW;
12856 case ISD::UDIV:
12857 return RISCVISD::DIVUW;
12858 case ISD::UREM:
12859 return RISCVISD::REMUW;
12860 case ISD::ROTL:
12861 return RISCVISD::ROLW;
12862 case ISD::ROTR:
12863 return RISCVISD::RORW;
12864 }
12865}
12866
12867// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
12868// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
12869// otherwise be promoted to i64, making it difficult to select the
12870 // SLLW/DIVUW/.../*W later on, because the fact that the operation was
12871 // originally of type i8/i16/i32 is lost.
12872 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
12873 unsigned ExtOpc = ISD::ANY_EXTEND) {
12874 SDLoc DL(N);
12875 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
12876 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
12877 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
12878 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
12879 // ReplaceNodeResults requires we maintain the same type for the return value.
12880 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
12881}
12882
12883 // Converts the given 32-bit operation to an i64 operation with signed extension
12884 // semantics to reduce the number of sign extension instructions.
12885 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
12886 SDLoc DL(N);
12887 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
12888 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
12889 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
12890 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
12891 DAG.getValueType(MVT::i32));
12892 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
12893}
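// [Editor's note: illustrative sketch, not part of the LLVM source.] The
// helper above is correct because the low 32 bits of an i64 add or sub depend
// only on the low 32 bits of the inputs, so whatever ANY_EXTEND leaves in the
// upper bits is harmless once the result is sign-extended in-register and
// truncated. A standalone C++ check (compile separately):
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x80000001u, B = 0x7fffffffu;
  uint64_t AnyExtA = 0xdeadbeef00000000ull | A; // any_extend: arbitrary upper bits
  uint64_t AnyExtB = 0x1234567800000000ull | B;
  uint64_t Wide = AnyExtA + AnyExtB;                     // the widened i64 add
  int64_t SExtInReg = (int64_t)(int32_t)(uint32_t)Wide;  // SIGN_EXTEND_INREG i32
  assert((uint32_t)SExtInReg == A + B);                  // TRUNCATE recovers the i32 result
  return 0;
}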
12894
12895 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
12896 SmallVectorImpl<SDValue> &Results,
12897 SelectionDAG &DAG) const {
12898 SDLoc DL(N);
12899 switch (N->getOpcode()) {
12900 default:
12901 llvm_unreachable("Don't know how to custom type legalize this operation!");
12902 case ISD::STRICT_FP_TO_SINT:
12903 case ISD::STRICT_FP_TO_UINT:
12904 case ISD::FP_TO_SINT:
12905 case ISD::FP_TO_UINT: {
12906 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
12907 "Unexpected custom legalisation");
12908 bool IsStrict = N->isStrictFPOpcode();
12909 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
12910 N->getOpcode() == ISD::STRICT_FP_TO_SINT;
12911 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
12912 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12913 TargetLowering::TypeSoftenFloat) {
12914 if (!isTypeLegal(Op0.getValueType()))
12915 return;
12916 if (IsStrict) {
12917 SDValue Chain = N->getOperand(0);
12918 // In the absence of Zfh, promote f16 to f32, then convert.
12919 if (Op0.getValueType() == MVT::f16 &&
12920 !Subtarget.hasStdExtZfhOrZhinx()) {
12921 Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
12922 {Chain, Op0});
12923 Chain = Op0.getValue(1);
12924 }
12925 unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
12926 : RISCVISD::STRICT_FCVT_WU_RV64;
12927 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
12928 SDValue Res = DAG.getNode(
12929 Opc, DL, VTs, Chain, Op0,
12930 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12931 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12932 Results.push_back(Res.getValue(1));
12933 return;
12934 }
12935 // For bf16, or f16 in the absence of Zfh, promote [b]f16 to f32 and then
12936 // convert.
12937 if ((Op0.getValueType() == MVT::f16 &&
12938 !Subtarget.hasStdExtZfhOrZhinx()) ||
12939 Op0.getValueType() == MVT::bf16)
12940 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12941
12942 unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
12943 SDValue Res =
12944 DAG.getNode(Opc, DL, MVT::i64, Op0,
12945 DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
12946 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12947 return;
12948 }
12949 // If the FP type needs to be softened, emit a library call using the 'si'
12950 // version. If we left it to default legalization we'd end up with 'di'. If
12951 // the FP type doesn't need to be softened just let generic type
12952 // legalization promote the result type.
12953 RTLIB::Libcall LC;
12954 if (IsSigned)
12955 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
12956 else
12957 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
12958 MakeLibCallOptions CallOptions;
12959 EVT OpVT = Op0.getValueType();
12960 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
12961 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
12962 SDValue Result;
12963 std::tie(Result, Chain) =
12964 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
12965 Results.push_back(Result);
12966 if (IsStrict)
12967 Results.push_back(Chain);
12968 break;
12969 }
12970 case ISD::LROUND: {
12971 SDValue Op0 = N->getOperand(0);
12972 EVT Op0VT = Op0.getValueType();
12973 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
12974 TargetLowering::TypeSoftenFloat) {
12975 if (!isTypeLegal(Op0VT))
12976 return;
12977
12978 // In the absence of Zfh, promote f16 to f32, then convert.
12979 if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
12980 Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
12981
12982 SDValue Res =
12983 DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
12984 DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
12985 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
12986 return;
12987 }
12988 // If the FP type needs to be softened, emit a library call to lround. We'll
12989 // need to truncate the result. We assume any value that doesn't fit in i32
12990 // is allowed to return an unspecified value.
12991 RTLIB::Libcall LC =
12992 Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
12993 MakeLibCallOptions CallOptions;
12994 EVT OpVT = Op0.getValueType();
12995 CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
12996 SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
12997 Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
12998 Results.push_back(Result);
12999 break;
13000 }
13001 case ISD::READCYCLECOUNTER:
13002 case ISD::READSTEADYCOUNTER: {
13003 assert(!Subtarget.is64Bit() && "READCYCLECOUNTER/READSTEADYCOUNTER only "
13004 "has custom type legalization on riscv32");
13005
13006 SDValue LoCounter, HiCounter;
13007 MVT XLenVT = Subtarget.getXLenVT();
13008 if (N->getOpcode() == ISD::READCYCLECOUNTER) {
13009 LoCounter = DAG.getTargetConstant(RISCVSysReg::cycle, DL, XLenVT);
13010 HiCounter = DAG.getTargetConstant(RISCVSysReg::cycleh, DL, XLenVT);
13011 } else {
13012 LoCounter = DAG.getTargetConstant(RISCVSysReg::time, DL, XLenVT);
13013 HiCounter = DAG.getTargetConstant(RISCVSysReg::timeh, DL, XLenVT);
13014 }
13015 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
13016 SDValue RCW = DAG.getNode(RISCVISD::READ_COUNTER_WIDE, DL, VTs,
13017 N->getOperand(0), LoCounter, HiCounter);
13018
13019 Results.push_back(
13020 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
13021 Results.push_back(RCW.getValue(2));
13022 break;
13023 }
13024 case ISD::LOAD: {
13025 if (!ISD::isNON_EXTLoad(N))
13026 return;
13027
13028 // Use a SEXTLOAD instead of the default EXTLOAD. Similar to the
13029 // sext_inreg we emit for ADD/SUB/MUL/SLLI.
13030 LoadSDNode *Ld = cast<LoadSDNode>(N);
13031
13032 SDLoc dl(N);
13033 SDValue Res = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Ld->getChain(),
13034 Ld->getBasePtr(), Ld->getMemoryVT(),
13035 Ld->getMemOperand());
13036 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Res));
13037 Results.push_back(Res.getValue(1));
13038 return;
13039 }
13040 case ISD::MUL: {
13041 unsigned Size = N->getSimpleValueType(0).getSizeInBits();
13042 unsigned XLen = Subtarget.getXLen();
13043 // This multiply needs to be expanded, try to use MULHSU+MUL if possible.
13044 if (Size > XLen) {
13045 assert(Size == (XLen * 2) && "Unexpected custom legalisation");
13046 SDValue LHS = N->getOperand(0);
13047 SDValue RHS = N->getOperand(1);
13048 APInt HighMask = APInt::getHighBitsSet(Size, XLen);
13049
13050 bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
13051 bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
13052 // We need exactly one side to be unsigned.
13053 if (LHSIsU == RHSIsU)
13054 return;
13055
13056 auto MakeMULPair = [&](SDValue S, SDValue U) {
13057 MVT XLenVT = Subtarget.getXLenVT();
13058 S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
13059 U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
13060 SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
13061 SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
13062 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
13063 };
13064
13065 bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
13066 bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
13067
13068 // The other operand should be signed, but still prefer MULH when
13069 // possible.
13070 if (RHSIsU && LHSIsS && !RHSIsS)
13071 Results.push_back(MakeMULPair(LHS, RHS));
13072 else if (LHSIsU && RHSIsS && !LHSIsS)
13073 Results.push_back(MakeMULPair(RHS, LHS));
13074
13075 return;
13076 }
13077 [[fallthrough]];
13078 }
13079 case ISD::ADD:
13080 case ISD::SUB:
13081 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13082 "Unexpected custom legalisation");
13083 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
13084 break;
13085 case ISD::SHL:
13086 case ISD::SRA:
13087 case ISD::SRL:
13088 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13089 "Unexpected custom legalisation");
13090 if (N->getOperand(1).getOpcode() != ISD::Constant) {
13091 // If we can use a BSET instruction, allow default promotion to apply.
13092 if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
13093 isOneConstant(N->getOperand(0)))
13094 break;
13095 Results.push_back(customLegalizeToWOp(N, DAG));
13096 break;
13097 }
13098
13099 // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
13100 // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
13101 // shift amount.
13102 if (N->getOpcode() == ISD::SHL) {
13103 SDLoc DL(N);
13104 SDValue NewOp0 =
13105 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13106 SDValue NewOp1 =
13107 DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
13108 SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
13109 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
13110 DAG.getValueType(MVT::i32));
13111 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13112 }
13113
13114 break;
13115 case ISD::ROTL:
13116 case ISD::ROTR:
13117 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13118 "Unexpected custom legalisation");
13119 assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
13120 Subtarget.hasVendorXTHeadBb()) &&
13121 "Unexpected custom legalization");
13122 if (!isa<ConstantSDNode>(N->getOperand(1)) &&
13123 !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
13124 return;
13125 Results.push_back(customLegalizeToWOp(N, DAG));
13126 break;
13127 case ISD::CTTZ:
13128 case ISD::CTTZ_ZERO_UNDEF:
13129 case ISD::CTLZ:
13130 case ISD::CTLZ_ZERO_UNDEF: {
13131 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13132 "Unexpected custom legalisation");
13133
13134 SDValue NewOp0 =
13135 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13136 bool IsCTZ =
13137 N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
13138 unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
13139 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
13140 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13141 return;
13142 }
13143 case ISD::SDIV:
13144 case ISD::UDIV:
13145 case ISD::UREM: {
13146 MVT VT = N->getSimpleValueType(0);
13147 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
13148 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
13149 "Unexpected custom legalisation");
13150 // Don't promote division/remainder by constant since we should expand those
13151 // to multiply by magic constant.
13152 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
13153 if (N->getOperand(1).getOpcode() == ISD::Constant &&
13154 !isIntDivCheap(N->getValueType(0), Attr))
13155 return;
13156
13157 // If the input is i32, use ANY_EXTEND since the W instructions don't read
13158 // the upper 32 bits. For other types we need to sign or zero extend
13159 // based on the opcode.
13160 unsigned ExtOpc = ISD::ANY_EXTEND;
13161 if (VT != MVT::i32)
13162 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
13163 : ISD::ZERO_EXTEND;
13164
13165 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
13166 break;
13167 }
13168 case ISD::SADDO: {
13169 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13170 "Unexpected custom legalisation");
13171
13172 // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
13173 // use the default legalization.
13174 if (!isa<ConstantSDNode>(N->getOperand(1)))
13175 return;
13176
13177 SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13178 SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
13179 SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
13180 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13181 DAG.getValueType(MVT::i32));
13182
13183 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13184
13185 // For an addition, the result should be less than one of the operands (LHS)
13186 // if and only if the other operand (RHS) is negative, otherwise there will
13187 // be overflow.
13188 // For a subtraction, the result should be less than one of the operands
13189 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
13190 // otherwise there will be overflow.
13191 EVT OType = N->getValueType(1);
13192 SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
13193 SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
13194
13195 SDValue Overflow =
13196 DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
13197 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13198 Results.push_back(Overflow);
13199 return;
13200 }
13201 case ISD::UADDO:
13202 case ISD::USUBO: {
13203 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13204 "Unexpected custom legalisation");
13205 bool IsAdd = N->getOpcode() == ISD::UADDO;
13206 // Create an ADDW or SUBW.
13207 SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13208 SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13209 SDValue Res =
13210 DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
13211 Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
13212 DAG.getValueType(MVT::i32));
13213
13214 SDValue Overflow;
13215 if (IsAdd && isOneConstant(RHS)) {
13216 // Special case uaddo X, 1 overflowed if the addition result is 0.
13217 // The general case (X + C) < C is not necessarily beneficial. Although we
13218 // reduce the live range of X, we may introduce the materialization of
13219 // constant C, especially when the setcc result is used by a branch. We have
13220 // no compare-with-constant-and-branch instructions.
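// For example, with X == 0xffffffff the 32-bit result of X + 1 is 0, so
// checking Res == 0 is exactly the overflow test.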
13221 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
13222 DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
13223 } else if (IsAdd && isAllOnesConstant(RHS)) {
13224 // Special case uaddo X, -1 overflowed if X != 0.
13225 Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
13226 DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
13227 } else {
13228 // Sign extend the LHS and perform an unsigned compare with the ADDW
13229 // result. Since the inputs are sign extended from i32, this is equivalent
13230 // to comparing the lower 32 bits.
13231 LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
13232 Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
13233 IsAdd ? ISD::SETULT : ISD::SETUGT);
13234 }
13235
13236 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13237 Results.push_back(Overflow);
13238 return;
13239 }
13240 case ISD::UADDSAT:
13241 case ISD::USUBSAT: {
13242 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13243 !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
13244 // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
13245 // promotion for UADDO/USUBO.
13246 Results.push_back(expandAddSubSat(N, DAG));
13247 return;
13248 }
13249 case ISD::SADDSAT:
13250 case ISD::SSUBSAT: {
13251 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13252 "Unexpected custom legalisation");
13253 Results.push_back(expandAddSubSat(N, DAG));
13254 return;
13255 }
13256 case ISD::ABS: {
13257 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
13258 "Unexpected custom legalisation");
13259
13260 if (Subtarget.hasStdExtZbb()) {
13261 // Emit a special ABSW node that will be expanded to NEGW+MAX at isel.
13262 // This allows us to remember that the result is sign extended. Expanding
13263 // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits.
13264 SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64,
13265 N->getOperand(0));
13266 SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src);
13267 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs));
13268 return;
13269 }
13270
13271 // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
13272 SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
13273
13274 // Freeze the source so we can increase its use count.
13275 Src = DAG.getFreeze(Src);
13276
13277 // Copy sign bit to all bits using the sraiw pattern.
13278 SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
13279 DAG.getValueType(MVT::i32));
13280 SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
13281 DAG.getConstant(31, DL, MVT::i64));
13282
13283 SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
13284 NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
13285
13286 // NOTE: The result is only required to be anyextended, but sext is
13287 // consistent with type legalization of sub.
13288 NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
13289 DAG.getValueType(MVT::i32));
13290 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
13291 return;
13292 }
13293 case ISD::BITCAST: {
13294 EVT VT = N->getValueType(0);
13295 assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
13296 SDValue Op0 = N->getOperand(0);
13297 EVT Op0VT = Op0.getValueType();
13298 MVT XLenVT = Subtarget.getXLenVT();
13299 if (VT == MVT::i16 &&
13300 ((Op0VT == MVT::f16 && Subtarget.hasStdExtZfhminOrZhinxmin()) ||
13301 (Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()))) {
13302 SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
13303 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
13304 } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
13305 Subtarget.hasStdExtFOrZfinx()) {
13306 SDValue FPConv =
13307 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
13308 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
13309 } else if (VT == MVT::i64 && Op0VT == MVT::f64 && !Subtarget.is64Bit() &&
13310 Subtarget.hasStdExtDOrZdinx()) {
13311 SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
13312 DAG.getVTList(MVT::i32, MVT::i32), Op0);
13313 SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
13314 NewReg.getValue(0), NewReg.getValue(1));
13315 Results.push_back(RetReg);
13316 } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
13317 isTypeLegal(Op0VT)) {
13318 // Custom-legalize bitcasts from fixed-length vector types to illegal
13319 // scalar types in order to improve codegen. Bitcast the vector to a
13320 // one-element vector type whose element type is the same as the result
13321 // type, and extract the first element.
13322 EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
13323 if (isTypeLegal(BVT)) {
13324 SDValue BVec = DAG.getBitcast(BVT, Op0);
13325 Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
13326 DAG.getVectorIdxConstant(0, DL)));
13327 }
13328 }
13329 break;
13330 }
13331 case RISCVISD::BREV8:
13332 case RISCVISD::ORC_B: {
13333 MVT VT = N->getSimpleValueType(0);
13334 MVT XLenVT = Subtarget.getXLenVT();
13335 assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
13336 "Unexpected custom legalisation");
13337 assert(((N->getOpcode() == RISCVISD::BREV8 && Subtarget.hasStdExtZbkb()) ||
13338 (N->getOpcode() == RISCVISD::ORC_B && Subtarget.hasStdExtZbb())) &&
13339 "Unexpected extension");
13340 SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
13341 SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp);
13342 // ReplaceNodeResults requires we maintain the same type for the return
13343 // value.
13344 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
13345 break;
13346 }
13347 case ISD::EXTRACT_VECTOR_ELT: {
13348 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
13349 // type is illegal (currently only vXi64 RV32).
13350 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
13351 // transferred to the destination register. We issue two of these from the
13352 // upper- and lower- halves of the SEW-bit vector element, slid down to the
13353 // first element.
13354 SDValue Vec = N->getOperand(0);
13355 SDValue Idx = N->getOperand(1);
13356
13357 // The vector type hasn't been legalized yet so we can't issue target
13358 // specific nodes if it needs legalization.
13359 // FIXME: We would manually legalize if it's important.
13360 if (!isTypeLegal(Vec.getValueType()))
13361 return;
13362
13363 MVT VecVT = Vec.getSimpleValueType();
13364
13365 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
13366 VecVT.getVectorElementType() == MVT::i64 &&
13367 "Unexpected EXTRACT_VECTOR_ELT legalization");
13368
13369 // If this is a fixed vector, we need to convert it to a scalable vector.
13370 MVT ContainerVT = VecVT;
13371 if (VecVT.isFixedLengthVector()) {
13372 ContainerVT = getContainerForFixedLengthVector(VecVT);
13373 Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
13374 }
13375
13376 MVT XLenVT = Subtarget.getXLenVT();
13377
13378 // Use a VL of 1 to avoid processing more elements than we need.
13379 auto [Mask, VL] = getDefaultVLOps(1, ContainerVT, DL, DAG, Subtarget);
13380
13381 // Unless the index is known to be 0, we must slide the vector down to get
13382 // the desired element into index 0.
13383 if (!isNullConstant(Idx)) {
13384 Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
13385 DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
13386 }
13387
13388 // Extract the lower XLEN bits of the correct vector element.
13389 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13390
13391 // To extract the upper XLEN bits of the vector element, shift the first
13392 // element right by 32 bits and re-extract the lower XLEN bits.
13393 SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
13394 DAG.getUNDEF(ContainerVT),
13395 DAG.getConstant(32, DL, XLenVT), VL);
13396 SDValue LShr32 =
13397 DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec, ThirtyTwoV,
13398 DAG.getUNDEF(ContainerVT), Mask, VL);
13399
13400 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13401
13402 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13403 break;
13404 }
13405 case ISD::INTRINSIC_WO_CHAIN: {
13406 unsigned IntNo = N->getConstantOperandVal(0);
13407 switch (IntNo) {
13408 default:
13409 llvm_unreachable(
13410 "Don't know how to custom type legalize this intrinsic!");
13411 case Intrinsic::experimental_get_vector_length: {
13412 SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
13413 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13414 return;
13415 }
13416 case Intrinsic::experimental_cttz_elts: {
13417 SDValue Res = lowerCttzElts(N, DAG, Subtarget);
13418 Results.push_back(
13419 DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res));
13420 return;
13421 }
13422 case Intrinsic::riscv_orc_b:
13423 case Intrinsic::riscv_brev8:
13424 case Intrinsic::riscv_sha256sig0:
13425 case Intrinsic::riscv_sha256sig1:
13426 case Intrinsic::riscv_sha256sum0:
13427 case Intrinsic::riscv_sha256sum1:
13428 case Intrinsic::riscv_sm3p0:
13429 case Intrinsic::riscv_sm3p1: {
13430 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13431 return;
13432 unsigned Opc;
13433 switch (IntNo) {
13434 case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
13435 case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
13436 case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
13437 case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
13438 case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
13439 case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
13440 case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
13441 case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
13442 }
13443
13444 SDValue NewOp =
13445 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13446 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
13447 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13448 return;
13449 }
13450 case Intrinsic::riscv_sm4ks:
13451 case Intrinsic::riscv_sm4ed: {
13452 unsigned Opc =
13453 IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
13454 SDValue NewOp0 =
13455 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13456 SDValue NewOp1 =
13457 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13458 SDValue Res =
13459 DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
13460 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13461 return;
13462 }
13463 case Intrinsic::riscv_mopr: {
13464 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13465 return;
13466 SDValue NewOp =
13467 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13468 SDValue Res = DAG.getNode(
13469 RISCVISD::MOPR, DL, MVT::i64, NewOp,
13470 DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
13471 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13472 return;
13473 }
13474 case Intrinsic::riscv_moprr: {
13475 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13476 return;
13477 SDValue NewOp0 =
13478 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13479 SDValue NewOp1 =
13480 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13481 SDValue Res = DAG.getNode(
13482 RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
13483 DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
13484 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13485 return;
13486 }
13487 case Intrinsic::riscv_clmul: {
13488 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13489 return;
13490
13491 SDValue NewOp0 =
13492 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13493 SDValue NewOp1 =
13494 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13495 SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
13496 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13497 return;
13498 }
13499 case Intrinsic::riscv_clmulh:
13500 case Intrinsic::riscv_clmulr: {
13501 if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
13502 return;
13503
13504 // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
13505 // to the full 128-bit clmul result of multiplying two xlen values.
13506 // Perform clmulr or clmulh on the shifted values. Finally, extract the
13507 // upper 32 bits.
13508 //
13509 // The alternative is to mask the inputs to 32 bits and use clmul, but
13510 // that requires two shifts to mask each input without zext.w.
13511 // FIXME: If the inputs are known zero extended or could be freely
13512 // zero extended, the mask form would be better.
13513 SDValue NewOp0 =
13514 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
13515 SDValue NewOp1 =
13516 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
13517 NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
13518 DAG.getConstant(32, DL, MVT::i64));
13519 NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
13520 DAG.getConstant(32, DL, MVT::i64));
13521 unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
13522 : RISCVISD::CLMULR;
13523 SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
13524 Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
13525 DAG.getConstant(32, DL, MVT::i64));
13526 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
13527 return;
13528 }
13529 case Intrinsic::riscv_vmv_x_s: {
13530 EVT VT = N->getValueType(0);
13531 MVT XLenVT = Subtarget.getXLenVT();
13532 if (VT.bitsLT(XLenVT)) {
13533 // Simple case just extract using vmv.x.s and truncate.
13534 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
13535 Subtarget.getXLenVT(), N->getOperand(1));
13536 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
13537 return;
13538 }
13539
13540 assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
13541 "Unexpected custom legalization");
13542
13543 // We need to do the move in two steps.
13544 SDValue Vec = N->getOperand(1);
13545 MVT VecVT = Vec.getSimpleValueType();
13546
13547 // First extract the lower XLEN bits of the element.
13548 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
13549
13550 // To extract the upper XLEN bits of the vector element, shift the first
13551 // element right by 32 bits and re-extract the lower XLEN bits.
13552 auto [Mask, VL] = getDefaultVLOps(1, VecVT, DL, DAG, Subtarget);
13553
13554 SDValue ThirtyTwoV =
13555 DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
13556 DAG.getConstant(32, DL, XLenVT), VL);
13557 SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV,
13558 DAG.getUNDEF(VecVT), Mask, VL);
13559 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
13560
13561 Results.push_back(
13562 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
13563 break;
13564 }
13565 }
13566 break;
13567 }
13568 case ISD::VECREDUCE_ADD:
13569 case ISD::VECREDUCE_AND:
13570 case ISD::VECREDUCE_OR:
13571 case ISD::VECREDUCE_XOR:
13572 case ISD::VECREDUCE_SMAX:
13573 case ISD::VECREDUCE_UMAX:
13574 case ISD::VECREDUCE_SMIN:
13575 case ISD::VECREDUCE_UMIN:
13576 if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
13577 Results.push_back(V);
13578 break;
13579 case ISD::VP_REDUCE_ADD:
13580 case ISD::VP_REDUCE_AND:
13581 case ISD::VP_REDUCE_OR:
13582 case ISD::VP_REDUCE_XOR:
13583 case ISD::VP_REDUCE_SMAX:
13584 case ISD::VP_REDUCE_UMAX:
13585 case ISD::VP_REDUCE_SMIN:
13586 case ISD::VP_REDUCE_UMIN:
13587 if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
13588 Results.push_back(V);
13589 break;
13590 case ISD::GET_ROUNDING: {
13591 SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
13592 SDValue Res = DAG.getNode(ISD::GET_ROUNDING, DL, VTs, N->getOperand(0));
13593 Results.push_back(Res.getValue(0));
13594 Results.push_back(Res.getValue(1));
13595 break;
13596 }
13597 }
13598}
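// [Editor's note: illustrative sketch, not part of the LLVM source.] The
// ISD::MUL expansion above builds the 2*XLEN product from MUL (low half) and
// MULHSU (high half) when exactly one operand is known signed and the other
// known unsigned. A standalone C++ check of that decomposition for XLEN=32
// (compile separately; MULHSU is modelled per the RISC-V M-extension
// semantics of "signed * unsigned, upper XLEN bits", stated as an assumption):
#include <cassert>
#include <cstdint>

int main() {
  int32_t S = -12345;        // the signed operand
  uint32_t U = 0xfedcba98u;  // the operand whose upper bits are known zero
  int64_t Full = (int64_t)S * (int64_t)U;         // the i64 product
  uint32_t Lo = (uint32_t)((uint32_t)S * U);      // MUL: low 32 bits
  uint32_t Hi = (uint32_t)((uint64_t)Full >> 32); // MULHSU: high 32 bits
  assert((uint64_t)Full == (((uint64_t)Hi << 32) | Lo)); // BUILD_PAIR(Lo, Hi)
  return 0;
}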
13599
13600/// Given a binary operator, return the *associative* generic ISD::VECREDUCE_OP
13601/// which corresponds to it.
13602static unsigned getVecReduceOpcode(unsigned Opc) {
13603 switch (Opc) {
13604 default:
13605 llvm_unreachable("Unhandled binary to transform reduction");
13606 case ISD::ADD:
13607 return ISD::VECREDUCE_ADD;
13608 case ISD::UMAX:
13609 return ISD::VECREDUCE_UMAX;
13610 case ISD::SMAX:
13611 return ISD::VECREDUCE_SMAX;
13612 case ISD::UMIN:
13613 return ISD::VECREDUCE_UMIN;
13614 case ISD::SMIN:
13615 return ISD::VECREDUCE_SMIN;
13616 case ISD::AND:
13617 return ISD::VECREDUCE_AND;
13618 case ISD::OR:
13619 return ISD::VECREDUCE_OR;
13620 case ISD::XOR:
13621 return ISD::VECREDUCE_XOR;
13622 case ISD::FADD:
13623 // Note: This is the associative form of the generic reduction opcode.
13624 return ISD::VECREDUCE_FADD;
13625 }
13626}
13627
13628/// Perform two related transforms whose purpose is to incrementally recognize
13629/// an explode_vector followed by scalar reduction as a vector reduction node.
13630/// This exists to recover from a deficiency in SLP which can't handle
13631/// forests with multiple roots sharing common nodes. In some cases, one
13632/// of the trees will be vectorized, and the other will remain (unprofitably)
13633/// scalarized.
13634static SDValue
13635 combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
13636 const RISCVSubtarget &Subtarget) {
13637
13638 // This transform needs to run before all integer types have been legalized
13639 // to i64 (so that the vector element type matches the add type), and while
13640 // it's safe to introduce odd sized vector types.
13641 if (DAG.NewNodesMustHaveLegalTypes)
13642 return SDValue();
13643
13644 // Without V, this transform isn't useful. We could form the (illegal)
13645 // operations and let them be scalarized again, but there's really no point.
13646 if (!Subtarget.hasVInstructions())
13647 return SDValue();
13648
13649 const SDLoc DL(N);
13650 const EVT VT = N->getValueType(0);
13651 const unsigned Opc = N->getOpcode();
13652
13653 // For FADD, we only handle the case with reassociation allowed. We
13654 // could handle strict reduction order, but at the moment, there's no
13655 // known reason to, and the complexity isn't worth it.
13656 // TODO: Handle fminnum and fmaxnum here
13657 if (!VT.isInteger() &&
13658 (Opc != ISD::FADD || !N->getFlags().hasAllowReassociation()))
13659 return SDValue();
13660
13661 const unsigned ReduceOpc = getVecReduceOpcode(Opc);
13662 assert(Opc == ISD::getVecReduceBaseOpcode(ReduceOpc) &&
13663 "Inconsistent mappings");
13664 SDValue LHS = N->getOperand(0);
13665 SDValue RHS = N->getOperand(1);
13666
13667 if (!LHS.hasOneUse() || !RHS.hasOneUse())
13668 return SDValue();
13669
13670 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13671 std::swap(LHS, RHS);
13672
13673 if (RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13674 !isa<ConstantSDNode>(RHS.getOperand(1)))
13675 return SDValue();
13676
13677 uint64_t RHSIdx = cast<ConstantSDNode>(RHS.getOperand(1))->getLimitedValue();
13678 SDValue SrcVec = RHS.getOperand(0);
13679 EVT SrcVecVT = SrcVec.getValueType();
13680 assert(SrcVecVT.getVectorElementType() == VT);
13681 if (SrcVecVT.isScalableVector())
13682 return SDValue();
13683
13684 if (SrcVecVT.getScalarSizeInBits() > Subtarget.getELen())
13685 return SDValue();
13686
13687 // match binop (extract_vector_elt V, 0), (extract_vector_elt V, 1) to
13688 // reduce_op (extract_subvector [2 x VT] from V). This will form the
13689 // root of our reduction tree. TODO: We could extend this to any two
13690 // adjacent aligned constant indices if desired.
13691 if (LHS.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13692 LHS.getOperand(0) == SrcVec && isa<ConstantSDNode>(LHS.getOperand(1))) {
13693 uint64_t LHSIdx =
13694 cast<ConstantSDNode>(LHS.getOperand(1))->getLimitedValue();
13695 if (0 == std::min(LHSIdx, RHSIdx) && 1 == std::max(LHSIdx, RHSIdx)) {
13696 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, 2);
13697 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13698 DAG.getVectorIdxConstant(0, DL));
13699 return DAG.getNode(ReduceOpc, DL, VT, Vec, N->getFlags());
13700 }
13701 }
13702
13703 // Match (binop (reduce (extract_subvector V, 0),
13704 // (extract_vector_elt V, sizeof(SubVec))))
13705 // into a reduction of one more element from the original vector V.
13706 if (LHS.getOpcode() != ReduceOpc)
13707 return SDValue();
13708
13709 SDValue ReduceVec = LHS.getOperand(0);
13710 if (ReduceVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13711 ReduceVec.hasOneUse() && ReduceVec.getOperand(0) == RHS.getOperand(0) &&
13712 isNullConstant(ReduceVec.getOperand(1)) &&
13713 ReduceVec.getValueType().getVectorNumElements() == RHSIdx) {
13714 // For illegal types (e.g. 3xi32), most will be combined again into a
13715 // wider (hopefully legal) type. If this is a terminal state, we are
13716 // relying on type legalization here to produce something reasonable
13717 // and this lowering quality could probably be improved. (TODO)
13718 EVT ReduceVT = EVT::getVectorVT(*DAG.getContext(), VT, RHSIdx + 1);
13719 SDValue Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ReduceVT, SrcVec,
13720 DAG.getVectorIdxConstant(0, DL));
13721 return DAG.getNode(ReduceOpc, DL, VT, Vec,
13722 ReduceVec->getFlags() & N->getFlags());
13723 }
13724
13725 return SDValue();
13726}
13727
13728
13729// Try to fold (<bop> x, (reduction.<bop> vec, start))
13730 static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
13731 const RISCVSubtarget &Subtarget) {
13732 auto BinOpToRVVReduce = [](unsigned Opc) {
13733 switch (Opc) {
13734 default:
13735 llvm_unreachable("Unhandled binary to transform reduction");
13736 case ISD::ADD:
13737 return RISCVISD::VECREDUCE_ADD_VL;
13738 case ISD::UMAX:
13739 return RISCVISD::VECREDUCE_UMAX_VL;
13740 case ISD::SMAX:
13741 return RISCVISD::VECREDUCE_SMAX_VL;
13742 case ISD::UMIN:
13743 return RISCVISD::VECREDUCE_UMIN_VL;
13744 case ISD::SMIN:
13745 return RISCVISD::VECREDUCE_SMIN_VL;
13746 case ISD::AND:
13747 return RISCVISD::VECREDUCE_AND_VL;
13748 case ISD::OR:
13749 return RISCVISD::VECREDUCE_OR_VL;
13750 case ISD::XOR:
13751 return RISCVISD::VECREDUCE_XOR_VL;
13752 case ISD::FADD:
13753 return RISCVISD::VECREDUCE_FADD_VL;
13754 case ISD::FMAXNUM:
13755 return RISCVISD::VECREDUCE_FMAX_VL;
13756 case ISD::FMINNUM:
13757 return RISCVISD::VECREDUCE_FMIN_VL;
13758 }
13759 };
13760
13761 auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
13762 return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13763 isNullConstant(V.getOperand(1)) &&
13764 V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
13765 };
13766
13767 unsigned Opc = N->getOpcode();
13768 unsigned ReduceIdx;
13769 if (IsReduction(N->getOperand(0), Opc))
13770 ReduceIdx = 0;
13771 else if (IsReduction(N->getOperand(1), Opc))
13772 ReduceIdx = 1;
13773 else
13774 return SDValue();
13775
13776 // Skip if FADD disallows reassociation but the combiner needs it.
13777 if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
13778 return SDValue();
13779
13780 SDValue Extract = N->getOperand(ReduceIdx);
13781 SDValue Reduce = Extract.getOperand(0);
13782 if (!Extract.hasOneUse() || !Reduce.hasOneUse())
13783 return SDValue();
13784
13785 SDValue ScalarV = Reduce.getOperand(2);
13786 EVT ScalarVT = ScalarV.getValueType();
13787 if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
13788 ScalarV.getOperand(0)->isUndef() &&
13789 isNullConstant(ScalarV.getOperand(2)))
13790 ScalarV = ScalarV.getOperand(1);
13791
13792 // Make sure that ScalarV is a splat with VL=1.
13793 if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
13794 ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
13795 ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
13796 return SDValue();
13797
13798 if (!isNonZeroAVL(ScalarV.getOperand(2)))
13799 return SDValue();
13800
13801 // Check the scalar of ScalarV is neutral element
13802 // TODO: Deal with value other than neutral element.
13803 if (!isNeutralConstant(N->getOpcode(), N->getFlags(), ScalarV.getOperand(1),
13804 0))
13805 return SDValue();
13806
13807 // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
13808 // FIXME: We might be able to improve this if operand 0 is undef.
13809 if (!isNonZeroAVL(Reduce.getOperand(5)))
13810 return SDValue();
13811
13812 SDValue NewStart = N->getOperand(1 - ReduceIdx);
13813
13814 SDLoc DL(N);
13815 SDValue NewScalarV =
13816 lowerScalarInsert(NewStart, ScalarV.getOperand(2),
13817 ScalarV.getSimpleValueType(), DL, DAG, Subtarget);
13818
13819 // If we looked through an INSERT_SUBVECTOR we need to restore it.
13820 if (ScalarVT != ScalarV.getValueType())
13821 NewScalarV =
13822 DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
13823 NewScalarV, DAG.getVectorIdxConstant(0, DL));
13824
13825 SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
13826 NewScalarV, Reduce.getOperand(3),
13827 Reduce.getOperand(4), Reduce.getOperand(5)};
13828 SDValue NewReduce =
13829 DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
13830 return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
13831 Extract.getOperand(1));
13832}
13833
13834// Optimize (add (shl x, c0), (shl y, c1)) ->
13835// (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
13836 static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
13837 const RISCVSubtarget &Subtarget) {
13838 // Perform this optimization only in the zba extension.
13839 if (!Subtarget.hasStdExtZba())
13840 return SDValue();
13841
13842 // Skip for vector types and larger types.
13843 EVT VT = N->getValueType(0);
13844 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13845 return SDValue();
13846
13847 // The two operand nodes must be SHL and have no other use.
13848 SDValue N0 = N->getOperand(0);
13849 SDValue N1 = N->getOperand(1);
13850 if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
13851 !N0->hasOneUse() || !N1->hasOneUse())
13852 return SDValue();
13853
13854 // Check c0 and c1.
13855 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13856 auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
13857 if (!N0C || !N1C)
13858 return SDValue();
13859 int64_t C0 = N0C->getSExtValue();
13860 int64_t C1 = N1C->getSExtValue();
13861 if (C0 <= 0 || C1 <= 0)
13862 return SDValue();
13863
13864 // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
13865 int64_t Bits = std::min(C0, C1);
13866 int64_t Diff = std::abs(C0 - C1);
13867 if (Diff != 1 && Diff != 2 && Diff != 3)
13868 return SDValue();
13869
13870 // Build nodes.
13871 SDLoc DL(N);
13872 SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
13873 SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
13874 SDValue SHADD = DAG.getNode(RISCVISD::SHL_ADD, DL, VT, NL,
13875 DAG.getConstant(Diff, DL, VT), NS);
13876 return DAG.getNode(ISD::SHL, DL, VT, SHADD, DAG.getConstant(Bits, DL, VT));
13877}
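// [Editor's note: illustrative sketch, not part of the LLVM source.] The
// rewrite above relies on the identity
//   (x << c0) + (y << c1) == ((NL << Diff) + NS) << Bits
// where NS/NL are the operands shifted by the smaller/larger amount,
// Diff = |c0 - c1| (so SH1ADD/SH2ADD/SH3ADD applies) and Bits = min(c0, c1).
// A standalone C++ check (compile separately):
#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x1234, Y = 0xbeef;
  unsigned C0 = 5, C1 = 7;                          // Diff = 2 -> SH2ADD, Bits = 5
  uint64_t Original = (X << C0) + (Y << C1);
  uint64_t NS = X, NL = Y, Diff = C1 - C0, Bits = C0;
  uint64_t Rewritten = ((NL << Diff) + NS) << Bits; // SLLI(SH2ADD(NL, NS), Bits)
  assert(Original == Rewritten);
  return 0;
}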
13878
13879// Combine a constant select operand into its use:
13880//
13881// (and (select cond, -1, c), x)
13882// -> (select cond, x, (and x, c)) [AllOnes=1]
13883// (or (select cond, 0, c), x)
13884// -> (select cond, x, (or x, c)) [AllOnes=0]
13885// (xor (select cond, 0, c), x)
13886// -> (select cond, x, (xor x, c)) [AllOnes=0]
13887// (add (select cond, 0, c), x)
13888// -> (select cond, x, (add x, c)) [AllOnes=0]
13889// (sub x, (select cond, 0, c))
13890// -> (select cond, x, (sub x, c)) [AllOnes=0]
13891 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
13892 SelectionDAG &DAG, bool AllOnes,
13893 const RISCVSubtarget &Subtarget) {
13894 EVT VT = N->getValueType(0);
13895
13896 // Skip vectors.
13897 if (VT.isVector())
13898 return SDValue();
13899
13900 if (!Subtarget.hasConditionalMoveFusion()) {
13901 // (select cond, x, (and x, c)) has custom lowering with Zicond.
13902 if ((!Subtarget.hasStdExtZicond() &&
13903 !Subtarget.hasVendorXVentanaCondOps()) ||
13904 N->getOpcode() != ISD::AND)
13905 return SDValue();
13906
13907 // Maybe harmful when the condition code has multiple uses.
13908 if (Slct.getOpcode() == ISD::SELECT && !Slct.getOperand(0).hasOneUse())
13909 return SDValue();
13910
13911 // Maybe harmful when VT is wider than XLen.
13912 if (VT.getSizeInBits() > Subtarget.getXLen())
13913 return SDValue();
13914 }
13915
13916 if ((Slct.getOpcode() != ISD::SELECT &&
13917 Slct.getOpcode() != RISCVISD::SELECT_CC) ||
13918 !Slct.hasOneUse())
13919 return SDValue();
13920
13921 auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
13922 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
13923 };
13924
13925 bool SwapSelectOps;
13926 unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
13927 SDValue TrueVal = Slct.getOperand(1 + OpOffset);
13928 SDValue FalseVal = Slct.getOperand(2 + OpOffset);
13929 SDValue NonConstantVal;
13930 if (isZeroOrAllOnes(TrueVal, AllOnes)) {
13931 SwapSelectOps = false;
13932 NonConstantVal = FalseVal;
13933 } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
13934 SwapSelectOps = true;
13935 NonConstantVal = TrueVal;
13936 } else
13937 return SDValue();
13938
13939 // Slct is now known to be the desired identity constant when CC is true.
13940 TrueVal = OtherOp;
13941 FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
13942 // Unless SwapSelectOps says the condition should be false.
13943 if (SwapSelectOps)
13944 std::swap(TrueVal, FalseVal);
13945
13946 if (Slct.getOpcode() == RISCVISD::SELECT_CC)
13947 return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
13948 {Slct.getOperand(0), Slct.getOperand(1),
13949 Slct.getOperand(2), TrueVal, FalseVal});
13950
13951 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
13952 {Slct.getOperand(0), TrueVal, FalseVal});
13953}
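// [Editor's note: illustrative sketch, not part of the LLVM source.] The fold
// above is just the observation that x is unchanged by AND with all-ones (or
// by OR/XOR/ADD with zero), so the select can be moved past the binary op.
// A standalone C++ check (compile separately) for two of the listed patterns:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xabcd1234u, C = 0x0ff0u;
  for (bool Cond : {false, true}) {
    // (and (select cond, -1, c), x) -> (select cond, x, (and x, c))
    assert(((Cond ? 0xffffffffu : C) & X) == (Cond ? X : (X & C)));
    // (add (select cond, 0, c), x)  -> (select cond, x, (add x, c))
    assert(((Cond ? 0u : C) + X) == (Cond ? X : (X + C)));
  }
  return 0;
}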
13954
13955// Attempt combineSelectAndUse on each operand of a commutative operator N.
13956 static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
13957 bool AllOnes,
13958 const RISCVSubtarget &Subtarget) {
13959 SDValue N0 = N->getOperand(0);
13960 SDValue N1 = N->getOperand(1);
13961 if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes, Subtarget))
13962 return Result;
13963 if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes, Subtarget))
13964 return Result;
13965 return SDValue();
13966}
13967
13968// Transform (add (mul x, c0), c1) ->
13969// (add (mul (add x, c1/c0), c0), c1%c0).
13970// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
13971// that should be excluded is when c0*(c1/c0) is simm12, which will lead
13972// to an infinite loop in DAGCombine if transformed.
13973// Or transform (add (mul x, c0), c1) ->
13974// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
13975// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
13976// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
13977// lead to an infinite loop in DAGCombine if transformed.
13978// Or transform (add (mul x, c0), c1) ->
13979// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
13980// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
13981// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
13982// lead to an infinite loop in DAGCombine if transformed.
13983// Or transform (add (mul x, c0), c1) ->
13984// (mul (add x, c1/c0), c0).
13985// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
13986 static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
13987 const RISCVSubtarget &Subtarget) {
13988 // Skip for vector types and larger types.
13989 EVT VT = N->getValueType(0);
13990 if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
13991 return SDValue();
13992 // The first operand node must be a MUL and have no other use.
13993 SDValue N0 = N->getOperand(0);
13994 if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
13995 return SDValue();
13996 // Check if c0 and c1 match above conditions.
13997 auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13998 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
13999 if (!N0C || !N1C)
14000 return SDValue();
14001 // If N0C has multiple uses it's possible one of the cases in
14002 // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
14003 // in an infinite loop.
14004 if (!N0C->hasOneUse())
14005 return SDValue();
14006 int64_t C0 = N0C->getSExtValue();
14007 int64_t C1 = N1C->getSExtValue();
14008 int64_t CA, CB;
14009 if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
14010 return SDValue();
14011 // Search for proper CA (non-zero) and CB that both are simm12.
14012 if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
14013 !isInt<12>(C0 * (C1 / C0))) {
14014 CA = C1 / C0;
14015 CB = C1 % C0;
14016 } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
14017 isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
14018 CA = C1 / C0 + 1;
14019 CB = C1 % C0 - C0;
14020 } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
14021 isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
14022 CA = C1 / C0 - 1;
14023 CB = C1 % C0 + C0;
14024 } else
14025 return SDValue();
14026 // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
14027 SDLoc DL(N);
14028 SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
14029 DAG.getSignedConstant(CA, DL, VT));
14030 SDValue New1 =
14031 DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getSignedConstant(C0, DL, VT));
14032 return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getSignedConstant(CB, DL, VT));
14033}
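// Worked example of the first form above (constants chosen for illustration):
// with c0 = 100 and c1 = 4099 (not a simm12), c1/c0 = 40 and c1%c0 = 99 are
// both simm12 while c0*(c1/c0) = 4000 is not, so
//   (add (mul x, 100), 4099) -> (add (mul (add x, 40), 100), 99).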
14034
14035// add (zext, zext) -> zext (add (zext, zext))
14036// sub (zext, zext) -> sext (sub (zext, zext))
14037// mul (zext, zext) -> zext (mul (zext, zext))
14038// sdiv (zext, zext) -> zext (sdiv (zext, zext))
14039// udiv (zext, zext) -> zext (udiv (zext, zext))
14040// srem (zext, zext) -> zext (srem (zext, zext))
14041// urem (zext, zext) -> zext (urem (zext, zext))
14042//
14043 // where the sum of the extend widths matches, and the range of the bin op
14044// fits inside the width of the narrower bin op. (For profitability on rvv, we
14045// use a power of two for both inner and outer extend.)
14046 static SDValue combineBinOpOfZExt(SDNode *N, SelectionDAG &DAG) {
14047
14048 EVT VT = N->getValueType(0);
14049 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
14050 return SDValue();
14051
14052 SDValue N0 = N->getOperand(0);
14053 SDValue N1 = N->getOperand(1);
14054 if (N0.getOpcode() != ISD::ZERO_EXTEND || N1.getOpcode() != ISD::ZERO_EXTEND)
14055 return SDValue();
14056 if (!N0.hasOneUse() || !N1.hasOneUse())
14057 return SDValue();
14058
14059 SDValue Src0 = N0.getOperand(0);
14060 SDValue Src1 = N1.getOperand(0);
14061 EVT SrcVT = Src0.getValueType();
14062 if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT) ||
14063 SrcVT != Src1.getValueType() || SrcVT.getScalarSizeInBits() < 8 ||
14064 SrcVT.getScalarSizeInBits() >= VT.getScalarSizeInBits() / 2)
14065 return SDValue();
14066
14067 LLVMContext &C = *DAG.getContext();
14068 EVT ElemVT = VT.getVectorElementType().getHalfSizedIntegerVT(C);
14069 EVT NarrowVT = EVT::getVectorVT(C, ElemVT, VT.getVectorElementCount());
14070
14071 Src0 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src0), NarrowVT, Src0);
14072 Src1 = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Src1), NarrowVT, Src1);
14073
14074 // Src0 and Src1 are zero extended, so they're always positive if signed.
14075 //
14076 // sub can produce a negative from two positive operands, so it needs sign
14077 // extended. Other nodes produce a positive from two positive operands, so
14078 // zero extend instead.
14079 unsigned OuterExtend =
14080 N->getOpcode() == ISD::SUB ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
14081
14082 return DAG.getNode(
14083 OuterExtend, SDLoc(N), VT,
14084 DAG.getNode(N->getOpcode(), SDLoc(N), NarrowVT, Src0, Src1));
14085}
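// Illustrative instance of the rewrite above (types chosen for exposition):
//   (add (zext nxv2i8 %a to nxv2i32), (zext nxv2i8 %b to nxv2i32))
//   -> (zext (add (zext %a to nxv2i16), (zext %b to nxv2i16)) to nxv2i32)
// The i16 add cannot overflow because each operand fits in 8 bits.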
14086
14087// Try to turn (add (xor bool, 1) -1) into (neg bool).
14088 static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
14089 SDValue N0 = N->getOperand(0);
14090 SDValue N1 = N->getOperand(1);
14091 EVT VT = N->getValueType(0);
14092 SDLoc DL(N);
14093
14094 // RHS should be -1.
14095 if (!isAllOnesConstant(N1))
14096 return SDValue();
14097
14098 // Look for (xor X, 1).
14099 if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)))
14100 return SDValue();
14101
14102 // First xor input should be 0 or 1.
14103 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14104 if (!DAG.MaskedValueIsZero(N0.getOperand(0), Mask))
14105 return SDValue();
14106
14107 // Emit a negate of the setcc.
14108 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
14109 N0.getOperand(0));
14110}
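// Illustrative example: if %b is known to be 0 or 1, then (xor %b, 1) == 1 - %b,
// so (add (xor %b, 1), -1) == -%b and is emitted as (sub 0, %b).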
14111
14114 const RISCVSubtarget &Subtarget) {
14115 SelectionDAG &DAG = DCI.DAG;
14116 if (SDValue V = combineAddOfBooleanXor(N, DAG))
14117 return V;
14118 if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
14119 return V;
14120 if (!DCI.isBeforeLegalize() && !DCI.isCalledByLegalizer())
14121 if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
14122 return V;
14123 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14124 return V;
14125 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14126 return V;
14127 if (SDValue V = combineBinOpOfZExt(N, DAG))
14128 return V;
14129
14130 // fold (add (select lhs, rhs, cc, 0, y), x) ->
14131 // (select lhs, rhs, cc, x, (add x, y))
14132 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14133}
14134
14135// Try to turn a sub boolean RHS and constant LHS into an addi.
14136 static SDValue combineSubOfBoolean(SDNode *N, SelectionDAG &DAG) {
14137 SDValue N0 = N->getOperand(0);
14138 SDValue N1 = N->getOperand(1);
14139 EVT VT = N->getValueType(0);
14140 SDLoc DL(N);
14141
14142 // Require a constant LHS.
14143 auto *N0C = dyn_cast<ConstantSDNode>(N0);
14144 if (!N0C)
14145 return SDValue();
14146
14147 // All our optimizations involve subtracting 1 from the immediate and forming
14148 // an ADDI. Make sure the new immediate is valid for an ADDI.
14149 APInt ImmValMinus1 = N0C->getAPIntValue() - 1;
14150 if (!ImmValMinus1.isSignedIntN(12))
14151 return SDValue();
14152
14153 SDValue NewLHS;
14154 if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
14155 // (sub constant, (setcc x, y, eq/neq)) ->
14156 // (add (setcc x, y, neq/eq), constant - 1)
14157 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14158 EVT SetCCOpVT = N1.getOperand(0).getValueType();
14159 if (!isIntEqualitySetCC(CCVal) || !SetCCOpVT.isInteger())
14160 return SDValue();
14161 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
14162 NewLHS =
14163 DAG.getSetCC(SDLoc(N1), VT, N1.getOperand(0), N1.getOperand(1), CCVal);
14164 } else if (N1.getOpcode() == ISD::XOR && isOneConstant(N1.getOperand(1)) &&
14165 N1.getOperand(0).getOpcode() == ISD::SETCC) {
14166 // (sub C, (xor (setcc), 1)) -> (add (setcc), C-1).
14167 // Since setcc returns a bool the xor is equivalent to 1-setcc.
14168 NewLHS = N1.getOperand(0);
14169 } else
14170 return SDValue();
14171
14172 SDValue NewRHS = DAG.getConstant(ImmValMinus1, DL, VT);
14173 return DAG.getNode(ISD::ADD, DL, VT, NewLHS, NewRHS);
14174}
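// Worked example (constant chosen for illustration):
//   (sub 5, (setcc %x, %y, eq)) -> (add (setcc %x, %y, ne), 4)
// since 5 - b == (1 - b) + 4 for a boolean b, and 4 is a valid ADDI immediate.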
14175
14176// Looks for (sub (shl X, 8-Y), (shr X, Y)) where the Y-th bit in each byte is
14177// potentially set. It is fine for Y to be 0, meaning that (sub (shl X, 8), X)
14178// is also valid. Replace with (orc.b X). For example, 0b0000_1000_0000_1000 is
14179// valid with Y=3, while 0b0000_1000_0000_0100 is not.
14180 static SDValue combineSubShiftToOrcB(SDNode *N, SelectionDAG &DAG,
14181 const RISCVSubtarget &Subtarget) {
14182 if (!Subtarget.hasStdExtZbb())
14183 return SDValue();
14184
14185 EVT VT = N->getValueType(0);
14186
14187 if (VT != Subtarget.getXLenVT() && VT != MVT::i32 && VT != MVT::i16)
14188 return SDValue();
14189
14190 SDValue N0 = N->getOperand(0);
14191 SDValue N1 = N->getOperand(1);
14192
14193 if (N0->getOpcode() != ISD::SHL)
14194 return SDValue();
14195
14196 auto *ShAmtCLeft = dyn_cast<ConstantSDNode>(N0.getOperand(1));
14197 if (!ShAmtCLeft)
14198 return SDValue();
14199 unsigned ShiftedAmount = 8 - ShAmtCLeft->getZExtValue();
14200
14201 if (ShiftedAmount >= 8)
14202 return SDValue();
14203
14204 SDValue LeftShiftOperand = N0->getOperand(0);
14205 SDValue RightShiftOperand = N1;
14206
14207 if (ShiftedAmount != 0) { // Right operand must be a right shift.
14208 if (N1->getOpcode() != ISD::SRL)
14209 return SDValue();
14210 auto *ShAmtCRight = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14211 if (!ShAmtCRight || ShAmtCRight->getZExtValue() != ShiftedAmount)
14212 return SDValue();
14213 RightShiftOperand = N1.getOperand(0);
14214 }
14215
14216 // At least one shift should have a single use.
14217 if (!N0.hasOneUse() && (ShiftedAmount == 0 || !N1.hasOneUse()))
14218 return SDValue();
14219
14220 if (LeftShiftOperand != RightShiftOperand)
14221 return SDValue();
14222
14223 APInt Mask = APInt::getSplat(VT.getSizeInBits(), APInt(8, 0x1));
14224 Mask <<= ShiftedAmount;
14225 // Check that X has indeed the right shape (only the Y-th bit can be set in
14226 // every byte).
14227 if (!DAG.MaskedValueIsZero(LeftShiftOperand, ~Mask))
14228 return SDValue();
14229
14230 return DAG.getNode(RISCVISD::ORC_B, SDLoc(N), VT, LeftShiftOperand);
14231}
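// Worked example with Y = 3, so only bit 3 of each byte of X may be set:
//   (sub (shl X, 5), (srl X, 3)) -> (orc.b X)
// e.g. for i16 X = 0x0808, (0x0808 << 5) - (0x0808 >> 3) = 0x0100 - 0x0101
// = 0xffff, matching orc.b turning every non-zero byte into 0xff.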
14232
14234 const RISCVSubtarget &Subtarget) {
14235 if (SDValue V = combineSubOfBoolean(N, DAG))
14236 return V;
14237
14238 EVT VT = N->getValueType(0);
14239 SDValue N0 = N->getOperand(0);
14240 SDValue N1 = N->getOperand(1);
14241 // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
14242 if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
14243 isNullConstant(N1.getOperand(1))) {
14244 ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
14245 if (CCVal == ISD::SETLT) {
14246 SDLoc DL(N);
14247 unsigned ShAmt = N0.getValueSizeInBits() - 1;
14248 return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
14249 DAG.getConstant(ShAmt, DL, VT));
14250 }
14251 }
14252
14253 if (SDValue V = combineBinOpOfZExt(N, DAG))
14254 return V;
14255 if (SDValue V = combineSubShiftToOrcB(N, DAG, Subtarget))
14256 return V;
14257
14258 // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
14259 // (select lhs, rhs, cc, x, (sub x, y))
14260 return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
14261}
14262
14263// Apply DeMorgan's law to (and/or (xor X, 1), (xor Y, 1)) if X and Y are 0/1.
14264// Legalizing setcc can introduce xors like this. Doing this transform reduces
14265// the number of xors and may allow the xor to fold into a branch condition.
14266 static SDValue combineDeMorganOfBoolean(SDNode *N, SelectionDAG &DAG) {
14267 SDValue N0 = N->getOperand(0);
14268 SDValue N1 = N->getOperand(1);
14269 bool IsAnd = N->getOpcode() == ISD::AND;
14270
14271 if (N0.getOpcode() != ISD::XOR || N1.getOpcode() != ISD::XOR)
14272 return SDValue();
14273
14274 if (!N0.hasOneUse() || !N1.hasOneUse())
14275 return SDValue();
14276
14277 SDValue N01 = N0.getOperand(1);
14278 SDValue N11 = N1.getOperand(1);
14279
14280 // For AND, SimplifyDemandedBits may have turned one of the (xor X, 1) into
14281 // (xor X, -1) based on the upper bits of the other operand being 0. If the
14282 // operation is And, allow one of the Xors to use -1.
14283 if (isOneConstant(N01)) {
14284 if (!isOneConstant(N11) && !(IsAnd && isAllOnesConstant(N11)))
14285 return SDValue();
14286 } else if (isOneConstant(N11)) {
14287 // N01 and N11 being 1 was already handled. Handle N11==1 and N01==-1.
14288 if (!(IsAnd && isAllOnesConstant(N01)))
14289 return SDValue();
14290 } else
14291 return SDValue();
14292
14293 EVT VT = N->getValueType(0);
14294
14295 SDValue N00 = N0.getOperand(0);
14296 SDValue N10 = N1.getOperand(0);
14297
14298 // The LHS of the xors needs to be 0/1.
14299 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
14300 if (!DAG.MaskedValueIsZero(N00, Mask) || !DAG.MaskedValueIsZero(N10, Mask))
14301 return SDValue();
14302
14303 // Invert the opcode and insert a new xor.
14304 SDLoc DL(N);
14305 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
14306 SDValue Logic = DAG.getNode(Opc, DL, VT, N00, N10);
14307 return DAG.getNode(ISD::XOR, DL, VT, Logic, DAG.getConstant(1, DL, VT));
14308}
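// Illustrative example: if %x and %y are each known to be 0 or 1, then
//   (and (xor %x, 1), (xor %y, 1)) -> (xor (or %x, %y), 1)
// i.e. De Morgan's !x & !y == !(x | y), leaving a single xor.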
14309
14310// Fold (vXi8 (trunc (vselect (setltu, X, 256), X, (sext (setgt X, 0))))) to
14311// (vXi8 (trunc (smin (smax X, 0), 255))). This represents saturating a signed
14312 // value to an unsigned value. This will be lowered to vmax and a series of
14313 // vnclipu instructions later. This can be extended to truncated types other
14314 // than i8 by replacing 256 and 255 with the equivalent constants for the
14315// type.
14316 static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
14317 EVT VT = N->getValueType(0);
14318 SDValue N0 = N->getOperand(0);
14319 EVT SrcVT = N0.getValueType();
14320
14321 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14322 if (!VT.isVector() || !TLI.isTypeLegal(VT) || !TLI.isTypeLegal(SrcVT))
14323 return SDValue();
14324
14325 if (N0.getOpcode() != ISD::VSELECT || !N0.hasOneUse())
14326 return SDValue();
14327
14328 SDValue Cond = N0.getOperand(0);
14329 SDValue True = N0.getOperand(1);
14330 SDValue False = N0.getOperand(2);
14331
14332 if (Cond.getOpcode() != ISD::SETCC)
14333 return SDValue();
14334
14335 // FIXME: Support the version of this pattern with the select operands
14336 // swapped.
14337 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14338 if (CCVal != ISD::SETULT)
14339 return SDValue();
14340
14341 SDValue CondLHS = Cond.getOperand(0);
14342 SDValue CondRHS = Cond.getOperand(1);
14343
14344 if (CondLHS != True)
14345 return SDValue();
14346
14347 unsigned ScalarBits = VT.getScalarSizeInBits();
14348
14349 // FIXME: Support other constants.
14350 ConstantSDNode *CondRHSC = isConstOrConstSplat(CondRHS);
14351 if (!CondRHSC || CondRHSC->getAPIntValue() != (1ULL << ScalarBits))
14352 return SDValue();
14353
14354 if (False.getOpcode() != ISD::SIGN_EXTEND)
14355 return SDValue();
14356
14357 False = False.getOperand(0);
14358
14359 if (False.getOpcode() != ISD::SETCC || False.getOperand(0) != True)
14360 return SDValue();
14361
14362 ConstantSDNode *FalseRHSC = isConstOrConstSplat(False.getOperand(1));
14363 if (!FalseRHSC || !FalseRHSC->isZero())
14364 return SDValue();
14365
14366 ISD::CondCode CCVal2 = cast<CondCodeSDNode>(False.getOperand(2))->get();
14367 if (CCVal2 != ISD::SETGT)
14368 return SDValue();
14369
14370 // Emit the signed to unsigned saturation pattern.
14371 SDLoc DL(N);
14372 SDValue Max =
14373 DAG.getNode(ISD::SMAX, DL, SrcVT, True, DAG.getConstant(0, DL, SrcVT));
14374 SDValue Min =
14375 DAG.getNode(ISD::SMIN, DL, SrcVT, Max,
14376 DAG.getConstant((1ULL << ScalarBits) - 1, DL, SrcVT));
14377 return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
14378}
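// Worked example for the i8 case above: a signed i16 element of -5 clamps to 0,
// 300 clamps to 255, and 42 is unchanged, so (smin (smax X, 0), 255) followed by
// the truncate implements the signed-to-unsigned saturation.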
14379
14381 const RISCVSubtarget &Subtarget) {
14382 SDValue N0 = N->getOperand(0);
14383 EVT VT = N->getValueType(0);
14384
14385 // Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
14386 // extending X. This is safe since we only need the LSB after the shift and
14387 // shift amounts larger than 31 would produce poison. If we wait until
14388 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14389 // to use a BEXT instruction.
14390 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() && VT == MVT::i1 &&
14391 N0.getValueType() == MVT::i32 && N0.getOpcode() == ISD::SRL &&
14392 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14393 SDLoc DL(N0);
14394 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14395 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14396 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14397 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Srl);
14398 }
14399
14400 return combineTruncSelectToSMaxUSat(N, DAG);
14401}
14402
14403 // Combines two comparison operations and a logic operation into one selection
14404 // operation (min, max) and a logic operation. Returns the newly constructed node
14405 // if the conditions for the optimization are satisfied.
14408 const RISCVSubtarget &Subtarget) {
14409 SelectionDAG &DAG = DCI.DAG;
14410
14411 SDValue N0 = N->getOperand(0);
14412 // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
14413 // extending X. This is safe since we only need the LSB after the shift and
14414 // shift amounts larger than 31 would produce poison. If we wait until
14415 // type legalization, we'll create RISCVISD::SRLW and we can't recover it
14416 // to use a BEXT instruction.
14417 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14418 N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
14419 N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
14420 N0.hasOneUse()) {
14421 SDLoc DL(N);
14422 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14423 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14424 SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
14425 SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
14426 DAG.getConstant(1, DL, MVT::i64));
14427 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14428 }
14429
14430 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14431 return V;
14432 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14433 return V;
14434
14435 if (DCI.isAfterLegalizeDAG())
14436 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14437 return V;
14438
14439 // fold (and (select lhs, rhs, cc, -1, y), x) ->
14440 // (select lhs, rhs, cc, x, (and x, y))
14441 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true, Subtarget);
14442}
14443
14444// Try to pull an xor with 1 through a select idiom that uses czero_eqz/nez.
14445// FIXME: Generalize to other binary operators with same operand.
14446 static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
14447 SelectionDAG &DAG) {
14448 assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
14449
14450 if (N0.getOpcode() != RISCVISD::CZERO_EQZ ||
14451 N1.getOpcode() != RISCVISD::CZERO_NEZ ||
14452 !N0.hasOneUse() || !N1.hasOneUse())
14453 return SDValue();
14454
14455 // Should have the same condition.
14456 SDValue Cond = N0.getOperand(1);
14457 if (Cond != N1.getOperand(1))
14458 return SDValue();
14459
14460 SDValue TrueV = N0.getOperand(0);
14461 SDValue FalseV = N1.getOperand(0);
14462
14463 if (TrueV.getOpcode() != ISD::XOR || FalseV.getOpcode() != ISD::XOR ||
14464 TrueV.getOperand(1) != FalseV.getOperand(1) ||
14465 !isOneConstant(TrueV.getOperand(1)) ||
14466 !TrueV.hasOneUse() || !FalseV.hasOneUse())
14467 return SDValue();
14468
14469 EVT VT = N->getValueType(0);
14470 SDLoc DL(N);
14471
14472 SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
14473 Cond);
14474 SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
14475 Cond);
14476 SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
14477 return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
14478}
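// Illustrative instance of the fold above, with a shared condition %c:
//   (or (czero_eqz (xor %a, 1), %c), (czero_nez (xor %b, 1), %c))
//   -> (xor (or (czero_eqz %a, %c), (czero_nez %b, %c)), 1)
// The common xor with 1 is pulled out past the select idiom.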
14479
14481 const RISCVSubtarget &Subtarget) {
14482 SelectionDAG &DAG = DCI.DAG;
14483
14484 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14485 return V;
14486 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14487 return V;
14488
14489 if (DCI.isAfterLegalizeDAG())
14490 if (SDValue V = combineDeMorganOfBoolean(N, DAG))
14491 return V;
14492
14493 // Look for an OR of CZERO_EQZ/NEZ with the same condition, which is the select idiom.
14494 // We may be able to pull a common operation out of the true and false value.
14495 SDValue N0 = N->getOperand(0);
14496 SDValue N1 = N->getOperand(1);
14497 if (SDValue V = combineOrOfCZERO(N, N0, N1, DAG))
14498 return V;
14499 if (SDValue V = combineOrOfCZERO(N, N1, N0, DAG))
14500 return V;
14501
14502 // fold (or (select cond, 0, y), x) ->
14503 // (select cond, x, (or x, y))
14504 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14505}
14506
14508 const RISCVSubtarget &Subtarget) {
14509 SDValue N0 = N->getOperand(0);
14510 SDValue N1 = N->getOperand(1);
14511
14512 // Pre-promote (i32 (xor (shl -1, X), ~0)) on RV64 with Zbs so we can use
14513 // (ADDI (BSET X0, X), -1). If we wait until type legalization, we'll create
14514 // RISCVISD::SLLW and we can't recover it to use a BSET instruction.
14515 if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
14516 N->getValueType(0) == MVT::i32 && isAllOnesConstant(N1) &&
14517 N0.getOpcode() == ISD::SHL && isAllOnesConstant(N0.getOperand(0)) &&
14518 !isa<ConstantSDNode>(N0.getOperand(1)) && N0.hasOneUse()) {
14519 SDLoc DL(N);
14520 SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
14521 SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
14522 SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
14523 SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
14524 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
14525 }
14526
14527 // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
14528 // NOTE: Assumes ROL being legal means ROLW is legal.
14529 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14530 if (N0.getOpcode() == RISCVISD::SLLW &&
14531 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
14532 TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
14533 SDLoc DL(N);
14534 return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
14535 DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
14536 }
14537
14538 // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
14539 if (N0.getOpcode() == ISD::SETCC && isOneConstant(N1) && N0.hasOneUse()) {
14540 auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
14541 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
14542 if (ConstN00 && CC == ISD::SETLT) {
14543 EVT VT = N0.getValueType();
14544 SDLoc DL(N0);
14545 const APInt &Imm = ConstN00->getAPIntValue();
14546 if ((Imm + 1).isSignedIntN(12))
14547 return DAG.getSetCC(DL, VT, N0.getOperand(1),
14548 DAG.getConstant(Imm + 1, DL, VT), CC);
14549 }
14550 }
14551
14552 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
14553 return V;
14554 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
14555 return V;
14556
14557 // fold (xor (select cond, 0, y), x) ->
14558 // (select cond, x, (xor x, y))
14559 return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
14560}
14561
14562// Try to expand a scalar multiply to a faster sequence.
14563 static SDValue expandMul(SDNode *N, SelectionDAG &DAG,
14564 TargetLowering::DAGCombinerInfo &DCI,
14565 const RISCVSubtarget &Subtarget) {
14566
14567 EVT VT = N->getValueType(0);
14568
14569 // LI + MUL is usually smaller than the alternative sequence.
14570 if (DAG.getMachineFunction().getFunction().hasMinSize())
14571 return SDValue();
14572
14573 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14574 return SDValue();
14575
14576 if (VT != Subtarget.getXLenVT())
14577 return SDValue();
14578
14579 const bool HasShlAdd =
14580 Subtarget.hasStdExtZba() || Subtarget.hasVendorXTHeadBa();
14581
14582 ConstantSDNode *CNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
14583 if (!CNode)
14584 return SDValue();
14585 uint64_t MulAmt = CNode->getZExtValue();
14586
14587 // WARNING: The code below is knowingly incorrect with regard to undef semantics.
14588 // We're adding additional uses of X here, and in principle, we should be freezing
14589 // X before doing so. However, adding freeze here causes real regressions, and no
14590 // other target properly freezes X in these cases either.
14591 SDValue X = N->getOperand(0);
14592
14593 if (HasShlAdd) {
14594 for (uint64_t Divisor : {3, 5, 9}) {
14595 if (MulAmt % Divisor != 0)
14596 continue;
14597 uint64_t MulAmt2 = MulAmt / Divisor;
14598 // 3/5/9 * 2^N -> shl (shXadd X, X), N
14599 if (isPowerOf2_64(MulAmt2)) {
14600 SDLoc DL(N);
14601 SDValue X = N->getOperand(0);
14602 // Put the shift first if we can fold a zext into the
14603 // shift forming a slli.uw.
14604 if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
14605 X.getConstantOperandVal(1) == UINT64_C(0xffffffff)) {
14606 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X,
14607 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14608 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Shl,
14609 DAG.getConstant(Log2_64(Divisor - 1), DL, VT),
14610 Shl);
14611 }
14612 // Otherwise, put the shl second so that it can fold with following
14613 // instructions (e.g. sext or add).
14614 SDValue Mul359 =
14615 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14616 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14617 return DAG.getNode(ISD::SHL, DL, VT, Mul359,
14618 DAG.getConstant(Log2_64(MulAmt2), DL, VT));
14619 }
14620
14621 // 3/5/9 * 3/5/9 -> shXadd (shYadd X, X), (shYadd X, X)
14622 if (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9) {
14623 SDLoc DL(N);
14624 SDValue Mul359 =
14625 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14626 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14627 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14628 DAG.getConstant(Log2_64(MulAmt2 - 1), DL, VT),
14629 Mul359);
14630 }
14631 }
14632
14633 // If this is a power of 2 + 2/4/8, we can use a shift followed by a single
14634 // shXadd. First check if this is a sum of two powers of 2 because that's
14635 // easy. Then count the trailing zeros up to the first set bit.
14636 if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
14637 unsigned ScaleShift = llvm::countr_zero(MulAmt);
14638 if (ScaleShift >= 1 && ScaleShift < 4) {
14639 unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
14640 SDLoc DL(N);
14641 SDValue Shift1 =
14642 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14643 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14644 DAG.getConstant(ScaleShift, DL, VT), Shift1);
14645 }
14646 }
14647
14648 // 2^(1,2,3) * 3,5,9 + 1 -> (shXadd (shYadd x, x), x)
14649 // This is the two-instruction form; there are also three-instruction
14650 // variants we could implement, e.g.:
14651 // (2^(1,2,3) * 3,5,9 + 1) << C2
14652 // 2^(C1>3) * 3,5,9 +/- 1
14653 for (uint64_t Divisor : {3, 5, 9}) {
14654 uint64_t C = MulAmt - 1;
14655 if (C <= Divisor)
14656 continue;
14657 unsigned TZ = llvm::countr_zero(C);
14658 if ((C >> TZ) == Divisor && (TZ == 1 || TZ == 2 || TZ == 3)) {
14659 SDLoc DL(N);
14660 SDValue Mul359 =
14661 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14662 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14663 return DAG.getNode(RISCVISD::SHL_ADD, DL, VT, Mul359,
14664 DAG.getConstant(TZ, DL, VT), X);
14665 }
14666 }
14667
14668 // 2^n + 2/4/8 + 1 -> (add (shl X, C1), (shXadd X, X))
14669 if (MulAmt > 2 && isPowerOf2_64((MulAmt - 1) & (MulAmt - 2))) {
14670 unsigned ScaleShift = llvm::countr_zero(MulAmt - 1);
14671 if (ScaleShift >= 1 && ScaleShift < 4) {
14672 unsigned ShiftAmt = Log2_64(((MulAmt - 1) & (MulAmt - 2)));
14673 SDLoc DL(N);
14674 SDValue Shift1 =
14675 DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
14676 return DAG.getNode(ISD::ADD, DL, VT, Shift1,
14677 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14678 DAG.getConstant(ScaleShift, DL, VT), X));
14679 }
14680 }
14681
14682 // 2^N - 3/5/9 --> (sub (shl X, C1), (shXadd X, X))
14683 for (uint64_t Offset : {3, 5, 9}) {
14684 if (isPowerOf2_64(MulAmt + Offset)) {
14685 SDLoc DL(N);
14686 SDValue Shift1 =
14687 DAG.getNode(ISD::SHL, DL, VT, X,
14688 DAG.getConstant(Log2_64(MulAmt + Offset), DL, VT));
14689 SDValue Mul359 =
14690 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14691 DAG.getConstant(Log2_64(Offset - 1), DL, VT), X);
14692 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Mul359);
14693 }
14694 }
14695 }
14696
14697 // 2^N - 2^M -> (sub (shl X, C1), (shl X, C2))
14698 uint64_t MulAmtLowBit = MulAmt & (-MulAmt);
14699 if (isPowerOf2_64(MulAmt + MulAmtLowBit)) {
14700 uint64_t ShiftAmt1 = MulAmt + MulAmtLowBit;
14701 SDLoc DL(N);
14702 SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14703 DAG.getConstant(Log2_64(ShiftAmt1), DL, VT));
14704 SDValue Shift2 =
14705 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
14706 DAG.getConstant(Log2_64(MulAmtLowBit), DL, VT));
14707 return DAG.getNode(ISD::SUB, DL, VT, Shift1, Shift2);
14708 }
14709
14710 if (HasShlAdd) {
14711 for (uint64_t Divisor : {3, 5, 9}) {
14712 if (MulAmt % Divisor != 0)
14713 continue;
14714 uint64_t MulAmt2 = MulAmt / Divisor;
14715 // 3/5/9 * 3/5/9 * 2^N - In particular, this covers multiples
14716 // of 25 which happen to be quite common.
14717 for (uint64_t Divisor2 : {3, 5, 9}) {
14718 if (MulAmt2 % Divisor2 != 0)
14719 continue;
14720 uint64_t MulAmt3 = MulAmt2 / Divisor2;
14721 if (isPowerOf2_64(MulAmt3)) {
14722 SDLoc DL(N);
14723 SDValue Mul359A =
14724 DAG.getNode(RISCVISD::SHL_ADD, DL, VT, X,
14725 DAG.getConstant(Log2_64(Divisor - 1), DL, VT), X);
14726 SDValue Mul359B = DAG.getNode(
14727 RISCVISD::SHL_ADD, DL, VT, Mul359A,
14728 DAG.getConstant(Log2_64(Divisor2 - 1), DL, VT), Mul359A);
14729 return DAG.getNode(ISD::SHL, DL, VT, Mul359B,
14730 DAG.getConstant(Log2_64(MulAmt3), DL, VT));
14731 }
14732 }
14733 }
14734 }
14735
14736 return SDValue();
14737}
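// Worked examples of the expansions above (multipliers chosen for illustration,
// assuming Zba/XTHeadBa provides shXadd):
//   x * 6  -> (shl (sh1add x, x), 1)                  ; 3 * 2^1
//   x * 25 -> (sh2add (sh2add x, x), (sh2add x, x))   ; 5 * 5
//   x * 62 -> (sub (shl x, 6), (shl x, 1))            ; 2^6 - 2^1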
14738
14739// Combine vXi32 (mul (and (lshr X, 15), 0x10001), 0xffff) ->
14740// (bitcast (sra (v2Xi16 (bitcast X)), 15))
14741// Same for other equivalent types with other equivalent constants.
14743 EVT VT = N->getValueType(0);
14744 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14745
14746 // Do this for legal vectors unless they are i1 or i8 vectors.
14747 if (!VT.isVector() || !TLI.isTypeLegal(VT) || VT.getScalarSizeInBits() < 16)
14748 return SDValue();
14749
14750 if (N->getOperand(0).getOpcode() != ISD::AND ||
14751 N->getOperand(0).getOperand(0).getOpcode() != ISD::SRL)
14752 return SDValue();
14753
14754 SDValue And = N->getOperand(0);
14755 SDValue Srl = And.getOperand(0);
14756
14757 APInt V1, V2, V3;
14758 if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), V1) ||
14759 !ISD::isConstantSplatVector(And.getOperand(1).getNode(), V2) ||
14760 !ISD::isConstantSplatVector(Srl.getOperand(1).getNode(), V3))
14761 return SDValue();
14762
14763 unsigned HalfSize = VT.getScalarSizeInBits() / 2;
14764 if (!V1.isMask(HalfSize) || V2 != (1ULL | 1ULL << HalfSize) ||
14765 V3 != (HalfSize - 1))
14766 return SDValue();
14767
14768 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
14769 EVT::getIntegerVT(*DAG.getContext(), HalfSize),
14770 VT.getVectorElementCount() * 2);
14771 SDLoc DL(N);
14772 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, HalfVT, Srl.getOperand(0));
14773 SDValue Sra = DAG.getNode(ISD::SRA, DL, HalfVT, Cast,
14774 DAG.getConstant(HalfSize - 1, DL, HalfVT));
14775 return DAG.getNode(ISD::BITCAST, DL, VT, Sra);
14776}
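// Illustrative instance for v4i32 (HalfSize = 16):
//   (mul (and (srl X, 15), 0x10001), 0xffff)
//   -> (bitcast (sra (v8i16 (bitcast X)), 15))
// Each i16 half becomes 0 or -1 depending on its sign bit, which is exactly
// what the original mask-and-multiply computed per 32-bit lane.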
14777
14780 const RISCVSubtarget &Subtarget) {
14781 EVT VT = N->getValueType(0);
14782 if (!VT.isVector())
14783 return expandMul(N, DAG, DCI, Subtarget);
14784
14785 SDLoc DL(N);
14786 SDValue N0 = N->getOperand(0);
14787 SDValue N1 = N->getOperand(1);
14788 SDValue MulOper;
14789 unsigned AddSubOpc;
14790
14791 // vmadd: (mul (add x, 1), y) -> (add (mul x, y), y)
14792 // (mul x, add (y, 1)) -> (add x, (mul x, y))
14793 // vnmsub: (mul (sub 1, x), y) -> (sub y, (mul x, y))
14794 // (mul x, (sub 1, y)) -> (sub x, (mul x, y))
14795 auto IsAddSubWith1 = [&](SDValue V) -> bool {
14796 AddSubOpc = V->getOpcode();
14797 if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
14798 SDValue Opnd = V->getOperand(1);
14799 MulOper = V->getOperand(0);
14800 if (AddSubOpc == ISD::SUB)
14801 std::swap(Opnd, MulOper);
14802 if (isOneOrOneSplat(Opnd))
14803 return true;
14804 }
14805 return false;
14806 };
14807
14808 if (IsAddSubWith1(N0)) {
14809 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
14810 return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
14811 }
14812
14813 if (IsAddSubWith1(N1)) {
14814 SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
14815 return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
14816 }
14817
14818 if (SDValue V = combineBinOpOfZExt(N, DAG))
14819 return V;
14820
14822 return V;
14823
14824 return SDValue();
14825}
14826
14827/// According to the property that indexed load/store instructions zero-extend
14828 /// their indices, try to narrow the type of the index operand.
14829static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
14830 if (isIndexTypeSigned(IndexType))
14831 return false;
14832
14833 if (!N->hasOneUse())
14834 return false;
14835
14836 EVT VT = N.getValueType();
14837 SDLoc DL(N);
14838
14839 // In general, what we're doing here is seeing if we can sink a truncate to
14840 // a smaller element type into the expression tree building our index.
14841 // TODO: We can generalize this and handle a bunch more cases if useful.
14842
14843 // Narrow a buildvector to the narrowest element type. This requires less
14844 // work and less register pressure at high LMUL, and creates smaller constants
14845 // which may be cheaper to materialize.
14846 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
14847 KnownBits Known = DAG.computeKnownBits(N);
14848 unsigned ActiveBits = std::max(8u, Known.countMaxActiveBits());
14849 LLVMContext &C = *DAG.getContext();
14850 EVT ResultVT = EVT::getIntegerVT(C, ActiveBits).getRoundIntegerType(C);
14851 if (ResultVT.bitsLT(VT.getVectorElementType())) {
14852 N = DAG.getNode(ISD::TRUNCATE, DL,
14853 VT.changeVectorElementType(ResultVT), N);
14854 return true;
14855 }
14856 }
14857
14858 // Handle the pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
14859 if (N.getOpcode() != ISD::SHL)
14860 return false;
14861
14862 SDValue N0 = N.getOperand(0);
14863 if (N0.getOpcode() != ISD::ZERO_EXTEND &&
14864 N0.getOpcode() != RISCVISD::VZEXT_VL)
14865 return false;
14866 if (!N0->hasOneUse())
14867 return false;
14868
14869 APInt ShAmt;
14870 SDValue N1 = N.getOperand(1);
14871 if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
14872 return false;
14873
14874 SDValue Src = N0.getOperand(0);
14875 EVT SrcVT = Src.getValueType();
14876 unsigned SrcElen = SrcVT.getScalarSizeInBits();
14877 unsigned ShAmtV = ShAmt.getZExtValue();
14878 unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
14879 NewElen = std::max(NewElen, 8U);
14880
14881 // Skip if NewElen is not narrower than the original extended type.
14882 if (NewElen >= N0.getValueType().getScalarSizeInBits())
14883 return false;
14884
14885 EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
14886 EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
14887
14888 SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
14889 SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
14890 N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
14891 return true;
14892}
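// Worked example (types chosen for illustration): an unsigned index of the form
//   (shl (zext nxv1i8 %x to nxv1i64), splat 2)
// can only have 8 + 2 = 10 significant bits, so it is rebuilt as
//   (shl (zext nxv1i8 %x to nxv1i16), splat 2)
// which reduces the register pressure of the index vector.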
14893
14894// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
14895// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
14896// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
14897// can become a sext.w instead of a shift pair.
14899 const RISCVSubtarget &Subtarget) {
14900 SDValue N0 = N->getOperand(0);
14901 SDValue N1 = N->getOperand(1);
14902 EVT VT = N->getValueType(0);
14903 EVT OpVT = N0.getValueType();
14904
14905 if (OpVT != MVT::i64 || !Subtarget.is64Bit())
14906 return SDValue();
14907
14908 // RHS needs to be a constant.
14909 auto *N1C = dyn_cast<ConstantSDNode>(N1);
14910 if (!N1C)
14911 return SDValue();
14912
14913 // LHS needs to be (and X, 0xffffffff).
14914 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
14915 !isa<ConstantSDNode>(N0.getOperand(1)) ||
14916 N0.getConstantOperandVal(1) != UINT64_C(0xffffffff))
14917 return SDValue();
14918
14919 // Looking for an equality compare.
14920 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
14921 if (!isIntEqualitySetCC(Cond))
14922 return SDValue();
14923
14924 // Don't do this if the sign bit is provably zero; it will be turned back into
14925 // an AND.
14926 APInt SignMask = APInt::getOneBitSet(64, 31);
14927 if (DAG.MaskedValueIsZero(N0.getOperand(0), SignMask))
14928 return SDValue();
14929
14930 const APInt &C1 = N1C->getAPIntValue();
14931
14932 SDLoc dl(N);
14933 // If the constant is larger than 2^32 - 1 it is impossible for both sides
14934 // to be equal.
14935 if (C1.getActiveBits() > 32)
14936 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
14937
14938 SDValue SExtOp = DAG.getNode(ISD::SIGN_EXTEND_INREG, N, OpVT,
14939 N0.getOperand(0), DAG.getValueType(MVT::i32));
14940 return DAG.getSetCC(dl, VT, SExtOp, DAG.getConstant(C1.trunc(32).sext(64),
14941 dl, OpVT), Cond);
14942}
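// Worked example (constant chosen for illustration): on RV64,
//   (seteq (and X, 0xffffffff), 0x80000000)
//   -> (seteq (sext_inreg X, i32), 0xffffffff80000000)
// The sign-extended constant compares equal exactly when the low 32 bits of X
// are 0x80000000, and the sext_inreg can become a single sext.w.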
14943
14944static SDValue
14946 const RISCVSubtarget &Subtarget) {
14947 SDValue Src = N->getOperand(0);
14948 EVT VT = N->getValueType(0);
14949 EVT SrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14950 unsigned Opc = Src.getOpcode();
14951
14952 // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
14953 // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
14954 if (Opc == RISCVISD::FMV_X_ANYEXTH && SrcVT.bitsGE(MVT::i16) &&
14955 Subtarget.hasStdExtZfhmin())
14956 return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
14957 Src.getOperand(0));
14958
14959 // Fold (sext_inreg (shl X, Y), i32) -> (sllw X, Y) iff Y u< 32
14960 if (Opc == ISD::SHL && Subtarget.is64Bit() && SrcVT == MVT::i32 &&
14961 VT == MVT::i64 && !isa<ConstantSDNode>(Src.getOperand(1)) &&
14962 DAG.computeKnownBits(Src.getOperand(1)).countMaxActiveBits() <= 5)
14963 return DAG.getNode(RISCVISD::SLLW, SDLoc(N), VT, Src.getOperand(0),
14964 Src.getOperand(1));
14965
14966 return SDValue();
14967}
14968
14969namespace {
14970// Forward declaration of the structure holding the necessary information to
14971// apply a combine.
14972struct CombineResult;
14973
14974enum ExtKind : uint8_t { ZExt = 1 << 0, SExt = 1 << 1, FPExt = 1 << 2 };
14975/// Helper class for folding sign/zero extensions.
14976/// In particular, this class is used for the following combines:
14977/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
14978/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
14979/// mul | mul_vl -> vwmul(u) | vwmul_su
14980/// shl | shl_vl -> vwsll
14981/// fadd -> vfwadd | vfwadd_w
14982/// fsub -> vfwsub | vfwsub_w
14983/// fmul -> vfwmul
14984/// An object of this class represents an operand of the operation we want to
14985/// combine.
14986/// E.g., when trying to combine `mul_vl a, b`, we will have one instance of
14987/// NodeExtensionHelper for `a` and one for `b`.
14988///
14989/// This class abstracts away how the extension is materialized and
14990 /// how its number of users affects the combines.
14991///
14992/// In particular:
14993/// - VWADD_W is conceptually == add(op0, sext(op1))
14994/// - VWADDU_W == add(op0, zext(op1))
14995/// - VWSUB_W == sub(op0, sext(op1))
14996/// - VWSUBU_W == sub(op0, zext(op1))
14997/// - VFWADD_W == fadd(op0, fpext(op1))
14998/// - VFWSUB_W == fsub(op0, fpext(op1))
14999/// And VMV_V_X_VL, depending on the value, is conceptually equivalent to
15000/// zext|sext(smaller_value).
15001struct NodeExtensionHelper {
15002 /// Records if this operand is like being zero extended.
15003 bool SupportsZExt;
15004 /// Records if this operand is like being sign extended.
15005 /// Note: SupportsZExt and SupportsSExt are not mutually exclusive. For
15006 /// instance, a splat constant (e.g., 3) would support being both sign and
15007 /// zero extended.
15008 bool SupportsSExt;
15009 /// Records if this operand is like being floating-point extended.
15010 bool SupportsFPExt;
15011 /// This boolean captures whether we care if this operand would still be
15012 /// around after the folding happens.
15013 bool EnforceOneUse;
15014 /// Original value that this NodeExtensionHelper represents.
15015 SDValue OrigOperand;
15016
15017 /// Get the value feeding the extension or the value itself.
15018 /// E.g., for zext(a), this would return a.
15019 SDValue getSource() const {
15020 switch (OrigOperand.getOpcode()) {
15021 case ISD::ZERO_EXTEND:
15022 case ISD::SIGN_EXTEND:
15023 case RISCVISD::VSEXT_VL:
15024 case RISCVISD::VZEXT_VL:
15026 return OrigOperand.getOperand(0);
15027 default:
15028 return OrigOperand;
15029 }
15030 }
15031
15032 /// Check if this instance represents a splat.
15033 bool isSplat() const {
15034 return OrigOperand.getOpcode() == RISCVISD::VMV_V_X_VL ||
15035 OrigOperand.getOpcode() == ISD::SPLAT_VECTOR;
15036 }
15037
15038 /// Get the extended opcode.
15039 unsigned getExtOpc(ExtKind SupportsExt) const {
15040 switch (SupportsExt) {
15041 case ExtKind::SExt:
15042 return RISCVISD::VSEXT_VL;
15043 case ExtKind::ZExt:
15044 return RISCVISD::VZEXT_VL;
15045 case ExtKind::FPExt:
15046 return RISCVISD::FP_EXTEND_VL;
15047 }
15048 llvm_unreachable("Unknown ExtKind enum");
15049 }
15050
15051 /// Get or create a value that can feed \p Root with the given extension \p
15052 /// SupportsExt. If \p SExt is std::nullopt, this returns the source of this
15053 /// operand. \see ::getSource().
15054 SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
15055 const RISCVSubtarget &Subtarget,
15056 std::optional<ExtKind> SupportsExt) const {
15057 if (!SupportsExt.has_value())
15058 return OrigOperand;
15059
15060 MVT NarrowVT = getNarrowType(Root, *SupportsExt);
15061
15062 SDValue Source = getSource();
15063 assert(Subtarget.getTargetLowering()->isTypeLegal(Source.getValueType()));
15064 if (Source.getValueType() == NarrowVT)
15065 return Source;
15066
15067 // vfmadd_vl -> vfwmadd_vl can take bf16 operands
15068 if (Source.getValueType().getVectorElementType() == MVT::bf16) {
15069 assert(Root->getSimpleValueType(0).getVectorElementType() == MVT::f32 &&
15070 Root->getOpcode() == RISCVISD::VFMADD_VL);
15071 return Source;
15072 }
15073
15074 unsigned ExtOpc = getExtOpc(*SupportsExt);
15075
15076 // If we need an extension, we should be changing the type.
15077 SDLoc DL(OrigOperand);
15078 auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
15079 switch (OrigOperand.getOpcode()) {
15080 case ISD::ZERO_EXTEND:
15081 case ISD::SIGN_EXTEND:
15082 case RISCVISD::VSEXT_VL:
15083 case RISCVISD::VZEXT_VL:
15085 return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
15086 case ISD::SPLAT_VECTOR:
15087 return DAG.getSplat(NarrowVT, DL, Source.getOperand(0));
15089 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
15090 DAG.getUNDEF(NarrowVT), Source.getOperand(1), VL);
15092 Source = Source.getOperand(1);
15093 assert(Source.getOpcode() == ISD::FP_EXTEND && "Unexpected source");
15094 Source = Source.getOperand(0);
15095 assert(Source.getValueType() == NarrowVT.getVectorElementType());
15096 return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, NarrowVT,
15097 DAG.getUNDEF(NarrowVT), Source, VL);
15098 default:
15099 // Other opcodes can only come from the original LHS of VW(ADD|SUB)_W_VL
15100 // and that operand should already have the right NarrowVT so no
15101 // extension should be required at this point.
15102 llvm_unreachable("Unsupported opcode");
15103 }
15104 }
15105
15106 /// Helper function to get the narrow type for \p Root.
15107 /// The narrow type is the type of \p Root where we divided the size of each
15108 /// element by 2. E.g., if Root's type <2xi16> -> narrow type <2xi8>.
15109 /// \pre Both the narrow type and the original type should be legal.
15110 static MVT getNarrowType(const SDNode *Root, ExtKind SupportsExt) {
15111 MVT VT = Root->getSimpleValueType(0);
15112
15113 // Determine the narrow size.
15114 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15115
15116 MVT EltVT = SupportsExt == ExtKind::FPExt
15117 ? MVT::getFloatingPointVT(NarrowSize)
15118 : MVT::getIntegerVT(NarrowSize);
15119
15120 assert((int)NarrowSize >= (SupportsExt == ExtKind::FPExt ? 16 : 8) &&
15121 "Trying to extend something we can't represent");
15122 MVT NarrowVT = MVT::getVectorVT(EltVT, VT.getVectorElementCount());
15123 return NarrowVT;
15124 }
15125
15126 /// Get the opcode to materialize:
15127 /// Opcode(sext(a), sext(b)) -> newOpcode(a, b)
15128 static unsigned getSExtOpcode(unsigned Opcode) {
15129 switch (Opcode) {
15130 case ISD::ADD:
15131 case RISCVISD::ADD_VL:
15134 case ISD::OR:
15135 return RISCVISD::VWADD_VL;
15136 case ISD::SUB:
15137 case RISCVISD::SUB_VL:
15140 return RISCVISD::VWSUB_VL;
15141 case ISD::MUL:
15142 case RISCVISD::MUL_VL:
15143 return RISCVISD::VWMUL_VL;
15144 default:
15145 llvm_unreachable("Unexpected opcode");
15146 }
15147 }
15148
15149 /// Get the opcode to materialize:
15150 /// Opcode(zext(a), zext(b)) -> newOpcode(a, b)
15151 static unsigned getZExtOpcode(unsigned Opcode) {
15152 switch (Opcode) {
15153 case ISD::ADD:
15154 case RISCVISD::ADD_VL:
15157 case ISD::OR:
15158 return RISCVISD::VWADDU_VL;
15159 case ISD::SUB:
15160 case RISCVISD::SUB_VL:
15163 return RISCVISD::VWSUBU_VL;
15164 case ISD::MUL:
15165 case RISCVISD::MUL_VL:
15166 return RISCVISD::VWMULU_VL;
15167 case ISD::SHL:
15168 case RISCVISD::SHL_VL:
15169 return RISCVISD::VWSLL_VL;
15170 default:
15171 llvm_unreachable("Unexpected opcode");
15172 }
15173 }
15174
15175 /// Get the opcode to materialize:
15176 /// Opcode(fpext(a), fpext(b)) -> newOpcode(a, b)
15177 static unsigned getFPExtOpcode(unsigned Opcode) {
15178 switch (Opcode) {
15179 case RISCVISD::FADD_VL:
15181 return RISCVISD::VFWADD_VL;
15182 case RISCVISD::FSUB_VL:
15184 return RISCVISD::VFWSUB_VL;
15185 case RISCVISD::FMUL_VL:
15186 return RISCVISD::VFWMUL_VL;
15188 return RISCVISD::VFWMADD_VL;
15190 return RISCVISD::VFWMSUB_VL;
15192 return RISCVISD::VFWNMADD_VL;
15194 return RISCVISD::VFWNMSUB_VL;
15195 default:
15196 llvm_unreachable("Unexpected opcode");
15197 }
15198 }
15199
15200 /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
15201 /// newOpcode(a, b).
15202 static unsigned getSUOpcode(unsigned Opcode) {
15203 assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
15204 "SU is only supported for MUL");
15205 return RISCVISD::VWMULSU_VL;
15206 }
15207
15208 /// Get the opcode to materialize
15209 /// \p Opcode(a, s|z|fpext(b)) -> newOpcode(a, b).
15210 static unsigned getWOpcode(unsigned Opcode, ExtKind SupportsExt) {
15211 switch (Opcode) {
15212 case ISD::ADD:
15213 case RISCVISD::ADD_VL:
15214 case ISD::OR:
15215 return SupportsExt == ExtKind::SExt ? RISCVISD::VWADD_W_VL
15217 case ISD::SUB:
15218 case RISCVISD::SUB_VL:
15219 return SupportsExt == ExtKind::SExt ? RISCVISD::VWSUB_W_VL
15221 case RISCVISD::FADD_VL:
15222 return RISCVISD::VFWADD_W_VL;
15223 case RISCVISD::FSUB_VL:
15224 return RISCVISD::VFWSUB_W_VL;
15225 default:
15226 llvm_unreachable("Unexpected opcode");
15227 }
15228 }
15229
15230 using CombineToTry = std::function<std::optional<CombineResult>(
15231 SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
15232 const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
15233 const RISCVSubtarget &)>;
15234
15235 /// Check if this node needs to be fully folded or extended for all users.
15236 bool needToPromoteOtherUsers() const { return EnforceOneUse; }
15237
15238 void fillUpExtensionSupportForSplat(SDNode *Root, SelectionDAG &DAG,
15239 const RISCVSubtarget &Subtarget) {
15240 unsigned Opc = OrigOperand.getOpcode();
15241 MVT VT = OrigOperand.getSimpleValueType();
15242
15243 assert((Opc == ISD::SPLAT_VECTOR || Opc == RISCVISD::VMV_V_X_VL) &&
15244 "Unexpected Opcode");
15245
15246 // The passthru must be undef for tail agnostic.
15247 if (Opc == RISCVISD::VMV_V_X_VL && !OrigOperand.getOperand(0).isUndef())
15248 return;
15249
15250 // Get the scalar value.
15251 SDValue Op = Opc == ISD::SPLAT_VECTOR ? OrigOperand.getOperand(0)
15252 : OrigOperand.getOperand(1);
15253
15254 // See if we have enough sign bits or zero bits in the scalar to use a
15255 // widening opcode by splatting to smaller element size.
15256 unsigned EltBits = VT.getScalarSizeInBits();
15257 unsigned ScalarBits = Op.getValueSizeInBits();
15258 // If we're not getting all bits from the element, we need special handling.
15259 if (ScalarBits < EltBits) {
15260 // This should only occur on RV32.
15261 assert(Opc == RISCVISD::VMV_V_X_VL && EltBits == 64 && ScalarBits == 32 &&
15262 !Subtarget.is64Bit() && "Unexpected splat");
15263 // vmv.v.x sign extends narrow inputs.
15264 SupportsSExt = true;
15265
15266 // If the input is positive, then sign extend is also zero extend.
15267 if (DAG.SignBitIsZero(Op))
15268 SupportsZExt = true;
15269
15270 EnforceOneUse = false;
15271 return;
15272 }
15273
15274 unsigned NarrowSize = EltBits / 2;
15275 // If the narrow type cannot be expressed with a legal VMV,
15276 // this is not a valid candidate.
15277 if (NarrowSize < 8)
15278 return;
15279
15280 if (DAG.ComputeMaxSignificantBits(Op) <= NarrowSize)
15281 SupportsSExt = true;
15282
15283 if (DAG.MaskedValueIsZero(Op,
15284 APInt::getBitsSetFrom(ScalarBits, NarrowSize)))
15285 SupportsZExt = true;
15286
15287 EnforceOneUse = false;
15288 }
15289
15290 bool isSupportedFPExtend(SDNode *Root, MVT NarrowEltVT,
15291 const RISCVSubtarget &Subtarget) {
15292 // Any f16 extension will need zvfh
15293 if (NarrowEltVT == MVT::f16 && !Subtarget.hasVInstructionsF16())
15294 return false;
15295 // The only bf16 extension we can do is vfmadd_vl -> vfwmadd_vl with
15296 // zvfbfwma
15297 if (NarrowEltVT == MVT::bf16 && (!Subtarget.hasStdExtZvfbfwma() ||
15298 Root->getOpcode() != RISCVISD::VFMADD_VL))
15299 return false;
15300 return true;
15301 }
15302
15303 /// Helper method to set the various fields of this struct based on the
15304 /// type of \p Root.
15305 void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
15306 const RISCVSubtarget &Subtarget) {
15307 SupportsZExt = false;
15308 SupportsSExt = false;
15309 SupportsFPExt = false;
15310 EnforceOneUse = true;
15311 unsigned Opc = OrigOperand.getOpcode();
15312 // For the nodes we handle below, we end up using their inputs directly: see
15313 // getSource(). However since they either don't have a passthru or we check
15314 // that their passthru is undef, we can safely ignore their mask and VL.
15315 switch (Opc) {
15316 case ISD::ZERO_EXTEND:
15317 case ISD::SIGN_EXTEND: {
15318 MVT VT = OrigOperand.getSimpleValueType();
15319 if (!VT.isVector())
15320 break;
15321
15322 SDValue NarrowElt = OrigOperand.getOperand(0);
15323 MVT NarrowVT = NarrowElt.getSimpleValueType();
15324 // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
15325 if (NarrowVT.getVectorElementType() == MVT::i1)
15326 break;
15327
15328 SupportsZExt = Opc == ISD::ZERO_EXTEND;
15329 SupportsSExt = Opc == ISD::SIGN_EXTEND;
15330 break;
15331 }
15332 case RISCVISD::VZEXT_VL:
15333 SupportsZExt = true;
15334 break;
15335 case RISCVISD::VSEXT_VL:
15336 SupportsSExt = true;
15337 break;
15339 MVT NarrowEltVT =
15341 if (!isSupportedFPExtend(Root, NarrowEltVT, Subtarget))
15342 break;
15343 SupportsFPExt = true;
15344 break;
15345 }
15346 case ISD::SPLAT_VECTOR:
15348 fillUpExtensionSupportForSplat(Root, DAG, Subtarget);
15349 break;
15350 case RISCVISD::VFMV_V_F_VL: {
15351 MVT VT = OrigOperand.getSimpleValueType();
15352
15353 if (!OrigOperand.getOperand(0).isUndef())
15354 break;
15355
15356 SDValue Op = OrigOperand.getOperand(1);
15357 if (Op.getOpcode() != ISD::FP_EXTEND)
15358 break;
15359
15360 if (!isSupportedFPExtend(Root, Op.getOperand(0).getSimpleValueType(),
15361 Subtarget))
15362 break;
15363
15364 unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
15365 unsigned ScalarBits = Op.getOperand(0).getValueSizeInBits();
15366 if (NarrowSize != ScalarBits)
15367 break;
15368
15369 SupportsFPExt = true;
15370 break;
15371 }
15372 default:
15373 break;
15374 }
15375 }
15376
15377 /// Check if \p Root supports any extension folding combines.
15378 static bool isSupportedRoot(const SDNode *Root,
15379 const RISCVSubtarget &Subtarget) {
15380 switch (Root->getOpcode()) {
15381 case ISD::ADD:
15382 case ISD::SUB:
15383 case ISD::MUL: {
15384 return Root->getValueType(0).isScalableVector();
15385 }
15386 case ISD::OR: {
15387 return Root->getValueType(0).isScalableVector() &&
15388 Root->getFlags().hasDisjoint();
15389 }
15390 // Vector Widening Integer Add/Sub/Mul Instructions
15391 case RISCVISD::ADD_VL:
15392 case RISCVISD::MUL_VL:
15395 case RISCVISD::SUB_VL:
15398 // Vector Widening Floating-Point Add/Sub/Mul Instructions
15399 case RISCVISD::FADD_VL:
15400 case RISCVISD::FSUB_VL:
15401 case RISCVISD::FMUL_VL:
15404 return true;
15405 case ISD::SHL:
15406 return Root->getValueType(0).isScalableVector() &&
15407 Subtarget.hasStdExtZvbb();
15408 case RISCVISD::SHL_VL:
15409 return Subtarget.hasStdExtZvbb();
15414 return true;
15415 default:
15416 return false;
15417 }
15418 }
15419
15420 /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
15421 NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
15422 const RISCVSubtarget &Subtarget) {
15423 assert(isSupportedRoot(Root, Subtarget) &&
15424 "Trying to build an helper with an "
15425 "unsupported root");
15426 assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
15428 OrigOperand = Root->getOperand(OperandIdx);
15429
15430 unsigned Opc = Root->getOpcode();
15431 switch (Opc) {
15432 // We consider
15433 // VW<ADD|SUB>_W(LHS, RHS) -> <ADD|SUB>(LHS, SEXT(RHS))
15434 // VW<ADD|SUB>U_W(LHS, RHS) -> <ADD|SUB>(LHS, ZEXT(RHS))
15435 // VFW<ADD|SUB>_W(LHS, RHS) -> F<ADD|SUB>(LHS, FPEXT(RHS))
15442 if (OperandIdx == 1) {
15443 SupportsZExt =
15445 SupportsSExt =
15447 SupportsFPExt =
15449 // There's no existing extension here, so we don't have to worry about
15450 // making sure it gets removed.
15451 EnforceOneUse = false;
15452 break;
15453 }
15454 [[fallthrough]];
15455 default:
15456 fillUpExtensionSupport(Root, DAG, Subtarget);
15457 break;
15458 }
15459 }
15460
15461 /// Helper function to get the Mask and VL from \p Root.
15462 static std::pair<SDValue, SDValue>
15463 getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
15464 const RISCVSubtarget &Subtarget) {
15465 assert(isSupportedRoot(Root, Subtarget) && "Unexpected root");
15466 switch (Root->getOpcode()) {
15467 case ISD::ADD:
15468 case ISD::SUB:
15469 case ISD::MUL:
15470 case ISD::OR:
15471 case ISD::SHL: {
15472 SDLoc DL(Root);
15473 MVT VT = Root->getSimpleValueType(0);
15474 return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
15475 }
15476 default:
15477 return std::make_pair(Root->getOperand(3), Root->getOperand(4));
15478 }
15479 }
15480
15481 /// Helper function to check if \p N is commutative with respect to the
15482 /// foldings that are supported by this class.
15483 static bool isCommutative(const SDNode *N) {
15484 switch (N->getOpcode()) {
15485 case ISD::ADD:
15486 case ISD::MUL:
15487 case ISD::OR:
15488 case RISCVISD::ADD_VL:
15489 case RISCVISD::MUL_VL:
15492 case RISCVISD::FADD_VL:
15493 case RISCVISD::FMUL_VL:
15499 return true;
15500 case ISD::SUB:
15501 case RISCVISD::SUB_VL:
15504 case RISCVISD::FSUB_VL:
15506 case ISD::SHL:
15507 case RISCVISD::SHL_VL:
15508 return false;
15509 default:
15510 llvm_unreachable("Unexpected opcode");
15511 }
15512 }
15513
15514 /// Get a list of combine to try for folding extensions in \p Root.
15515 /// Note that each returned CombineToTry function doesn't actually modify
15516 /// anything. Instead, they produce an optional CombineResult that, if not None,
15517 /// needs to be materialized for the combine to be applied.
15518 /// \see CombineResult::materialize.
15519 /// If the related CombineToTry function returns std::nullopt, that means the
15520 /// combine didn't match.
15521 static SmallVector<CombineToTry> getSupportedFoldings(const SDNode *Root);
15522};
15523
15524/// Helper structure that holds all the necessary information to materialize a
15525/// combine that does some extension folding.
15526struct CombineResult {
15527 /// Opcode to be generated when materializing the combine.
15528 unsigned TargetOpcode;
15529 // No value means no extension is needed.
15530 std::optional<ExtKind> LHSExt;
15531 std::optional<ExtKind> RHSExt;
15532 /// Root of the combine.
15533 SDNode *Root;
15534 /// LHS of the TargetOpcode.
15535 NodeExtensionHelper LHS;
15536 /// RHS of the TargetOpcode.
15537 NodeExtensionHelper RHS;
15538
15539 CombineResult(unsigned TargetOpcode, SDNode *Root,
15540 const NodeExtensionHelper &LHS, std::optional<ExtKind> LHSExt,
15541 const NodeExtensionHelper &RHS, std::optional<ExtKind> RHSExt)
15542 : TargetOpcode(TargetOpcode), LHSExt(LHSExt), RHSExt(RHSExt), Root(Root),
15543 LHS(LHS), RHS(RHS) {}
15544
15545 /// Return a value that uses TargetOpcode and that can be used to replace
15546 /// Root.
15547 /// The actual replacement is *not* done in that method.
15548 SDValue materialize(SelectionDAG &DAG,
15549 const RISCVSubtarget &Subtarget) const {
15550 SDValue Mask, VL, Passthru;
15551 std::tie(Mask, VL) =
15552 NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
15553 switch (Root->getOpcode()) {
15554 default:
15555 Passthru = Root->getOperand(2);
15556 break;
15557 case ISD::ADD:
15558 case ISD::SUB:
15559 case ISD::MUL:
15560 case ISD::OR:
15561 case ISD::SHL:
15562 Passthru = DAG.getUNDEF(Root->getValueType(0));
15563 break;
15564 }
15565 return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
15566 LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, LHSExt),
15567 RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, RHSExt),
15568 Passthru, Mask, VL);
15569 }
15570};
15571
15572/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15573/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15574/// are zext) and LHS and RHS can be folded into Root.
15575/// AllowExtMask defines which form `ext` can take in this pattern.
15576///
15577/// \note If the pattern can match with both zext and sext, the returned
15578/// CombineResult will feature the zext result.
15579///
15580/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15581/// can be used to apply the pattern.
15582static std::optional<CombineResult>
15583canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
15584 const NodeExtensionHelper &RHS,
15585 uint8_t AllowExtMask, SelectionDAG &DAG,
15586 const RISCVSubtarget &Subtarget) {
15587 if ((AllowExtMask & ExtKind::ZExt) && LHS.SupportsZExt && RHS.SupportsZExt)
15588 return CombineResult(NodeExtensionHelper::getZExtOpcode(Root->getOpcode()),
15589 Root, LHS, /*LHSExt=*/{ExtKind::ZExt}, RHS,
15590 /*RHSExt=*/{ExtKind::ZExt});
15591 if ((AllowExtMask & ExtKind::SExt) && LHS.SupportsSExt && RHS.SupportsSExt)
15592 return CombineResult(NodeExtensionHelper::getSExtOpcode(Root->getOpcode()),
15593 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15594 /*RHSExt=*/{ExtKind::SExt});
15595 if ((AllowExtMask & ExtKind::FPExt) && LHS.SupportsFPExt && RHS.SupportsFPExt)
15596 return CombineResult(NodeExtensionHelper::getFPExtOpcode(Root->getOpcode()),
15597 Root, LHS, /*LHSExt=*/{ExtKind::FPExt}, RHS,
15598 /*RHSExt=*/{ExtKind::FPExt});
15599 return std::nullopt;
15600}
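// Illustrative sketch (node shapes are assumptions, not taken from a test):
// a root such as
//   add_vl (vsext_vl A), (vsext_vl B), passthru, mask, vl
// is matched by the SExt branch above and reported as a CombineResult whose
// TargetOpcode is the sign-extending widening form (a vwadd.vv-style node),
// with the narrow A and B recorded as the operands to extend.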
15601
15602/// Check if \p Root follows a pattern Root(ext(LHS), ext(RHS))
15603/// where `ext` is the same for both LHS and RHS (i.e., both are sext or both
15604/// are zext) and LHS and RHS can be folded into Root.
15605///
15606/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15607/// can be used to apply the pattern.
15608static std::optional<CombineResult>
15609canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
15610 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15611 const RISCVSubtarget &Subtarget) {
15612 return canFoldToVWWithSameExtensionImpl(
15613 Root, LHS, RHS, ExtKind::ZExt | ExtKind::SExt | ExtKind::FPExt, DAG,
15614 Subtarget);
15615}
15616
15617/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
15618///
15619/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15620/// can be used to apply the pattern.
15621static std::optional<CombineResult>
15622canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
15623 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15624 const RISCVSubtarget &Subtarget) {
15625 if (RHS.SupportsFPExt)
15626 return CombineResult(
15627 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::FPExt),
15628 Root, LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::FPExt});
15629
15630 // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
15631 // sext/zext?
15632 // Control this behavior behind an option (AllowSplatInVW_W) for testing
15633 // purposes.
15634 if (RHS.SupportsZExt && (!RHS.isSplat() || AllowSplatInVW_W))
15635 return CombineResult(
15636 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::ZExt), Root,
15637 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::ZExt});
15638 if (RHS.SupportsSExt && (!RHS.isSplat() || AllowSplatInVW_W))
15639 return CombineResult(
15640 NodeExtensionHelper::getWOpcode(Root->getOpcode(), ExtKind::SExt), Root,
15641 LHS, /*LHSExt=*/std::nullopt, RHS, /*RHSExt=*/{ExtKind::SExt});
15642 return std::nullopt;
15643}
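// Illustrative sketch (operand shapes are assumptions): for a root like
//   add_vl W, (vzext_vl N), passthru, mask, vl
// where W is already wide and N is narrow, only the RHS is given an ExtKind
// (ZExt here), so the CombineResult describes a vwaddu.wv-style operation
// that uses W unchanged and N in its narrow form.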
15644
15645/// Check if \p Root follows a pattern Root(sext(LHS), sext(RHS))
15646///
15647/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15648/// can be used to apply the pattern.
15649static std::optional<CombineResult>
15650canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15651 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15652 const RISCVSubtarget &Subtarget) {
15653 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::SExt, DAG,
15654 Subtarget);
15655}
15656
15657/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
15658///
15659/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15660/// can be used to apply the pattern.
15661static std::optional<CombineResult>
15662canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15663 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15664 const RISCVSubtarget &Subtarget) {
15665 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::ZExt, DAG,
15666 Subtarget);
15667}
15668
15669/// Check if \p Root follows a pattern Root(fpext(LHS), fpext(RHS))
15670///
15671/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15672/// can be used to apply the pattern.
15673static std::optional<CombineResult>
15674canFoldToVWWithFPEXT(SDNode *Root, const NodeExtensionHelper &LHS,
15675 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15676 const RISCVSubtarget &Subtarget) {
15677 return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, ExtKind::FPExt, DAG,
15678 Subtarget);
15679}
15680
15681/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
15682///
15683/// \returns std::nullopt if the pattern doesn't match or a CombineResult that
15684/// can be used to apply the pattern.
15685static std::optional<CombineResult>
15686canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
15687 const NodeExtensionHelper &RHS, SelectionDAG &DAG,
15688 const RISCVSubtarget &Subtarget) {
15689
15690 if (!LHS.SupportsSExt || !RHS.SupportsZExt)
15691 return std::nullopt;
15692 return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
15693 Root, LHS, /*LHSExt=*/{ExtKind::SExt}, RHS,
15694 /*RHSExt=*/{ExtKind::ZExt});
15695}
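// Illustrative sketch (operand shapes are assumptions): a root such as
//   mul_vl (vsext_vl A), (vzext_vl B)
// with narrow A and B is reported as a vwmulsu-style CombineResult, i.e. the
// widening multiply of a sign-extended LHS by a zero-extended RHS.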
15696
15697SmallVector<NodeExtensionHelper::CombineToTry>
15698NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
15699 SmallVector<CombineToTry> Strategies;
15700 switch (Root->getOpcode()) {
15701 case ISD::ADD:
15702 case ISD::SUB:
15703 case ISD::OR:
15704 case RISCVISD::ADD_VL:
15705 case RISCVISD::SUB_VL:
15706 case RISCVISD::FADD_VL:
15707 case RISCVISD::FSUB_VL:
15708 // add|sub|fadd|fsub-> vwadd(u)|vwsub(u)|vfwadd|vfwsub
15709 Strategies.push_back(canFoldToVWWithSameExtension);
15710 // add|sub|fadd|fsub -> vwadd(u)_w|vwsub(u)_w|vfwadd_w|vfwsub_w
15711 Strategies.push_back(canFoldToVW_W);
15712 break;
15713 case RISCVISD::FMUL_VL:
15718 Strategies.push_back(canFoldToVWWithSameExtension);
15719 break;
15720 case ISD::MUL:
15721 case RISCVISD::MUL_VL:
15722 // mul -> vwmul(u)
15723 Strategies.push_back(canFoldToVWWithSameExtension);
15724 // mul -> vwmulsu
15725 Strategies.push_back(canFoldToVW_SU);
15726 break;
15727 case ISD::SHL:
15728 case RISCVISD::SHL_VL:
15729 // shl -> vwsll
15730 Strategies.push_back(canFoldToVWWithZEXT);
15731 break;
15732 case RISCVISD::VWADD_W_VL:
15733 case RISCVISD::VWSUB_W_VL:
15734 // vwadd_w|vwsub_w -> vwadd|vwsub
15735 Strategies.push_back(canFoldToVWWithSEXT);
15736 break;
15737 case RISCVISD::VWADDU_W_VL:
15738 case RISCVISD::VWSUBU_W_VL:
15739 // vwaddu_w|vwsubu_w -> vwaddu|vwsubu
15740 Strategies.push_back(canFoldToVWWithZEXT);
15741 break;
15742 case RISCVISD::VFWADD_W_VL:
15743 case RISCVISD::VFWSUB_W_VL:
15744 // vfwadd_w|vfwsub_w -> vfwadd|vfwsub
15745 Strategies.push_back(canFoldToVWWithFPEXT);
15746 break;
15747 default:
15748 llvm_unreachable("Unexpected opcode");
15749 }
15750 return Strategies;
15751}
15752} // End anonymous namespace.
15753
15754/// Combine a binary or FMA operation to its equivalent VW or VW_W form.
15755/// The supported combines are:
15756/// add | add_vl | or disjoint -> vwadd(u) | vwadd(u)_w
15757/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
15758/// mul | mul_vl -> vwmul(u) | vwmul_su
15759/// shl | shl_vl -> vwsll
15760/// fadd_vl -> vfwadd | vfwadd_w
15761/// fsub_vl -> vfwsub | vfwsub_w
15762/// fmul_vl -> vfwmul
15763/// vwadd_w(u) -> vwadd(u)
15764/// vwsub_w(u) -> vwsub(u)
15765/// vfwadd_w -> vfwadd
15766/// vfwsub_w -> vfwsub
15768static SDValue combineOp_VLToVWOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15769 const RISCVSubtarget &Subtarget) {
15770 SelectionDAG &DAG = DCI.DAG;
15771 if (DCI.isBeforeLegalize())
15772 return SDValue();
15773
15774 if (!NodeExtensionHelper::isSupportedRoot(N, Subtarget))
15775 return SDValue();
15776
15777 SmallVector<SDNode *> Worklist;
15778 SmallSet<SDNode *, 8> Inserted;
15779 Worklist.push_back(N);
15780 Inserted.insert(N);
15781 SmallVector<CombineResult> CombinesToApply;
15782
15783 while (!Worklist.empty()) {
15784 SDNode *Root = Worklist.pop_back_val();
15785
15786 NodeExtensionHelper LHS(Root, 0, DAG, Subtarget);
15787 NodeExtensionHelper RHS(Root, 1, DAG, Subtarget);
15788 auto AppendUsersIfNeeded = [&Worklist, &Subtarget,
15789 &Inserted](const NodeExtensionHelper &Op) {
15790 if (Op.needToPromoteOtherUsers()) {
15791 for (SDUse &Use : Op.OrigOperand->uses()) {
15792 SDNode *TheUser = Use.getUser();
15793 if (!NodeExtensionHelper::isSupportedRoot(TheUser, Subtarget))
15794 return false;
15795 // We only support the first 2 operands of FMA.
15796 if (Use.getOperandNo() >= 2)
15797 return false;
15798 if (Inserted.insert(TheUser).second)
15799 Worklist.push_back(TheUser);
15800 }
15801 }
15802 return true;
15803 };
15804
15805 // Control the compile time by limiting the number of nodes we look at in
15806 // total.
15807 if (Inserted.size() > ExtensionMaxWebSize)
15808 return SDValue();
15809
15810 SmallVector<NodeExtensionHelper::CombineToTry> FoldingStrategies =
15811 NodeExtensionHelper::getSupportedFoldings(Root);
15812
15813 assert(!FoldingStrategies.empty() && "Nothing to be folded");
15814 bool Matched = false;
15815 for (int Attempt = 0;
15816 (Attempt != 1 + NodeExtensionHelper::isCommutative(Root)) && !Matched;
15817 ++Attempt) {
15818
15819 for (NodeExtensionHelper::CombineToTry FoldingStrategy :
15820 FoldingStrategies) {
15821 std::optional<CombineResult> Res =
15822 FoldingStrategy(Root, LHS, RHS, DAG, Subtarget);
15823 if (Res) {
15824 Matched = true;
15825 CombinesToApply.push_back(*Res);
15826 // All the inputs that are extended need to be folded, otherwise we
15827 // would be left with both the old input (since it may still be used)
15828 // and the new one.
15829 if (Res->LHSExt.has_value())
15830 if (!AppendUsersIfNeeded(LHS))
15831 return SDValue();
15832 if (Res->RHSExt.has_value())
15833 if (!AppendUsersIfNeeded(RHS))
15834 return SDValue();
15835 break;
15836 }
15837 }
15838 std::swap(LHS, RHS);
15839 }
15840 // Right now we take an all-or-nothing approach.
15841 if (!Matched)
15842 return SDValue();
15843 }
15844 // Store the value for the replacement of the input node separately.
15845 SDValue InputRootReplacement;
15846 // We do the RAUW after we materialize all the combines, because some replaced
15847 // nodes may be feeding some of the yet-to-be-replaced nodes. Put differently,
15848 // some of these nodes may appear in the NodeExtensionHelpers of some of the
15849 // yet-to-be-visited CombinesToApply roots.
15850 SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
15851 ValuesToReplace.reserve(CombinesToApply.size());
15852 for (CombineResult Res : CombinesToApply) {
15853 SDValue NewValue = Res.materialize(DAG, Subtarget);
15854 if (!InputRootReplacement) {
15855 assert(Res.Root == N &&
15856 "First element is expected to be the current node");
15857 InputRootReplacement = NewValue;
15858 } else {
15859 ValuesToReplace.emplace_back(SDValue(Res.Root, 0), NewValue);
15860 }
15861 }
15862 for (std::pair<SDValue, SDValue> OldNewValues : ValuesToReplace) {
15863 DAG.ReplaceAllUsesOfValueWith(OldNewValues.first, OldNewValues.second);
15864 DCI.AddToWorklist(OldNewValues.second.getNode());
15865 }
15866 return InputRootReplacement;
15867}
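// Illustrative walk-through (a hypothetical DAG, not from a test): if
//   N = add_vl (vsext_vl X), Y
// and the same vsext_vl X also feeds M = mul_vl (vsext_vl X), Z, then folding
// the extension into N alone would keep the vsext_vl alive for M, so M is
// added to the worklist and the rewrite is only materialized if every such
// user can also become a widening op (the all-or-nothing approach above).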
15868
15869// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
15870// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
15871// y will be the Passthru and cond will be the Mask.
15872static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
15873 unsigned Opc = N->getOpcode();
15876
15877 SDValue Y = N->getOperand(0);
15878 SDValue MergeOp = N->getOperand(1);
15879 unsigned MergeOpc = MergeOp.getOpcode();
15880
15881 if (MergeOpc != RISCVISD::VMERGE_VL && MergeOpc != ISD::VSELECT)
15882 return SDValue();
15883
15884 SDValue X = MergeOp->getOperand(1);
15885
15886 if (!MergeOp.hasOneUse())
15887 return SDValue();
15888
15889 // Passthru should be undef
15890 SDValue Passthru = N->getOperand(2);
15891 if (!Passthru.isUndef())
15892 return SDValue();
15893
15894 // Mask should be all ones
15895 SDValue Mask = N->getOperand(3);
15896 if (Mask.getOpcode() != RISCVISD::VMSET_VL)
15897 return SDValue();
15898
15899 // False value of MergeOp should be all zeros
15900 SDValue Z = MergeOp->getOperand(2);
15901
15902 if (Z.getOpcode() == ISD::INSERT_SUBVECTOR &&
15903 (isNullOrNullSplat(Z.getOperand(0)) || Z.getOperand(0).isUndef()))
15904 Z = Z.getOperand(1);
15905
15906 if (!ISD::isConstantSplatVectorAllZeros(Z.getNode()))
15907 return SDValue();
15908
15909 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0),
15910 {Y, X, Y, MergeOp->getOperand(0), N->getOperand(4)},
15911 N->getFlags());
15912}
15913
15915static SDValue performVWADDSUBW_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15916 const RISCVSubtarget &Subtarget) {
15917 [[maybe_unused]] unsigned Opc = N->getOpcode();
15920
15921 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
15922 return V;
15923
15924 return combineVWADDSUBWSelect(N, DCI.DAG);
15925}
15926
15927// Helper function for performMemPairCombine.
15928// Try to combine the memory loads/stores LSNode1 and LSNode2
15929// into a single memory pair operation.
15930static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
15931 LSBaseSDNode *LSNode2, SDValue BasePtr,
15932 uint64_t Imm) {
15934 SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
15935
15936 if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
15937 SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
15938 return SDValue();
15939
15940 MachineFunction &MF = DAG.getMachineFunction();
15941 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15942
15943 // The new operation has twice the width.
15944 MVT XLenVT = Subtarget.getXLenVT();
15945 EVT MemVT = LSNode1->getMemoryVT();
15946 EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
15947 MachineMemOperand *MMO = LSNode1->getMemOperand();
15948 MachineMemOperand *NewMMO = MF.getMachineMemOperand(
15949 MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
15950
15951 if (LSNode1->getOpcode() == ISD::LOAD) {
15952 auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
15953 unsigned Opcode;
15954 if (MemVT == MVT::i32)
15955 Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
15956 else
15957 Opcode = RISCVISD::TH_LDD;
15958
15959 SDValue Res = DAG.getMemIntrinsicNode(
15960 Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
15961 {LSNode1->getChain(), BasePtr,
15962 DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15963 NewMemVT, NewMMO);
15964
15965 SDValue Node1 =
15966 DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
15967 SDValue Node2 =
15968 DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
15969
15970 DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
15971 return Node1;
15972 } else {
15973 unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
15974
15975 SDValue Res = DAG.getMemIntrinsicNode(
15976 Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
15977 {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
15978 BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
15979 NewMemVT, NewMMO);
15980
15981 DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
15982 return Res;
15983 }
15984}
15985
15986// Try to combine two adjacent loads/stores to a single pair instruction from
15987// the XTHeadMemPair vendor extension.
15989static SDValue performMemPairCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
15990 SelectionDAG &DAG = DCI.DAG;
15991 MachineFunction &MF = DAG.getMachineFunction();
15992 const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
15993
15994 // Target does not support load/store pair.
15995 if (!Subtarget.hasVendorXTHeadMemPair())
15996 return SDValue();
15997
15998 LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
15999 EVT MemVT = LSNode1->getMemoryVT();
16000 unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
16001
16002 // No volatile, indexed or atomic loads/stores.
16003 if (!LSNode1->isSimple() || LSNode1->isIndexed())
16004 return SDValue();
16005
16006 // Function to get a base + constant representation from a memory value.
16007 auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
16008 if (Ptr->getOpcode() == ISD::ADD)
16009 if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
16010 return {Ptr->getOperand(0), C1->getZExtValue()};
16011 return {Ptr, 0};
16012 };
16013
16014 auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
16015
16016 SDValue Chain = N->getOperand(0);
16017 for (SDUse &Use : Chain->uses()) {
16018 if (Use.getUser() != N && Use.getResNo() == 0 &&
16019 Use.getUser()->getOpcode() == N->getOpcode()) {
16020 LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
16021
16022 // No volatile, indexed or atomic loads/stores.
16023 if (!LSNode2->isSimple() || LSNode2->isIndexed())
16024 continue;
16025
16026 // Check if LSNode1 and LSNode2 have the same type and extension.
16027 if (LSNode1->getOpcode() == ISD::LOAD)
16028 if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
16029 cast<LoadSDNode>(LSNode1)->getExtensionType())
16030 continue;
16031
16032 if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
16033 continue;
16034
16035 auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
16036
16037 // Check if the base pointer is the same for both instructions.
16038 if (Base1 != Base2)
16039 continue;
16040
16041 // Check if the offsets match the XTHeadMemPair encoding constraints.
16042 bool Valid = false;
16043 if (MemVT == MVT::i32) {
16044 // Check for adjacent i32 values and a 2-bit index.
16045 if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
16046 Valid = true;
16047 } else if (MemVT == MVT::i64) {
16048 // Check for adjacent i64 values and a 2-bit index.
16049 if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
16050 Valid = true;
16051 }
16052
16053 if (!Valid)
16054 continue;
16055
16056 // Try to combine.
16057 if (SDValue Res =
16058 tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
16059 return Res;
16060 }
16061 }
16062
16063 return SDValue();
16064}
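// Illustrative sketch (registers and offsets are assumptions): with
// XTHeadMemPair, two i32 loads on the same chain and base such as
//   lw a0, 8(a1)
//   lw a2, 12(a1)
// have adjacent offsets (8 and 8 + 4) where 8 is a valid shifted 2-bit index,
// so tryMemPairCombine can replace the pair with a single th.lwd paired load.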
16065
16066// Fold
16067// (fp_to_int (froundeven X)) -> fcvt X, rne
16068// (fp_to_int (ftrunc X)) -> fcvt X, rtz
16069// (fp_to_int (ffloor X)) -> fcvt X, rdn
16070// (fp_to_int (fceil X)) -> fcvt X, rup
16071// (fp_to_int (fround X)) -> fcvt X, rmm
16072// (fp_to_int (frint X)) -> fcvt X
16074static SDValue performFP_TO_INTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16075 const RISCVSubtarget &Subtarget) {
16076 SelectionDAG &DAG = DCI.DAG;
16077 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16078 MVT XLenVT = Subtarget.getXLenVT();
16079
16080 SDValue Src = N->getOperand(0);
16081
16082 // Don't do this for strict-fp Src.
16083 if (Src->isStrictFPOpcode())
16084 return SDValue();
16085
16086 // Ensure the FP type is legal.
16087 if (!TLI.isTypeLegal(Src.getValueType()))
16088 return SDValue();
16089
16090 // Don't do this for f16 with Zfhmin and not Zfh.
16091 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16092 return SDValue();
16093
16094 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16095 // If the result is invalid, we didn't find a foldable instruction.
16096 if (FRM == RISCVFPRndMode::Invalid)
16097 return SDValue();
16098
16099 SDLoc DL(N);
16100 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
16101 EVT VT = N->getValueType(0);
16102
16103 if (VT.isVector() && TLI.isTypeLegal(VT)) {
16104 MVT SrcVT = Src.getSimpleValueType();
16105 MVT SrcContainerVT = SrcVT;
16106 MVT ContainerVT = VT.getSimpleVT();
16107 SDValue XVal = Src.getOperand(0);
16108
16109 // For widening and narrowing conversions we just combine it into a
16110 // VFCVT_..._VL node, as there are no specific VFWCVT/VFNCVT VL nodes. They
16111 // end up getting lowered to their appropriate pseudo instructions based on
16112 // their operand types
16113 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits() * 2 ||
16114 VT.getScalarSizeInBits() * 2 < SrcVT.getScalarSizeInBits())
16115 return SDValue();
16116
16117 // Make fixed-length vectors scalable first
16118 if (SrcVT.isFixedLengthVector()) {
16119 SrcContainerVT = getContainerForFixedLengthVector(DAG, SrcVT, Subtarget);
16120 XVal = convertToScalableVector(SrcContainerVT, XVal, DAG, Subtarget);
16121 ContainerVT =
16122 getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
16123 }
16124
16125 auto [Mask, VL] =
16126 getDefaultVLOps(SrcVT, SrcContainerVT, DL, DAG, Subtarget);
16127
16128 SDValue FpToInt;
16129 if (FRM == RISCVFPRndMode::RTZ) {
16130 // Use the dedicated trunc static rounding mode if we're truncating so we
16131 // don't need to generate calls to fsrmi/fsrm
16132 unsigned Opc =
16133 IsSigned ? RISCVISD::VFCVT_RTZ_X_F_VL : RISCVISD::VFCVT_RTZ_XU_F_VL;
16134 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask, VL);
16135 } else {
16136 unsigned Opc =
16137 IsSigned ? RISCVISD::VFCVT_RM_X_F_VL : RISCVISD::VFCVT_RM_XU_F_VL;
16138 FpToInt = DAG.getNode(Opc, DL, ContainerVT, XVal, Mask,
16139 DAG.getTargetConstant(FRM, DL, XLenVT), VL);
16140 }
16141
16142 // If converted from fixed-length to scalable, convert back
16143 if (VT.isFixedLengthVector())
16144 FpToInt = convertFromScalableVector(VT, FpToInt, DAG, Subtarget);
16145
16146 return FpToInt;
16147 }
16148
16149 // Only handle XLen or i32 types. Other types narrower than XLen will
16150 // eventually be legalized to XLenVT.
16151 if (VT != MVT::i32 && VT != XLenVT)
16152 return SDValue();
16153
16154 unsigned Opc;
16155 if (VT == XLenVT)
16156 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16157 else
16158 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16159
16160 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
16161 DAG.getTargetConstant(FRM, DL, XLenVT));
16162 return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
16163}
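// Illustrative scalar sketch (the f32 type is an assumption): for a legal
// f32 input, (fp_to_sint (ffloor X)) is folded into one FCVT node that
// carries the static rounding mode rdn, so rounding toward negative infinity
// and the integer conversion happen in a single fcvt instead of two steps.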
16164
16165// Fold
16166// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
16167// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
16168// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
16169// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
16170// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
16171// (fp_to_int_sat (frint X)) -> (select X == nan, 0, (fcvt X, dyn))
16173static SDValue performFP_TO_INT_SATCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16174 const RISCVSubtarget &Subtarget) {
16175 SelectionDAG &DAG = DCI.DAG;
16176 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16177 MVT XLenVT = Subtarget.getXLenVT();
16178
16179 // Only handle XLen types. Other types narrower than XLen will eventually be
16180 // legalized to XLenVT.
16181 EVT DstVT = N->getValueType(0);
16182 if (DstVT != XLenVT)
16183 return SDValue();
16184
16185 SDValue Src = N->getOperand(0);
16186
16187 // Don't do this for strict-fp Src.
16188 if (Src->isStrictFPOpcode())
16189 return SDValue();
16190
16191 // Ensure the FP type is also legal.
16192 if (!TLI.isTypeLegal(Src.getValueType()))
16193 return SDValue();
16194
16195 // Don't do this for f16 with Zfhmin and not Zfh.
16196 if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
16197 return SDValue();
16198
16199 EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16200
16201 RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
16202 if (FRM == RISCVFPRndMode::Invalid)
16203 return SDValue();
16204
16205 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
16206
16207 unsigned Opc;
16208 if (SatVT == DstVT)
16209 Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
16210 else if (DstVT == MVT::i64 && SatVT == MVT::i32)
16211 Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
16212
16213 return SDValue();
16214 // FIXME: Support other SatVTs by clamping before or after the conversion.
16215
16216 Src = Src.getOperand(0);
16217
16218 SDLoc DL(N);
16219 SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
16220 DAG.getTargetConstant(FRM, DL, XLenVT));
16221
16222 // fcvt.wu.* sign extends bit 31 on RV64. FP_TO_UINT_SAT expects to zero
16223 // extend.
16224 if (Opc == RISCVISD::FCVT_WU_RV64)
16225 FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
16226
16227 // RISC-V FP-to-int conversions saturate to the destination register size, but
16228 // don't produce 0 for nan.
16229 SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
16230 return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
16231}
16232
16233// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
16234// smaller than XLenVT.
16235static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
16236 const RISCVSubtarget &Subtarget) {
16237 assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
16238
16239 SDValue Src = N->getOperand(0);
16240 if (Src.getOpcode() != ISD::BSWAP)
16241 return SDValue();
16242
16243 EVT VT = N->getValueType(0);
16244 if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
16245 !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
16246 return SDValue();
16247
16248 SDLoc DL(N);
16249 return DAG.getNode(RISCVISD::BREV8, DL, VT, Src.getOperand(0));
16250}
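// Illustrative sketch (an i16 value is assumed): bswap reverses the byte
// order and bitreverse then reverses all bits, so the composition reverses
// the bits within each byte while keeping the byte order, which is exactly
// what BREV8 does; the pair therefore collapses to one RISCVISD::BREV8 node.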
16251
16252static SDValue performVP_REVERSECombine(SDNode *N, SelectionDAG &DAG,
16253 const RISCVSubtarget &Subtarget) {
16254 // Fold:
16255 // vp.reverse(vp.load(ADDR, MASK)) -> vp.strided.load(ADDR, -1, MASK)
16256
16257 // Check if its first operand is a vp.load.
16258 auto *VPLoad = dyn_cast<VPLoadSDNode>(N->getOperand(0));
16259 if (!VPLoad)
16260 return SDValue();
16261
16262 EVT LoadVT = VPLoad->getValueType(0);
16263 // We do not have a strided_load version for masks, and the evl of vp.reverse
16264 // and vp.load should always be the same.
16265 if (!LoadVT.getVectorElementType().isByteSized() ||
16266 N->getOperand(2) != VPLoad->getVectorLength() ||
16267 !N->getOperand(0).hasOneUse())
16268 return SDValue();
16269
16270 // Check if the mask of outer vp.reverse are all 1's.
16271 if (!isOneOrOneSplat(N->getOperand(1)))
16272 return SDValue();
16273
16274 SDValue LoadMask = VPLoad->getMask();
16275 // If Mask is all ones, then load is unmasked and can be reversed.
16276 if (!isOneOrOneSplat(LoadMask)) {
16277 // If the mask is not all ones, we can reverse the load if the mask was also
16278 // reversed by an unmasked vp.reverse with the same EVL.
16279 if (LoadMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16280 !isOneOrOneSplat(LoadMask.getOperand(1)) ||
16281 LoadMask.getOperand(2) != VPLoad->getVectorLength())
16282 return SDValue();
16283 LoadMask = LoadMask.getOperand(0);
16284 }
16285
16286 // Base = LoadAddr + (NumElem - 1) * ElemWidthByte
16287 SDLoc DL(N);
16288 MVT XLenVT = Subtarget.getXLenVT();
16289 SDValue NumElem = VPLoad->getVectorLength();
16290 uint64_t ElemWidthByte = VPLoad->getValueType(0).getScalarSizeInBits() / 8;
16291
16292 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16293 DAG.getConstant(1, DL, XLenVT));
16294 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16295 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16296 SDValue Base = DAG.getNode(ISD::ADD, DL, XLenVT, VPLoad->getBasePtr(), Temp2);
16297 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16298
16300 MachinePointerInfo PtrInfo(VPLoad->getAddressSpace());
16302 PtrInfo, VPLoad->getMemOperand()->getFlags(),
16303 LocationSize::beforeOrAfterPointer(), VPLoad->getAlign());
16304
16305 SDValue Ret = DAG.getStridedLoadVP(
16306 LoadVT, DL, VPLoad->getChain(), Base, Stride, LoadMask,
16307 VPLoad->getVectorLength(), MMO, VPLoad->isExpandingLoad());
16308
16309 DAG.ReplaceAllUsesOfValueWith(SDValue(VPLoad, 1), Ret.getValue(1));
16310
16311 return Ret;
16312}
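// Illustrative address math (the element type and EVL are assumptions):
// reversing a vp.load of i32 elements at address P with EVL = 8 turns into a
// strided load with Base = P + (8 - 1) * 4 = P + 28 and Stride = -4, so the
// elements come back already in reversed order.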
16313
16314static SDValue performVP_STORECombine(SDNode *N, SelectionDAG &DAG,
16315 const RISCVSubtarget &Subtarget) {
16316 // Fold:
16317 // vp.store(vp.reverse(VAL), ADDR, MASK) -> vp.strided.store(VAL, NEW_ADDR,
16318 // -1, MASK)
16319 auto *VPStore = cast<VPStoreSDNode>(N);
16320
16321 if (VPStore->getValue().getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE)
16322 return SDValue();
16323
16324 SDValue VPReverse = VPStore->getValue();
16325 EVT ReverseVT = VPReverse->getValueType(0);
16326
16327 // We do not have a strided_store version for masks, and the evl of vp.reverse
16328 // and vp.store should always be the same.
16329 if (!ReverseVT.getVectorElementType().isByteSized() ||
16330 VPStore->getVectorLength() != VPReverse.getOperand(2) ||
16331 !VPReverse.hasOneUse())
16332 return SDValue();
16333
16334 SDValue StoreMask = VPStore->getMask();
16335 // If Mask is all ones, then the store is unmasked and can be reversed.
16336 if (!isOneOrOneSplat(StoreMask)) {
16337 // If the mask is not all ones, we can reverse the store if the mask was
16338 // also reversed by an unmasked vp.reverse with the same EVL.
16339 if (StoreMask.getOpcode() != ISD::EXPERIMENTAL_VP_REVERSE ||
16340 !isOneOrOneSplat(StoreMask.getOperand(1)) ||
16341 StoreMask.getOperand(2) != VPStore->getVectorLength())
16342 return SDValue();
16343 StoreMask = StoreMask.getOperand(0);
16344 }
16345
16346 // Base = StoreAddr + (NumElem - 1) * ElemWidthByte
16347 SDLoc DL(N);
16348 MVT XLenVT = Subtarget.getXLenVT();
16349 SDValue NumElem = VPStore->getVectorLength();
16350 uint64_t ElemWidthByte = VPReverse.getValueType().getScalarSizeInBits() / 8;
16351
16352 SDValue Temp1 = DAG.getNode(ISD::SUB, DL, XLenVT, NumElem,
16353 DAG.getConstant(1, DL, XLenVT));
16354 SDValue Temp2 = DAG.getNode(ISD::MUL, DL, XLenVT, Temp1,
16355 DAG.getConstant(ElemWidthByte, DL, XLenVT));
16356 SDValue Base =
16357 DAG.getNode(ISD::ADD, DL, XLenVT, VPStore->getBasePtr(), Temp2);
16358 SDValue Stride = DAG.getConstant(-ElemWidthByte, DL, XLenVT);
16359
16361 MachinePointerInfo PtrInfo(VPStore->getAddressSpace());
16363 PtrInfo, VPStore->getMemOperand()->getFlags(),
16364 LocationSize::beforeOrAfterPointer(), VPStore->getAlign());
16365
16366 return DAG.getStridedStoreVP(
16367 VPStore->getChain(), DL, VPReverse.getOperand(0), Base,
16368 VPStore->getOffset(), Stride, StoreMask, VPStore->getVectorLength(),
16369 VPStore->getMemoryVT(), MMO, VPStore->getAddressingMode(),
16370 VPStore->isTruncatingStore(), VPStore->isCompressingStore());
16371}
16372
16373// Convert from one FMA opcode to another based on whether we are negating the
16374// multiply result and/or the accumulator.
16375// NOTE: Only supports RVV operations with VL.
16376static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
16377 // Negating the multiply result changes ADD<->SUB and toggles 'N'.
16378 if (NegMul) {
16379 // clang-format off
16380 switch (Opcode) {
16381 default: llvm_unreachable("Unexpected opcode");
16382 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16383 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16384 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16385 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16390 }
16391 // clang-format on
16392 }
16393
16394 // Negating the accumulator changes ADD<->SUB.
16395 if (NegAcc) {
16396 // clang-format off
16397 switch (Opcode) {
16398 default: llvm_unreachable("Unexpected opcode");
16399 case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
16400 case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
16401 case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
16402 case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
16407 }
16408 // clang-format on
16409 }
16410
16411 return Opcode;
16412}
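// Illustrative sketch: starting from VFMADD_VL (a * b + c), negating only the
// product (NegMul) gives -(a * b) + c, which is VFNMSUB_VL; negating the
// accumulator as well (NegAcc) gives -(a * b) - c, which is VFNMADD_VL,
// matching the two switch tables above.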
16413
16414static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
16415 // Fold FNEG_VL into FMA opcodes.
16416 // The first operand of strict-fp is chain.
16417 bool IsStrict =
16418 DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode());
16419 unsigned Offset = IsStrict ? 1 : 0;
16420 SDValue A = N->getOperand(0 + Offset);
16421 SDValue B = N->getOperand(1 + Offset);
16422 SDValue C = N->getOperand(2 + Offset);
16423 SDValue Mask = N->getOperand(3 + Offset);
16424 SDValue VL = N->getOperand(4 + Offset);
16425
16426 auto invertIfNegative = [&Mask, &VL](SDValue &V) {
16427 if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
16428 V.getOperand(2) == VL) {
16429 // Return the negated input.
16430 V = V.getOperand(0);
16431 return true;
16432 }
16433
16434 return false;
16435 };
16436
16437 bool NegA = invertIfNegative(A);
16438 bool NegB = invertIfNegative(B);
16439 bool NegC = invertIfNegative(C);
16440
16441 // If no operands are negated, we're done.
16442 if (!NegA && !NegB && !NegC)
16443 return SDValue();
16444
16445 unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
16446 if (IsStrict)
16447 return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
16448 {N->getOperand(0), A, B, C, Mask, VL});
16449 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
16450 VL);
16451}
16452
16454static SDValue performVFMADD_VLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16455 const RISCVSubtarget &Subtarget) {
16456 SelectionDAG &DAG = DCI.DAG;
16457
16458 if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
16459 return V;
16460
16461 // FIXME: Ignore strict opcodes for now.
16462 if (DAG.getSelectionDAGInfo().isTargetStrictFPOpcode(N->getOpcode()))
16463 return SDValue();
16464
16465 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
16466}
16467
16468static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
16469 const RISCVSubtarget &Subtarget) {
16470 assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
16471
16472 EVT VT = N->getValueType(0);
16473
16474 if (VT != Subtarget.getXLenVT())
16475 return SDValue();
16476
16477 if (!isa<ConstantSDNode>(N->getOperand(1)))
16478 return SDValue();
16479 uint64_t ShAmt = N->getConstantOperandVal(1);
16480
16481 SDValue N0 = N->getOperand(0);
16482
16483 // Combine (sra (sext_inreg (shl X, C1), iX), C2) ->
16484 // (sra (shl X, C1+(XLen-iX)), C2+(XLen-iX)) so it gets selected as SLLI+SRAI.
16485 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && N0.hasOneUse()) {
16486 unsigned ExtSize =
16487 cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
16488 if (ShAmt < ExtSize && N0.getOperand(0).getOpcode() == ISD::SHL &&
16489 N0.getOperand(0).hasOneUse() &&
16490 isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
16491 uint64_t LShAmt = N0.getOperand(0).getConstantOperandVal(1);
16492 if (LShAmt < ExtSize) {
16493 unsigned Size = VT.getSizeInBits();
16494 SDLoc ShlDL(N0.getOperand(0));
16495 SDValue Shl =
16496 DAG.getNode(ISD::SHL, ShlDL, VT, N0.getOperand(0).getOperand(0),
16497 DAG.getConstant(LShAmt + (Size - ExtSize), ShlDL, VT));
16498 SDLoc DL(N);
16499 return DAG.getNode(ISD::SRA, DL, VT, Shl,
16500 DAG.getConstant(ShAmt + (Size - ExtSize), DL, VT));
16501 }
16502 }
16503 }
16504
16505 if (ShAmt > 32 || VT != MVT::i64)
16506 return SDValue();
16507
16508 // Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
16509 // FIXME: Should this be a generic combine? There's a similar combine on X86.
16510 //
16511 // Also try these folds where an add or sub is in the middle.
16512 // (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1), C)
16513 // (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1, X), C)
16514 SDValue Shl;
16515 ConstantSDNode *AddC = nullptr;
16516
16517 // We might have an ADD or SUB between the SRA and SHL.
16518 bool IsAdd = N0.getOpcode() == ISD::ADD;
16519 if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
16520 // Other operand needs to be a constant we can modify.
16521 AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
16522 if (!AddC)
16523 return SDValue();
16524
16525 // AddC needs to have at least 32 trailing zeros.
16526 if (llvm::countr_zero(AddC->getZExtValue()) < 32)
16527 return SDValue();
16528
16529 // All users should be a shift by constant less than or equal to 32. This
16530 // ensures we'll do this optimization for each of them to produce an
16531 // add/sub+sext_inreg they can all share.
16532 for (SDNode *U : N0->users()) {
16533 if (U->getOpcode() != ISD::SRA ||
16534 !isa<ConstantSDNode>(U->getOperand(1)) ||
16535 U->getConstantOperandVal(1) > 32)
16536 return SDValue();
16537 }
16538
16539 Shl = N0.getOperand(IsAdd ? 0 : 1);
16540 } else {
16541 // Not an ADD or SUB.
16542 Shl = N0;
16543 }
16544
16545 // Look for a shift left by 32.
16546 if (Shl.getOpcode() != ISD::SHL || !isa<ConstantSDNode>(Shl.getOperand(1)) ||
16547 Shl.getConstantOperandVal(1) != 32)
16548 return SDValue();
16549
16550 // If we didn't look through an add/sub, then the shl should have one use.
16551 // If we did look through an add/sub, the sext_inreg we create is free so
16552 // we're only creating 2 new instructions. It's enough to only remove the
16553 // original sra+add/sub.
16554 if (!AddC && !Shl.hasOneUse())
16555 return SDValue();
16556
16557 SDLoc DL(N);
16558 SDValue In = Shl.getOperand(0);
16559
16560 // If we looked through an ADD or SUB, we need to rebuild it with the shifted
16561 // constant.
16562 if (AddC) {
16563 SDValue ShiftedAddC =
16564 DAG.getConstant(AddC->getZExtValue() >> 32, DL, MVT::i64);
16565 if (IsAdd)
16566 In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
16567 else
16568 In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
16569 }
16570
16571 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
16572 DAG.getValueType(MVT::i32));
16573 if (ShAmt == 32)
16574 return SExt;
16575
16576 return DAG.getNode(
16577 ISD::SHL, DL, MVT::i64, SExt,
16578 DAG.getConstant(32 - ShAmt, DL, MVT::i64));
16579}
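// Illustrative sketch (the constants are assumptions): on RV64,
//   (sra (shl X, 32), 29)
// has ShAmt = 29 <= 32, so it is rewritten to
//   (shl (sext_inreg X, i32), 3)
// which can be selected as a sext.w followed by a slli by 3.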
16580
16581// Invert (and/or (setcc X, Y, cc), (xor Z, 1)) to (or/and (setcc X, Y, !cc), Z)
16582// if the result is used as the condition of a br_cc or select_cc we can
16583// invert, inverting the setcc is free, and Z is 0/1. The caller will invert
16584// the br_cc/select_cc.
16585static SDValue tryDemorganOfBooleanCondition(SDValue Cond, SelectionDAG &DAG) {
16586 bool IsAnd = Cond.getOpcode() == ISD::AND;
16587 if (!IsAnd && Cond.getOpcode() != ISD::OR)
16588 return SDValue();
16589
16590 if (!Cond.hasOneUse())
16591 return SDValue();
16592
16593 SDValue Setcc = Cond.getOperand(0);
16594 SDValue Xor = Cond.getOperand(1);
16595 // Canonicalize setcc to LHS.
16596 if (Setcc.getOpcode() != ISD::SETCC)
16597 std::swap(Setcc, Xor);
16598 // LHS should be a setcc and RHS should be an xor.
16599 if (Setcc.getOpcode() != ISD::SETCC || !Setcc.hasOneUse() ||
16600 Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
16601 return SDValue();
16602
16603 // If the condition is an And, SimplifyDemandedBits may have changed
16604 // (xor Z, 1) to (not Z).
16605 SDValue Xor1 = Xor.getOperand(1);
16606 if (!isOneConstant(Xor1) && !(IsAnd && isAllOnesConstant(Xor1)))
16607 return SDValue();
16608
16609 EVT VT = Cond.getValueType();
16610 SDValue Xor0 = Xor.getOperand(0);
16611
16612 // The LHS of the xor needs to be 0/1.
16613 APInt Mask = APInt::getBitsSetFrom(VT.getSizeInBits(), 1);
16614 if (!DAG.MaskedValueIsZero(Xor0, Mask))
16615 return SDValue();
16616
16617 // We can only invert integer setccs.
16618 EVT SetCCOpVT = Setcc.getOperand(0).getValueType();
16619 if (!SetCCOpVT.isScalarInteger())
16620 return SDValue();
16621
16622 ISD::CondCode CCVal = cast<CondCodeSDNode>(Setcc.getOperand(2))->get();
16623 if (ISD::isIntEqualitySetCC(CCVal)) {
16624 CCVal = ISD::getSetCCInverse(CCVal, SetCCOpVT);
16625 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(0),
16626 Setcc.getOperand(1), CCVal);
16627 } else if (CCVal == ISD::SETLT && isNullConstant(Setcc.getOperand(0))) {
16628 // Invert (setlt 0, X) by converting to (setlt X, 1).
16629 Setcc = DAG.getSetCC(SDLoc(Setcc), VT, Setcc.getOperand(1),
16630 DAG.getConstant(1, SDLoc(Setcc), VT), CCVal);
16631 } else if (CCVal == ISD::SETLT && isOneConstant(Setcc.getOperand(1))) {
16632 // (setlt X, 1) by converting to (setlt 0, X).
16633 Setcc = DAG.getSetCC(SDLoc(Setcc), VT,
16634 DAG.getConstant(0, SDLoc(Setcc), VT),
16635 Setcc.getOperand(0), CCVal);
16636 } else
16637 return SDValue();
16638
16639 unsigned Opc = IsAnd ? ISD::OR : ISD::AND;
16640 return DAG.getNode(Opc, SDLoc(Cond), VT, Setcc, Xor.getOperand(0));
16641}
16642
16643// Perform common combines for BR_CC and SELECT_CC conditions.
16644static bool combine_CC(SDValue &LHS, SDValue &RHS, SDValue &CC, const SDLoc &DL,
16645 SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
16646 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
16647
16648 // Since an arithmetic right shift always preserves the sign bit,
16649 // the shift can be omitted.
16650 // Fold setlt (sra X, N), 0 -> setlt X, 0 and
16651 // setge (sra X, N), 0 -> setge X, 0
16652 if (isNullConstant(RHS) && (CCVal == ISD::SETGE || CCVal == ISD::SETLT) &&
16653 LHS.getOpcode() == ISD::SRA) {
16654 LHS = LHS.getOperand(0);
16655 return true;
16656 }
16657
16658 if (!ISD::isIntEqualitySetCC(CCVal))
16659 return false;
16660
16661 // Fold ((setlt X, Y), 0, ne) -> (X, Y, lt)
16662 // Sometimes the setcc is introduced after br_cc/select_cc has been formed.
16663 if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
16664 LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
16665 // If we're looking for eq 0 instead of ne 0, we need to invert the
16666 // condition.
16667 bool Invert = CCVal == ISD::SETEQ;
16668 CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
16669 if (Invert)
16670 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16671
16672 RHS = LHS.getOperand(1);
16673 LHS = LHS.getOperand(0);
16674 translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
16675
16676 CC = DAG.getCondCode(CCVal);
16677 return true;
16678 }
16679
16680 // Fold ((xor X, Y), 0, eq/ne) -> (X, Y, eq/ne)
16681 if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS)) {
16682 RHS = LHS.getOperand(1);
16683 LHS = LHS.getOperand(0);
16684 return true;
16685 }
16686
16687 // Fold ((srl (and X, 1<<C), C), 0, eq/ne) -> ((shl X, XLen-1-C), 0, ge/lt)
16688 if (isNullConstant(RHS) && LHS.getOpcode() == ISD::SRL && LHS.hasOneUse() &&
16689 LHS.getOperand(1).getOpcode() == ISD::Constant) {
16690 SDValue LHS0 = LHS.getOperand(0);
16691 if (LHS0.getOpcode() == ISD::AND &&
16692 LHS0.getOperand(1).getOpcode() == ISD::Constant) {
16693 uint64_t Mask = LHS0.getConstantOperandVal(1);
16694 uint64_t ShAmt = LHS.getConstantOperandVal(1);
16695 if (isPowerOf2_64(Mask) && Log2_64(Mask) == ShAmt) {
16696 CCVal = CCVal == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
16697 CC = DAG.getCondCode(CCVal);
16698
16699 ShAmt = LHS.getValueSizeInBits() - 1 - ShAmt;
16700 LHS = LHS0.getOperand(0);
16701 if (ShAmt != 0)
16702 LHS =
16703 DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS0.getOperand(0),
16704 DAG.getConstant(ShAmt, DL, LHS.getValueType()));
16705 return true;
16706 }
16707 }
16708 }
16709
16710 // (X, 1, setne) -> (X, 0, seteq) if we can prove X is 0/1.
16711 // This can occur when legalizing some floating point comparisons.
16712 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
16713 if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
16714 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16715 CC = DAG.getCondCode(CCVal);
16716 RHS = DAG.getConstant(0, DL, LHS.getValueType());
16717 return true;
16718 }
16719
16720 if (isNullConstant(RHS)) {
16721 if (SDValue NewCond = tryDemorganOfBooleanCondition(LHS, DAG)) {
16722 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
16723 CC = DAG.getCondCode(CCVal);
16724 LHS = NewCond;
16725 return true;
16726 }
16727 }
16728
16729 return false;
16730}
16731
16732// Fold
16733// (select C, (add Y, X), Y) -> (add Y, (select C, X, 0)).
16734// (select C, (sub Y, X), Y) -> (sub Y, (select C, X, 0)).
16735// (select C, (or Y, X), Y) -> (or Y, (select C, X, 0)).
16736// (select C, (xor Y, X), Y) -> (xor Y, (select C, X, 0)).
16737static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
16738 SDValue TrueVal, SDValue FalseVal,
16739 bool Swapped) {
16740 bool Commutative = true;
16741 unsigned Opc = TrueVal.getOpcode();
16742 switch (Opc) {
16743 default:
16744 return SDValue();
16745 case ISD::SHL:
16746 case ISD::SRA:
16747 case ISD::SRL:
16748 case ISD::SUB:
16749 Commutative = false;
16750 break;
16751 case ISD::ADD:
16752 case ISD::OR:
16753 case ISD::XOR:
16754 break;
16755 }
16756
16757 if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
16758 return SDValue();
16759
16760 unsigned OpToFold;
16761 if (FalseVal == TrueVal.getOperand(0))
16762 OpToFold = 0;
16763 else if (Commutative && FalseVal == TrueVal.getOperand(1))
16764 OpToFold = 1;
16765 else
16766 return SDValue();
16767
16768 EVT VT = N->getValueType(0);
16769 SDLoc DL(N);
16770 SDValue OtherOp = TrueVal.getOperand(1 - OpToFold);
16771 EVT OtherOpVT = OtherOp.getValueType();
16772 SDValue IdentityOperand =
16773 DAG.getNeutralElement(Opc, DL, OtherOpVT, N->getFlags());
16774 if (!Commutative)
16775 IdentityOperand = DAG.getConstant(0, DL, OtherOpVT);
16776 assert(IdentityOperand && "No identity operand!");
16777
16778 if (Swapped)
16779 std::swap(OtherOp, IdentityOperand);
16780 SDValue NewSel =
16781 DAG.getSelect(DL, OtherOpVT, N->getOperand(0), OtherOp, IdentityOperand);
16782 return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
16783}
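// Illustrative sketch (operand names are placeholders):
//   (select C, (add Y, X), Y)  ->  (add Y, (select C, X, 0))
// because adding 0 when C is false reproduces Y unchanged; for the
// non-commutative opcodes above (sub and the shifts) only the case
// FalseVal == operand 0 is accepted, and 0 is always used as the neutral
// second operand.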
16784
16785// This tries to get rid of the `select` and `icmp` that are being used to
16786// handle targets that do not support `cttz(0)`/`ctlz(0)`.
16787static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
16788 SDValue Cond = N->getOperand(0);
16789
16790 // This represents either CTTZ or CTLZ instruction.
16791 SDValue CountZeroes;
16792
16793 SDValue ValOnZero;
16794
16795 if (Cond.getOpcode() != ISD::SETCC)
16796 return SDValue();
16797
16798 if (!isNullConstant(Cond->getOperand(1)))
16799 return SDValue();
16800
16801 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
16802 if (CCVal == ISD::CondCode::SETEQ) {
16803 CountZeroes = N->getOperand(2);
16804 ValOnZero = N->getOperand(1);
16805 } else if (CCVal == ISD::CondCode::SETNE) {
16806 CountZeroes = N->getOperand(1);
16807 ValOnZero = N->getOperand(2);
16808 } else {
16809 return SDValue();
16810 }
16811
16812 if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
16813 CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
16814 CountZeroes = CountZeroes.getOperand(0);
16815
16816 if (CountZeroes.getOpcode() != ISD::CTTZ &&
16817 CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
16818 CountZeroes.getOpcode() != ISD::CTLZ &&
16819 CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
16820 return SDValue();
16821
16822 if (!isNullConstant(ValOnZero))
16823 return SDValue();
16824
16825 SDValue CountZeroesArgument = CountZeroes->getOperand(0);
16826 if (Cond->getOperand(0) != CountZeroesArgument)
16827 return SDValue();
16828
16829 if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
16830 CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
16831 CountZeroes.getValueType(), CountZeroesArgument);
16832 } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
16833 CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
16834 CountZeroes.getValueType(), CountZeroesArgument);
16835 }
16836
16837 unsigned BitWidth = CountZeroes.getValueSizeInBits();
16838 SDValue BitWidthMinusOne =
16839 DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
16840
16841 auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
16842 CountZeroes, BitWidthMinusOne);
16843 return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
16844}
16845
16846static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
16847 const RISCVSubtarget &Subtarget) {
16848 SDValue Cond = N->getOperand(0);
16849 SDValue True = N->getOperand(1);
16850 SDValue False = N->getOperand(2);
16851 SDLoc DL(N);
16852 EVT VT = N->getValueType(0);
16853 EVT CondVT = Cond.getValueType();
16854
16855 if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
16856 return SDValue();
16857
16858 // Replace (setcc eq (and x, C)) with (setcc ne (and x, C)) to generate
16859 // BEXTI, where C is a power of 2.
16860 if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
16861 (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
16862 SDValue LHS = Cond.getOperand(0);
16863 SDValue RHS = Cond.getOperand(1);
16864 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16865 if (CC == ISD::SETEQ && LHS.getOpcode() == ISD::AND &&
16866 isa<ConstantSDNode>(LHS.getOperand(1)) && isNullConstant(RHS)) {
16867 const APInt &MaskVal = LHS.getConstantOperandAPInt(1);
16868 if (MaskVal.isPowerOf2() && !MaskVal.isSignedIntN(12))
16869 return DAG.getSelect(DL, VT,
16870 DAG.getSetCC(DL, CondVT, LHS, RHS, ISD::SETNE),
16871 False, True);
16872 }
16873 }
16874 return SDValue();
16875}
16876
16877static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
16878 const RISCVSubtarget &Subtarget) {
16879 if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
16880 return Folded;
16881
16882 if (SDValue V = useInversedSetcc(N, DAG, Subtarget))
16883 return V;
16884
16885 if (Subtarget.hasConditionalMoveFusion())
16886 return SDValue();
16887
16888 SDValue TrueVal = N->getOperand(1);
16889 SDValue FalseVal = N->getOperand(2);
16890 if (SDValue V = tryFoldSelectIntoOp(N, DAG, TrueVal, FalseVal, /*Swapped*/false))
16891 return V;
16892 return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
16893}
16894
16895/// If we have a build_vector where each lane is binop X, C, where C
16896/// is a constant (but not necessarily the same constant on all lanes),
16897/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
16898/// We assume that materializing a constant build vector will be no more
16899/// expensive than performing O(n) binops.
16900static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
16901 const RISCVSubtarget &Subtarget,
16902 const RISCVTargetLowering &TLI) {
16903 SDLoc DL(N);
16904 EVT VT = N->getValueType(0);
16905
16906 assert(!VT.isScalableVector() && "unexpected build vector");
16907
16908 if (VT.getVectorNumElements() == 1)
16909 return SDValue();
16910
16911 const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
16912 if (!TLI.isBinOp(Opcode))
16913 return SDValue();
16914
16915 if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
16916 return SDValue();
16917
16918 // This BUILD_VECTOR involves an implicit truncation, and sinking
16919 // truncates through binops is non-trivial.
16920 if (N->op_begin()->getValueType() != VT.getVectorElementType())
16921 return SDValue();
16922
16923 SmallVector<SDValue> LHSOps;
16924 SmallVector<SDValue> RHSOps;
16925 for (SDValue Op : N->ops()) {
16926 if (Op.isUndef()) {
16927 // We can't form a divide or remainder from undef.
16928 if (!DAG.isSafeToSpeculativelyExecute(Opcode))
16929 return SDValue();
16930
16931 LHSOps.push_back(Op);
16932 RHSOps.push_back(Op);
16933 continue;
16934 }
16935
16936 // TODO: We can handle operations which have a neutral RHS value
16937 // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
16938 // of profit in a more explicit manner.
16939 if (Op.getOpcode() != Opcode || !Op.hasOneUse())
16940 return SDValue();
16941
16942 LHSOps.push_back(Op.getOperand(0));
16943 if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
16944 !isa<ConstantFPSDNode>(Op.getOperand(1)))
16945 return SDValue();
16946 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16947 // have different LHS and RHS types.
16948 if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
16949 return SDValue();
16950
16951 RHSOps.push_back(Op.getOperand(1));
16952 }
16953
16954 return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
16955 DAG.getBuildVector(VT, DL, RHSOps));
16956}
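// Illustrative sketch (the lane values are assumptions): a v4i32 node
//   build_vector (add a, 1), (add b, 2), (add c, 3), (add d, 4)
// is rewritten to
//   add (build_vector a, b, c, d), (build_vector 1, 2, 3, 4)
// replacing four scalar adds with one vector add plus a constant vector.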
16957
16958static SDValue performINSERT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
16959 const RISCVSubtarget &Subtarget,
16960 const RISCVTargetLowering &TLI) {
16961 SDValue InVec = N->getOperand(0);
16962 SDValue InVal = N->getOperand(1);
16963 SDValue EltNo = N->getOperand(2);
16964 SDLoc DL(N);
16965
16966 EVT VT = InVec.getValueType();
16967 if (VT.isScalableVector())
16968 return SDValue();
16969
16970 if (!InVec.hasOneUse())
16971 return SDValue();
16972
16973 // Given insert_vector_elt (binop a, VecC), (same_binop b, C2), Elt
16974 // move the insert_vector_elts into the arms of the binop. Note that
16975 // the new RHS must be a constant.
16976 const unsigned InVecOpcode = InVec->getOpcode();
16977 if (InVecOpcode == InVal->getOpcode() && TLI.isBinOp(InVecOpcode) &&
16978 InVal.hasOneUse()) {
16979 SDValue InVecLHS = InVec->getOperand(0);
16980 SDValue InVecRHS = InVec->getOperand(1);
16981 SDValue InValLHS = InVal->getOperand(0);
16982 SDValue InValRHS = InVal->getOperand(1);
16983
16985 return SDValue();
16986 if (!isa<ConstantSDNode>(InValRHS) && !isa<ConstantFPSDNode>(InValRHS))
16987 return SDValue();
16988 // FIXME: Return failure if the RHS type doesn't match the LHS. Shifts may
16989 // have different LHS and RHS types.
16990 if (InVec.getOperand(0).getValueType() != InVec.getOperand(1).getValueType())
16991 return SDValue();
16992 SDValue LHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16993 InVecLHS, InValLHS, EltNo);
16994 SDValue RHS = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16995 InVecRHS, InValRHS, EltNo);
16996 return DAG.getNode(InVecOpcode, DL, VT, LHS, RHS);
16997 }
16998
16999 // Given insert_vector_elt (concat_vectors ...), InVal, Elt
17000 // move the insert_vector_elt to the source operand of the concat_vector.
17001 if (InVec.getOpcode() != ISD::CONCAT_VECTORS)
17002 return SDValue();
17003
17004 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17005 if (!IndexC)
17006 return SDValue();
17007 unsigned Elt = IndexC->getZExtValue();
17008
17009 EVT ConcatVT = InVec.getOperand(0).getValueType();
17010 if (ConcatVT.getVectorElementType() != InVal.getValueType())
17011 return SDValue();
17012 unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17013 SDValue NewIdx = DAG.getVectorIdxConstant(Elt % ConcatNumElts, DL);
17014
17015 unsigned ConcatOpIdx = Elt / ConcatNumElts;
17016 SDValue ConcatOp = InVec.getOperand(ConcatOpIdx);
17017 ConcatOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ConcatVT,
17018 ConcatOp, InVal, NewIdx);
17019
17020 SmallVector<SDValue> ConcatOps;
17021 ConcatOps.append(InVec->op_begin(), InVec->op_end());
17022 ConcatOps[ConcatOpIdx] = ConcatOp;
17023 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17024}
17025
17026// If we're concatenating a series of vector loads like
17027// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
17028// Then we can turn this into a strided load by widening the vector elements
17029// vlse32 p, stride=n
17030static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
17031 const RISCVSubtarget &Subtarget,
17032 const RISCVTargetLowering &TLI) {
17033 SDLoc DL(N);
17034 EVT VT = N->getValueType(0);
17035
17036 // Only perform this combine on legal MVTs.
17037 if (!TLI.isTypeLegal(VT))
17038 return SDValue();
17039
17040 // TODO: Potentially extend this to scalable vectors
17041 if (VT.isScalableVector())
17042 return SDValue();
17043
17044 auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
17045 if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
17046 !SDValue(BaseLd, 0).hasOneUse())
17047 return SDValue();
17048
17049 EVT BaseLdVT = BaseLd->getValueType(0);
17050
17051 // Go through the loads and check that they're strided
17052 SmallVector<LoadSDNode *> Lds;
17053 Lds.push_back(BaseLd);
17054 Align Align = BaseLd->getAlign();
17055 for (SDValue Op : N->ops().drop_front()) {
17056 auto *Ld = dyn_cast<LoadSDNode>(Op);
17057 if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
17058 Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
17059 Ld->getValueType(0) != BaseLdVT)
17060 return SDValue();
17061
17062 Lds.push_back(Ld);
17063
17064 // The common alignment is the most restrictive (smallest) of all the loads
17065 Align = std::min(Align, Ld->getAlign());
17066 }
17067
17068 using PtrDiff = std::pair<std::variant<int64_t, SDValue>, bool>;
17069 auto GetPtrDiff = [&DAG](LoadSDNode *Ld1,
17070 LoadSDNode *Ld2) -> std::optional<PtrDiff> {
17071 // If the load ptrs can be decomposed into a common (Base + Index) with a
17072 // common constant stride, then return the constant stride.
17073 BaseIndexOffset BIO1 = BaseIndexOffset::match(Ld1, DAG);
17074 BaseIndexOffset BIO2 = BaseIndexOffset::match(Ld2, DAG);
17075 if (BIO1.equalBaseIndex(BIO2, DAG))
17076 return {{BIO2.getOffset() - BIO1.getOffset(), false}};
17077
17078 // Otherwise try to match (add LastPtr, Stride) or (add NextPtr, Stride)
17079 SDValue P1 = Ld1->getBasePtr();
17080 SDValue P2 = Ld2->getBasePtr();
17081 if (P2.getOpcode() == ISD::ADD && P2.getOperand(0) == P1)
17082 return {{P2.getOperand(1), false}};
17083 if (P1.getOpcode() == ISD::ADD && P1.getOperand(0) == P2)
17084 return {{P1.getOperand(1), true}};
17085
17086 return std::nullopt;
17087 };
17088
17089 // Get the distance between the first and second loads
17090 auto BaseDiff = GetPtrDiff(Lds[0], Lds[1]);
17091 if (!BaseDiff)
17092 return SDValue();
17093
17094 // Check all the loads are the same distance apart
17095 for (auto *It = Lds.begin() + 1; It != Lds.end() - 1; It++)
17096 if (GetPtrDiff(*It, *std::next(It)) != BaseDiff)
17097 return SDValue();
17098
17099 // TODO: At this point, we've successfully matched a generalized gather
17100 // load. Maybe we should emit that, and then move the specialized
17101 // matchers above and below into a DAG combine?
17102
17103 // Get the widened scalar type, e.g. v4i8 -> i64
17104 unsigned WideScalarBitWidth =
17105 BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
17106 MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
17107
17108 // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
17109 MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
17110 if (!TLI.isTypeLegal(WideVecVT))
17111 return SDValue();
17112
17113 // Check that the operation is legal
17114 if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
17115 return SDValue();
17116
17117 auto [StrideVariant, MustNegateStride] = *BaseDiff;
17118 SDValue Stride =
17119 std::holds_alternative<SDValue>(StrideVariant)
17120 ? std::get<SDValue>(StrideVariant)
17121 : DAG.getSignedConstant(std::get<int64_t>(StrideVariant), DL,
17122 Lds[0]->getOffset().getValueType());
17123 if (MustNegateStride)
17124 Stride = DAG.getNegative(Stride, DL, Stride.getValueType());
17125
17126 SDValue AllOneMask =
17127 DAG.getSplat(WideVecVT.changeVectorElementType(MVT::i1), DL,
17128 DAG.getConstant(1, DL, MVT::i1));
17129
17130 uint64_t MemSize;
17131 if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
17132 ConstStride && ConstStride->getSExtValue() >= 0)
17133 // total size = (elsize * n) + (stride - elsize) * (n-1)
17134 // = elsize + stride * (n-1)
17135 MemSize = WideScalarVT.getSizeInBits() +
17136 ConstStride->getSExtValue() * (N->getNumOperands() - 1);
17137 else
17138 // If Stride isn't constant, then we can't know how much it will load
17139 MemSize = MemoryLocation::UnknownSize;
17140
17141 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
17142 BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
17143 Align);
17144
17145 SDValue StridedLoad = DAG.getStridedLoadVP(
17146 WideVecVT, DL, BaseLd->getChain(), BaseLd->getBasePtr(), Stride,
17147 AllOneMask,
17148 DAG.getConstant(N->getNumOperands(), DL, Subtarget.getXLenVT()), MMO);
17149
17150 for (SDValue Ld : N->ops())
17151 DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
17152
17153 return DAG.getBitcast(VT.getSimpleVT(), StridedLoad);
17154}
17155
17156/// Custom legalize <N x i128> or <N x i256> to <M x ELEN>. This runs
17157/// during the combine phase before type legalization, and relies on
17158/// DAGCombine not undoing the transform if isShuffleMaskLegal returns false
17159/// for the source mask.
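/// As an illustrative example (assuming ELEN=64): a v4i128 shuffle with mask
/// <1, 0, 3, 2> is rewritten as a v8i64 shuffle of the bitcast operands using
/// the narrowed mask <2, 3, 0, 1, 6, 7, 4, 5>.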
17160 static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG,
17161 const RISCVSubtarget &Subtarget,
17162 const RISCVTargetLowering &TLI) {
17163 SDLoc DL(N);
17164 EVT VT = N->getValueType(0);
17165 const unsigned ElementSize = VT.getScalarSizeInBits();
17166 SDValue V1 = N->getOperand(0);
17167 SDValue V2 = N->getOperand(1);
17168 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
17169
17170 if (TLI.isTypeLegal(VT) || ElementSize <= Subtarget.getELen() ||
17171 !isPowerOf2_64(ElementSize) || VT.getVectorNumElements() % 2 != 0 ||
17172 VT.isFloatingPoint() || TLI.isShuffleMaskLegal(Mask, VT))
17173 return SDValue();
17174
17175 SmallVector<int, 8> NewMask;
17176 narrowShuffleMaskElts(2, Mask, NewMask);
17177
17178 LLVMContext &C = *DAG.getContext();
17179 EVT NewEltVT = EVT::getIntegerVT(C, ElementSize / 2);
17180 EVT NewVT = EVT::getVectorVT(C, NewEltVT, VT.getVectorNumElements() * 2);
17181 SDValue Res = DAG.getVectorShuffle(NewVT, DL, DAG.getBitcast(NewVT, V1),
17182 DAG.getBitcast(NewVT, V2), NewMask);
17183 return DAG.getBitcast(VT, Res);
17184}
17185
17186
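// Fold an add of a widening multiply into a widening multiply-accumulate,
// e.g. (add_vl (vwmul_vl x, y, undef, m, vl), z, undef, m, vl)
// -> (vwmacc_vl x, y, z, m, vl). Plain ISD::ADD on scalable vectors is
// handled as well, using the default mask and VL.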
17187 static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
17188 const RISCVSubtarget &Subtarget) {
17189
17190 assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
17191
17192 if (N->getValueType(0).isFixedLengthVector())
17193 return SDValue();
17194
17195 SDValue Addend = N->getOperand(0);
17196 SDValue MulOp = N->getOperand(1);
17197
17198 if (N->getOpcode() == RISCVISD::ADD_VL) {
17199 SDValue AddPassthruOp = N->getOperand(2);
17200 if (!AddPassthruOp.isUndef())
17201 return SDValue();
17202 }
17203
17204 auto IsVWMulOpc = [](unsigned Opc) {
17205 switch (Opc) {
17206 case RISCVISD::VWMUL_VL:
17207 case RISCVISD::VWMULU_VL:
17208 case RISCVISD::VWMULSU_VL:
17209 return true;
17210 default:
17211 return false;
17212 }
17213 };
17214
17215 if (!IsVWMulOpc(MulOp.getOpcode()))
17216 std::swap(Addend, MulOp);
17217
17218 if (!IsVWMulOpc(MulOp.getOpcode()))
17219 return SDValue();
17220
17221 SDValue MulPassthruOp = MulOp.getOperand(2);
17222
17223 if (!MulPassthruOp.isUndef())
17224 return SDValue();
17225
17226 auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
17227 const RISCVSubtarget &Subtarget) {
17228 if (N->getOpcode() == ISD::ADD) {
17229 SDLoc DL(N);
17230 return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
17231 Subtarget);
17232 }
17233 return std::make_pair(N->getOperand(3), N->getOperand(4));
17234 }(N, DAG, Subtarget);
17235
17236 SDValue MulMask = MulOp.getOperand(3);
17237 SDValue MulVL = MulOp.getOperand(4);
17238
17239 if (AddMask != MulMask || AddVL != MulVL)
17240 return SDValue();
17241
17242 unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
17243 static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
17244 "Unexpected opcode after VWMACC_VL");
17245 static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
17246 "Unexpected opcode after VWMACC_VL!");
17247 static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
17248 "Unexpected opcode after VWMUL_VL!");
17249 static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
17250 "Unexpected opcode after VWMUL_VL!");
17251
17252 SDLoc DL(N);
17253 EVT VT = N->getValueType(0);
17254 SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
17255 AddVL};
17256 return DAG.getNode(Opc, DL, VT, Ops);
17257}
17258
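// Rewrite a gather/scatter index so it is legal for RVV's indexed addressing.
// For example (illustrative types), on RV64 a signed v4i16 index vector is
// first sign-extended to v4i64 (XLenVT elements) so no bits are lost, and the
// operation is then re-tagged with an unsigned index type.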
17259 static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
17260 ISD::MemIndexType &IndexType,
17261 RISCVTargetLowering::DAGCombinerInfo &DCI) {
17262 if (!DCI.isBeforeLegalize())
17263 return false;
17264
17265 SelectionDAG &DAG = DCI.DAG;
17266 const MVT XLenVT =
17267 DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
17268
17269 const EVT IndexVT = Index.getValueType();
17270
17271 // RISC-V indexed loads only support the "unsigned unscaled" addressing
17272 // mode, so anything else must be manually legalized.
17273 if (!isIndexTypeSigned(IndexType))
17274 return false;
17275
17276 if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
17277 // Any index legalization should first promote to XLenVT, so we don't lose
17278 // bits when scaling. This may create an illegal index type so we let
17279 // LLVM's legalization take care of the splitting.
17280 // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
17281 Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
17282 IndexVT.changeVectorElementType(XLenVT), Index);
17283 }
17284 IndexType = ISD::UNSIGNED_SCALED;
17285 return true;
17286}
17287
17288/// Match the index vector of a scatter or gather node as the shuffle mask
17289/// which performs the rearrangement if possible. Will only match if
17290/// all lanes are touched, and thus replacing the scatter or gather with
17291/// a unit strided access and shuffle is legal.
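/// As an illustrative example: a gather of v4i32 with an all-ones mask and
/// constant byte indices <4, 0, 12, 8> touches every lane and yields the
/// shuffle mask <1, 0, 3, 2>.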
17292static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
17293 SmallVector<int> &ShuffleMask) {
17294 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17295 return false;
17296 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17297 return false;
17298
17299 const unsigned ElementSize = VT.getScalarStoreSize();
17300 const unsigned NumElems = VT.getVectorNumElements();
17301
17302 // Create the shuffle mask and check all bits active
17303 assert(ShuffleMask.empty());
17304 BitVector ActiveLanes(NumElems);
17305 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17306 // TODO: We've found an active bit of UB, and could be
17307 // more aggressive here if desired.
17308 if (Index->getOperand(i)->isUndef())
17309 return false;
17310 uint64_t C = Index->getConstantOperandVal(i);
17311 if (C % ElementSize != 0)
17312 return false;
17313 C = C / ElementSize;
17314 if (C >= NumElems)
17315 return false;
17316 ShuffleMask.push_back(C);
17317 ActiveLanes.set(C);
17318 }
17319 return ActiveLanes.all();
17320}
17321
17322/// Match the index of a gather or scatter operation as an operation
17323/// with twice the element width and half the number of elements. This is
17324/// generally profitable (if legal) because these operations are linear
17325/// in VL, so even if we cause some extra VTYPE/VL toggles, we still
17326/// come out ahead.
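/// As an illustrative example (assuming ELEN >= 64 and sufficient alignment):
/// a gather of v8i32 with constant byte indices <0, 4, 16, 20, 8, 12, 24, 28>
/// pairs up neighbouring elements and can instead be treated as a v4i64
/// gather with byte indices <0, 16, 8, 24>.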
17327static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
17328 Align BaseAlign, const RISCVSubtarget &ST) {
17329 if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
17330 return false;
17331 if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
17332 return false;
17333
17334 // Attempt a doubling. If we can use an element type 4x or 8x in
17335 // size, this will happen via multiple iterations of the transform.
17336 const unsigned NumElems = VT.getVectorNumElements();
17337 if (NumElems % 2 != 0)
17338 return false;
17339
17340 const unsigned ElementSize = VT.getScalarStoreSize();
17341 const unsigned WiderElementSize = ElementSize * 2;
17342 if (WiderElementSize > ST.getELen()/8)
17343 return false;
17344
17345 if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
17346 return false;
17347
17348 for (unsigned i = 0; i < Index->getNumOperands(); i++) {
17349 // TODO: We've found an active bit of UB, and could be
17350 // more aggressive here if desired.
17351 if (Index->getOperand(i)->isUndef())
17352 return false;
17353 // TODO: This offset check is too strict if we support fully
17354 // misaligned memory operations.
17355 uint64_t C = Index->getConstantOperandVal(i);
17356 if (i % 2 == 0) {
17357 if (C % WiderElementSize != 0)
17358 return false;
17359 continue;
17360 }
17361 uint64_t Last = Index->getConstantOperandVal(i-1);
17362 if (C != Last + ElementSize)
17363 return false;
17364 }
17365 return true;
17366}
17367
17368// trunc (sra sext (X), zext (Y)) -> sra (X, smin (Y, scalarsize(Y) - 1))
17369 // This is beneficial for cases where X and Y are both the same low-precision
17370 // vector value type. Since the truncate would be lowered into n levels of
17371 // TRUNCATE_VECTOR_VL to satisfy RVV's SEW*2->SEW truncate restriction, such a
17372 // pattern would later be expanded into a series of "vsetvli" and "vnsrl"
17373 // instructions if it is not combined here.
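// As an illustrative example: with X and Y of type v4i8 that were sign- and
// zero-extended to v4i32, (trunc (sra (sext X), (zext Y))) back to v4i8
// becomes (sra X, (smin Y, 7)).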
17374 static SDValue combineTruncOfSraSext(SDNode *N, SelectionDAG &DAG) {
17375 SDValue Mask = N->getOperand(1);
17376 SDValue VL = N->getOperand(2);
17377
17378 bool IsVLMAX = isAllOnesConstant(VL) ||
17379 (isa<RegisterSDNode>(VL) &&
17380 cast<RegisterSDNode>(VL)->getReg() == RISCV::X0);
17381 if (!IsVLMAX || Mask.getOpcode() != RISCVISD::VMSET_VL ||
17382 Mask.getOperand(0) != VL)
17383 return SDValue();
17384
17385 auto IsTruncNode = [&](SDValue V) {
17386 return V.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17387 V.getOperand(1) == Mask && V.getOperand(2) == VL;
17388 };
17389
17390 SDValue Op = N->getOperand(0);
17391
17392 // We need to first find the inner level of TRUNCATE_VECTOR_VL node
17393 // to distinguish such pattern.
17394 while (IsTruncNode(Op)) {
17395 if (!Op.hasOneUse())
17396 return SDValue();
17397 Op = Op.getOperand(0);
17398 }
17399
17400 if (Op.getOpcode() != ISD::SRA || !Op.hasOneUse())
17401 return SDValue();
17402
17403 SDValue N0 = Op.getOperand(0);
17404 SDValue N1 = Op.getOperand(1);
17405 if (N0.getOpcode() != ISD::SIGN_EXTEND || !N0.hasOneUse() ||
17406 N1.getOpcode() != ISD::ZERO_EXTEND || !N1.hasOneUse())
17407 return SDValue();
17408
17409 SDValue N00 = N0.getOperand(0);
17410 SDValue N10 = N1.getOperand(0);
17411 if (!N00.getValueType().isVector() ||
17412 N00.getValueType() != N10.getValueType() ||
17413 N->getValueType(0) != N10.getValueType())
17414 return SDValue();
17415
17416 unsigned MaxShAmt = N10.getValueType().getScalarSizeInBits() - 1;
17417 SDValue SMin =
17418 DAG.getNode(ISD::SMIN, SDLoc(N1), N->getValueType(0), N10,
17419 DAG.getConstant(MaxShAmt, SDLoc(N1), N->getValueType(0)));
17420 return DAG.getNode(ISD::SRA, SDLoc(N), N->getValueType(0), N00, SMin);
17421}
17422
17423// Combine (truncate_vector_vl (umin X, C)) -> (vnclipu_vl X) if C is the
17424// maximum value for the truncated type.
17425// Combine (truncate_vector_vl (smin (smax X, C2), C1)) -> (vnclip_vl X) if C1
17426// is the signed maximum value for the truncated type and C2 is the signed
17427// minimum value.
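// As an illustrative example, truncating from i16 to i8 elements:
// (truncate_vector_vl (umin X, 255)) -> (vnclipu X), and
// (truncate_vector_vl (smin (smax X, -128), 127)) -> (vnclip X).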
17428 static SDValue combineTruncToVnclip(SDNode *N, SelectionDAG &DAG,
17429 const RISCVSubtarget &Subtarget) {
17430 assert(N->getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL);
17431
17432 MVT VT = N->getSimpleValueType(0);
17433
17434 SDValue Mask = N->getOperand(1);
17435 SDValue VL = N->getOperand(2);
17436
17437 auto MatchMinMax = [&VL, &Mask](SDValue V, unsigned Opc, unsigned OpcVL,
17438 APInt &SplatVal) {
17439 if (V.getOpcode() != Opc &&
17440 !(V.getOpcode() == OpcVL && V.getOperand(2).isUndef() &&
17441 V.getOperand(3) == Mask && V.getOperand(4) == VL))
17442 return SDValue();
17443
17444 SDValue Op = V.getOperand(1);
17445
17446 // Peek through conversion between fixed and scalable vectors.
17447 if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
17448 isNullConstant(Op.getOperand(2)) &&
17449 Op.getOperand(1).getValueType().isFixedLengthVector() &&
17450 Op.getOperand(1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17451 Op.getOperand(1).getOperand(0).getValueType() == Op.getValueType() &&
17452 isNullConstant(Op.getOperand(1).getOperand(1)))
17453 Op = Op.getOperand(1).getOperand(0);
17454
17455 if (ISD::isConstantSplatVector(Op.getNode(), SplatVal))
17456 return V.getOperand(0);
17457
17458 if (Op.getOpcode() == RISCVISD::VMV_V_X_VL && Op.getOperand(0).isUndef() &&
17459 Op.getOperand(2) == VL) {
17460 if (auto *Op1 = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
17461 SplatVal =
17462 Op1->getAPIntValue().sextOrTrunc(Op.getScalarValueSizeInBits());
17463 return V.getOperand(0);
17464 }
17465 }
17466
17467 return SDValue();
17468 };
17469
17470 SDLoc DL(N);
17471
17472 auto DetectUSatPattern = [&](SDValue V) {
17473 APInt LoC, HiC;
17474
17475 // Simple case, V is a UMIN.
17476 if (SDValue UMinOp = MatchMinMax(V, ISD::UMIN, RISCVISD::UMIN_VL, HiC))
17477 if (HiC.isMask(VT.getScalarSizeInBits()))
17478 return UMinOp;
17479
17480 // If we have an SMAX that removes negative numbers first, then we can match
17481 // SMIN instead of UMIN.
17482 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17483 if (SDValue SMaxOp =
17484 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17485 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()))
17486 return SMinOp;
17487
17488 // If we have an SMIN before an SMAX and the SMAX constant is less than or
17489 // equal to the SMIN constant, we can use vnclipu if we insert a new SMAX
17490 // first.
17491 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17492 if (SDValue SMinOp =
17493 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17494 if (LoC.isNonNegative() && HiC.isMask(VT.getScalarSizeInBits()) &&
17495 HiC.uge(LoC))
17496 return DAG.getNode(RISCVISD::SMAX_VL, DL, V.getValueType(), SMinOp,
17497 V.getOperand(1), DAG.getUNDEF(V.getValueType()),
17498 Mask, VL);
17499
17500 return SDValue();
17501 };
17502
17503 auto DetectSSatPattern = [&](SDValue V) {
17504 unsigned NumDstBits = VT.getScalarSizeInBits();
17505 unsigned NumSrcBits = V.getScalarValueSizeInBits();
17506 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
17507 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
17508
17509 APInt HiC, LoC;
17510 if (SDValue SMinOp = MatchMinMax(V, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17511 if (SDValue SMaxOp =
17512 MatchMinMax(SMinOp, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17513 if (HiC == SignedMax && LoC == SignedMin)
17514 return SMaxOp;
17515
17516 if (SDValue SMaxOp = MatchMinMax(V, ISD::SMAX, RISCVISD::SMAX_VL, LoC))
17517 if (SDValue SMinOp =
17518 MatchMinMax(SMaxOp, ISD::SMIN, RISCVISD::SMIN_VL, HiC))
17519 if (HiC == SignedMax && LoC == SignedMin)
17520 return SMinOp;
17521
17522 return SDValue();
17523 };
17524
17525 SDValue Src = N->getOperand(0);
17526
17527 // Look through multiple layers of truncates.
17528 while (Src.getOpcode() == RISCVISD::TRUNCATE_VECTOR_VL &&
17529 Src.getOperand(1) == Mask && Src.getOperand(2) == VL &&
17530 Src.hasOneUse())
17531 Src = Src.getOperand(0);
17532
17533 SDValue Val;
17534 unsigned ClipOpc;
17535 if ((Val = DetectUSatPattern(Src)))
17536 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_USAT;
17537 else if ((Val = DetectSSatPattern(Src)))
17538 ClipOpc = RISCVISD::TRUNCATE_VECTOR_VL_SSAT;
17539 else
17540 return SDValue();
17541
17542 MVT ValVT = Val.getSimpleValueType();
17543
17544 do {
17545 MVT ValEltVT = MVT::getIntegerVT(ValVT.getScalarSizeInBits() / 2);
17546 ValVT = ValVT.changeVectorElementType(ValEltVT);
17547 Val = DAG.getNode(ClipOpc, DL, ValVT, Val, Mask, VL);
17548 } while (ValVT != VT);
17549
17550 return Val;
17551}
17552
17553// Convert
17554// (iX ctpop (bitcast (vXi1 A)))
17555// ->
17556// (zext (vcpop.m (nxvYi1 (insert_subvec (vXi1 A)))))
17557// FIXME: It's complicated to match all the variations of this after type
17558// legalization so we only handle the pre-type legalization pattern, but that
17559// requires the fixed vector type to be legal.
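// As an illustrative example: (i64 (ctpop (i16 (bitcast (v16i1 %m))))) would,
// assuming v16i1 is lowered via RVV fixed-length vectors, become a vcpop.m on
// the scalable container holding %m, with the XLenVT result zero-extended or
// truncated to i64.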
17560 static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
17561 const RISCVSubtarget &Subtarget) {
17562 EVT VT = N->getValueType(0);
17563 if (!VT.isScalarInteger())
17564 return SDValue();
17565
17566 SDValue Src = N->getOperand(0);
17567
17568 // Peek through zero_extend. It doesn't change the count.
17569 if (Src.getOpcode() == ISD::ZERO_EXTEND)
17570 Src = Src.getOperand(0);
17571
17572 if (Src.getOpcode() != ISD::BITCAST)
17573 return SDValue();
17574
17575 Src = Src.getOperand(0);
17576 EVT SrcEVT = Src.getValueType();
17577 if (!SrcEVT.isSimple())
17578 return SDValue();
17579
17580 MVT SrcMVT = SrcEVT.getSimpleVT();
17581 // Make sure the input is an i1 vector.
17582 if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
17583 return SDValue();
17584
17585 if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
17586 return SDValue();
17587
17588 MVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
17589 Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
17590
17591 SDLoc DL(N);
17592 auto [Mask, VL] = getDefaultVLOps(SrcMVT, ContainerVT, DL, DAG, Subtarget);
17593
17594 MVT XLenVT = Subtarget.getXLenVT();
17595 SDValue Pop = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Src, Mask, VL);
17596 return DAG.getZExtOrTrunc(Pop, DL, VT);
17597}
17598
17599 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
17600 DAGCombinerInfo &DCI) const {
17601 SelectionDAG &DAG = DCI.DAG;
17602 const MVT XLenVT = Subtarget.getXLenVT();
17603 SDLoc DL(N);
17604
17605 // Helper to call SimplifyDemandedBits on an operand of N where only some low
17606 // bits are demanded. N will be added to the Worklist if it was not deleted.
17607 // Caller should return SDValue(N, 0) if this returns true.
17608 auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
17609 SDValue Op = N->getOperand(OpNo);
17610 APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
17611 if (!SimplifyDemandedBits(Op, Mask, DCI))
17612 return false;
17613
17614 if (N->getOpcode() != ISD::DELETED_NODE)
17615 DCI.AddToWorklist(N);
17616 return true;
17617 };
17618
17619 switch (N->getOpcode()) {
17620 default:
17621 break;
17622 case RISCVISD::SplitF64: {
17623 SDValue Op0 = N->getOperand(0);
17624 // If the input to SplitF64 is just BuildPairF64 then the operation is
17625 // redundant. Instead, use BuildPairF64's operands directly.
17626 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
17627 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
17628
17629 if (Op0->isUndef()) {
17630 SDValue Lo = DAG.getUNDEF(MVT::i32);
17631 SDValue Hi = DAG.getUNDEF(MVT::i32);
17632 return DCI.CombineTo(N, Lo, Hi);
17633 }
17634
17635 // It's cheaper to materialise two 32-bit integers than to load a double
17636 // from the constant pool and transfer it to integer registers through the
17637 // stack.
17638 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
17639 APInt V = C->getValueAPF().bitcastToAPInt();
17640 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
17641 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
17642 return DCI.CombineTo(N, Lo, Hi);
17643 }
17644
17645 // This is a target-specific version of a DAGCombine performed in
17646 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17647 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17648 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17649 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17650 !Op0.getNode()->hasOneUse() || Subtarget.hasStdExtZdinx())
17651 break;
17652 SDValue NewSplitF64 =
17653 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
17654 Op0.getOperand(0));
17655 SDValue Lo = NewSplitF64.getValue(0);
17656 SDValue Hi = NewSplitF64.getValue(1);
17657 APInt SignBit = APInt::getSignMask(32);
17658 if (Op0.getOpcode() == ISD::FNEG) {
17659 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
17660 DAG.getConstant(SignBit, DL, MVT::i32));
17661 return DCI.CombineTo(N, Lo, NewHi);
17662 }
17663 assert(Op0.getOpcode() == ISD::FABS);
17664 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
17665 DAG.getConstant(~SignBit, DL, MVT::i32));
17666 return DCI.CombineTo(N, Lo, NewHi);
17667 }
17668 case RISCVISD::SLLW:
17669 case RISCVISD::SRAW:
17670 case RISCVISD::SRLW:
17671 case RISCVISD::RORW:
17672 case RISCVISD::ROLW: {
17673 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
17674 if (SimplifyDemandedLowBitsHelper(0, 32) ||
17675 SimplifyDemandedLowBitsHelper(1, 5))
17676 return SDValue(N, 0);
17677
17678 break;
17679 }
17680 case RISCVISD::CLZW:
17681 case RISCVISD::CTZW: {
17682 // Only the lower 32 bits of the first operand are read
17683 if (SimplifyDemandedLowBitsHelper(0, 32))
17684 return SDValue(N, 0);
17685 break;
17686 }
17687 case RISCVISD::FMV_W_X_RV64: {
17688 // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64 then the
17689 // conversion is unnecessary and can be replaced with the
17690 // FMV_X_ANYEXTW_RV64 operand.
17691 SDValue Op0 = N->getOperand(0);
17692 if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
17693 return Op0.getOperand(0);
17694 break;
17695 }
17696 case RISCVISD::FMV_X_ANYEXTH:
17697 case RISCVISD::FMV_X_ANYEXTW_RV64: {
17698 SDLoc DL(N);
17699 SDValue Op0 = N->getOperand(0);
17700 MVT VT = N->getSimpleValueType(0);
17701
17702 // Constant fold.
17703 if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op0)) {
17704 APInt Val = CFP->getValueAPF().bitcastToAPInt().sext(VT.getSizeInBits());
17705 return DAG.getConstant(Val, DL, VT);
17706 }
17707
17708 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
17709 // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
17710 // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
17711 if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
17712 Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
17713 (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
17714 Op0->getOpcode() == RISCVISD::FMV_H_X)) {
17715 assert(Op0.getOperand(0).getValueType() == VT &&
17716 "Unexpected value type!");
17717 return Op0.getOperand(0);
17718 }
17719
17720 if (ISD::isNormalLoad(Op0.getNode()) && Op0.hasOneUse() &&
17721 cast<LoadSDNode>(Op0)->isSimple()) {
17722 MVT IVT = MVT::getIntegerVT(Op0.getValueSizeInBits());
17723 auto *LN0 = cast<LoadSDNode>(Op0);
17724 SDValue Load =
17725 DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(),
17726 LN0->getBasePtr(), IVT, LN0->getMemOperand());
17727 DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
17728 return Load;
17729 }
17730
17731 // This is a target-specific version of a DAGCombine performed in
17732 // DAGCombiner::visitBITCAST. It performs the equivalent of:
17733 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
17734 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
17735 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
17736 !Op0.getNode()->hasOneUse())
17737 break;
17738 SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
17739 unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
17740 APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
17741 if (Op0.getOpcode() == ISD::FNEG)
17742 return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
17743 DAG.getConstant(SignBit, DL, VT));
17744
17745 assert(Op0.getOpcode() == ISD::FABS);
17746 return DAG.getNode(ISD::AND, DL, VT, NewFMV,
17747 DAG.getConstant(~SignBit, DL, VT));
17748 }
17749 case ISD::ABS: {
17750 EVT VT = N->getValueType(0);
17751 SDValue N0 = N->getOperand(0);
17752 // abs (sext) -> zext (abs)
17753 // abs (zext) -> zext (handled elsewhere)
17754 if (VT.isVector() && N0.hasOneUse() && N0.getOpcode() == ISD::SIGN_EXTEND) {
17755 SDValue Src = N0.getOperand(0);
17756 SDLoc DL(N);
17757 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
17758 DAG.getNode(ISD::ABS, DL, Src.getValueType(), Src));
17759 }
17760 break;
17761 }
17762 case ISD::ADD: {
17763 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17764 return V;
17765 if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
17766 return V;
17767 return performADDCombine(N, DCI, Subtarget);
17768 }
17769 case ISD::SUB: {
17770 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17771 return V;
17772 return performSUBCombine(N, DAG, Subtarget);
17773 }
17774 case ISD::AND:
17775 return performANDCombine(N, DCI, Subtarget);
17776 case ISD::OR: {
17777 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17778 return V;
17779 return performORCombine(N, DCI, Subtarget);
17780 }
17781 case ISD::XOR:
17782 return performXORCombine(N, DAG, Subtarget);
17783 case ISD::MUL:
17784 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
17785 return V;
17786 return performMULCombine(N, DAG, DCI, Subtarget);
17787 case ISD::SDIV:
17788 case ISD::UDIV:
17789 case ISD::SREM:
17790 case ISD::UREM:
17791 if (SDValue V = combineBinOpOfZExt(N, DAG))
17792 return V;
17793 break;
17794 case ISD::FMUL: {
17795 // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
17796 SDValue N0 = N->getOperand(0);
17797 SDValue N1 = N->getOperand(1);
17798 if (N0->getOpcode() != ISD::FCOPYSIGN)
17799 std::swap(N0, N1);
17800 if (N0->getOpcode() != ISD::FCOPYSIGN)
17801 return SDValue();
17802 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
17803 if (!C || !C->getValueAPF().isExactlyValue(+1.0))
17804 return SDValue();
17805 EVT VT = N->getValueType(0);
17806 if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
17807 return SDValue();
17808 SDValue Sign = N0->getOperand(1);
17809 if (Sign.getValueType() != VT)
17810 return SDValue();
17811 return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
17812 }
17813 case ISD::FADD:
17814 case ISD::UMAX:
17815 case ISD::UMIN:
17816 case ISD::SMAX:
17817 case ISD::SMIN:
17818 case ISD::FMAXNUM:
17819 case ISD::FMINNUM: {
17820 if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
17821 return V;
17822 if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
17823 return V;
17824 return SDValue();
17825 }
17826 case ISD::SETCC:
17827 return performSETCCCombine(N, DAG, Subtarget);
17828 case ISD::SIGN_EXTEND_INREG:
17829 return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
17830 case ISD::ZERO_EXTEND:
17831 // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
17832 // type legalization. This is safe because fp_to_uint produces poison if
17833 // it overflows.
17834 if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
17835 SDValue Src = N->getOperand(0);
17836 if (Src.getOpcode() == ISD::FP_TO_UINT &&
17837 isTypeLegal(Src.getOperand(0).getValueType()))
17838 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
17839 Src.getOperand(0));
17840 if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
17841 isTypeLegal(Src.getOperand(1).getValueType())) {
17842 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
17843 SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
17844 Src.getOperand(0), Src.getOperand(1));
17845 DCI.CombineTo(N, Res);
17846 DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
17847 DCI.recursivelyDeleteUnusedNodes(Src.getNode());
17848 return SDValue(N, 0); // Return N so it doesn't get rechecked.
17849 }
17850 }
17851 return SDValue();
17852 case RISCVISD::TRUNCATE_VECTOR_VL:
17853 if (SDValue V = combineTruncOfSraSext(N, DAG))
17854 return V;
17855 return combineTruncToVnclip(N, DAG, Subtarget);
17856 case ISD::TRUNCATE:
17857 return performTRUNCATECombine(N, DAG, Subtarget);
17858 case ISD::SELECT:
17859 return performSELECTCombine(N, DAG, Subtarget);
17860 case RISCVISD::CZERO_EQZ:
17861 case RISCVISD::CZERO_NEZ: {
17862 SDValue Val = N->getOperand(0);
17863 SDValue Cond = N->getOperand(1);
17864
17865 unsigned Opc = N->getOpcode();
17866
17867 // czero_eqz x, x -> x
17868 if (Opc == RISCVISD::CZERO_EQZ && Val == Cond)
17869 return Val;
17870
17871 unsigned InvOpc =
17872 Opc == RISCVISD::CZERO_EQZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ;
17873
17874 // czero_eqz X, (xor Y, 1) -> czero_nez X, Y if Y is 0 or 1.
17875 // czero_nez X, (xor Y, 1) -> czero_eqz X, Y if Y is 0 or 1.
17876 if (Cond.getOpcode() == ISD::XOR && isOneConstant(Cond.getOperand(1))) {
17877 SDValue NewCond = Cond.getOperand(0);
17878 APInt Mask = APInt::getBitsSetFrom(NewCond.getValueSizeInBits(), 1);
17879 if (DAG.MaskedValueIsZero(NewCond, Mask))
17880 return DAG.getNode(InvOpc, SDLoc(N), N->getValueType(0), Val, NewCond);
17881 }
17882 // czero_eqz x, (setcc y, 0, ne) -> czero_eqz x, y
17883 // czero_nez x, (setcc y, 0, ne) -> czero_nez x, y
17884 // czero_eqz x, (setcc y, 0, eq) -> czero_nez x, y
17885 // czero_nez x, (setcc y, 0, eq) -> czero_eqz x, y
17886 if (Cond.getOpcode() == ISD::SETCC && isNullConstant(Cond.getOperand(1))) {
17887 ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
17888 if (ISD::isIntEqualitySetCC(CCVal))
17889 return DAG.getNode(CCVal == ISD::SETNE ? Opc : InvOpc, SDLoc(N),
17890 N->getValueType(0), Val, Cond.getOperand(0));
17891 }
17892 return SDValue();
17893 }
17894 case RISCVISD::SELECT_CC: {
17895 // Transform
17896 SDValue LHS = N->getOperand(0);
17897 SDValue RHS = N->getOperand(1);
17898 SDValue CC = N->getOperand(2);
17899 ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
17900 SDValue TrueV = N->getOperand(3);
17901 SDValue FalseV = N->getOperand(4);
17902 SDLoc DL(N);
17903 EVT VT = N->getValueType(0);
17904
17905 // If the True and False values are the same, we don't need a select_cc.
17906 if (TrueV == FalseV)
17907 return TrueV;
17908
17909 // (select (x < 0), y, z) -> x >> (XLEN - 1) & (y - z) + z
17910 // (select (x >= 0), y, z) -> x >> (XLEN - 1) & (z - y) + y
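// For example (illustrative constants): (select_cc x, 0, setlt, 5, 3) on RV64
// becomes ((x >> 63) & (5 - 3)) + 3, i.e. 5 when x is negative and 3
// otherwise, provided both constants and their difference fit in a simm12.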
17911 if (!Subtarget.hasShortForwardBranchOpt() && isa<ConstantSDNode>(TrueV) &&
17912 isa<ConstantSDNode>(FalseV) && isNullConstant(RHS) &&
17913 (CCVal == ISD::CondCode::SETLT || CCVal == ISD::CondCode::SETGE)) {
17914 if (CCVal == ISD::CondCode::SETGE)
17915 std::swap(TrueV, FalseV);
17916
17917 int64_t TrueSImm = cast<ConstantSDNode>(TrueV)->getSExtValue();
17918 int64_t FalseSImm = cast<ConstantSDNode>(FalseV)->getSExtValue();
17919 // Only handle simm12, if it is not in this range, it can be considered as
17920 // register.
17921 if (isInt<12>(TrueSImm) && isInt<12>(FalseSImm) &&
17922 isInt<12>(TrueSImm - FalseSImm)) {
17923 SDValue SRA =
17924 DAG.getNode(ISD::SRA, DL, VT, LHS,
17925 DAG.getConstant(Subtarget.getXLen() - 1, DL, VT));
17926 SDValue AND =
17927 DAG.getNode(ISD::AND, DL, VT, SRA,
17928 DAG.getSignedConstant(TrueSImm - FalseSImm, DL, VT));
17929 return DAG.getNode(ISD::ADD, DL, VT, AND, FalseV);
17930 }
17931
17932 if (CCVal == ISD::CondCode::SETGE)
17933 std::swap(TrueV, FalseV);
17934 }
17935
17936 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
17937 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
17938 {LHS, RHS, CC, TrueV, FalseV});
17939
17940 if (!Subtarget.hasConditionalMoveFusion()) {
17941 // (select c, -1, y) -> -c | y
17942 if (isAllOnesConstant(TrueV)) {
17943 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17944 SDValue Neg = DAG.getNegative(C, DL, VT);
17945 return DAG.getNode(ISD::OR, DL, VT, Neg, FalseV);
17946 }
17947 // (select c, y, -1) -> -!c | y
17948 if (isAllOnesConstant(FalseV)) {
17949 SDValue C =
17950 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17951 SDValue Neg = DAG.getNegative(C, DL, VT);
17952 return DAG.getNode(ISD::OR, DL, VT, Neg, TrueV);
17953 }
17954
17955 // (select c, 0, y) -> -!c & y
17956 if (isNullConstant(TrueV)) {
17957 SDValue C =
17958 DAG.getSetCC(DL, VT, LHS, RHS, ISD::getSetCCInverse(CCVal, VT));
17959 SDValue Neg = DAG.getNegative(C, DL, VT);
17960 return DAG.getNode(ISD::AND, DL, VT, Neg, FalseV);
17961 }
17962 // (select c, y, 0) -> -c & y
17963 if (isNullConstant(FalseV)) {
17964 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, CCVal);
17965 SDValue Neg = DAG.getNegative(C, DL, VT);
17966 return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
17967 }
17968 // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
17969 // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
17970 if (((isOneConstant(FalseV) && LHS == TrueV &&
17971 CCVal == ISD::CondCode::SETNE) ||
17972 (isOneConstant(TrueV) && LHS == FalseV &&
17973 CCVal == ISD::CondCode::SETEQ)) &&
17974 isNullConstant(RHS)) {
17975 // freeze it to be safe.
17976 LHS = DAG.getFreeze(LHS);
17977 SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
17978 return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
17979 }
17980 }
17981
17982 // If both true/false are an xor with 1, pull through the select.
17983 // This can occur after op legalization if both operands are setccs that
17984 // require an xor to invert.
17985 // FIXME: Generalize to other binary ops with identical operand?
17986 if (TrueV.getOpcode() == ISD::XOR && FalseV.getOpcode() == ISD::XOR &&
17987 TrueV.getOperand(1) == FalseV.getOperand(1) &&
17988 isOneConstant(TrueV.getOperand(1)) &&
17989 TrueV.hasOneUse() && FalseV.hasOneUse()) {
17990 SDValue NewSel = DAG.getNode(RISCVISD::SELECT_CC, DL, VT, LHS, RHS, CC,
17991 TrueV.getOperand(0), FalseV.getOperand(0));
17992 return DAG.getNode(ISD::XOR, DL, VT, NewSel, TrueV.getOperand(1));
17993 }
17994
17995 return SDValue();
17996 }
17997 case RISCVISD::BR_CC: {
17998 SDValue LHS = N->getOperand(1);
17999 SDValue RHS = N->getOperand(2);
18000 SDValue CC = N->getOperand(3);
18001 SDLoc DL(N);
18002
18003 if (combine_CC(LHS, RHS, CC, DL, DAG, Subtarget))
18004 return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
18005 N->getOperand(0), LHS, RHS, CC, N->getOperand(4));
18006
18007 return SDValue();
18008 }
18009 case ISD::BITREVERSE:
18010 return performBITREVERSECombine(N, DAG, Subtarget);
18011 case ISD::FP_TO_SINT:
18012 case ISD::FP_TO_UINT:
18013 return performFP_TO_INTCombine(N, DCI, Subtarget);
18014 case ISD::FP_TO_SINT_SAT:
18015 case ISD::FP_TO_UINT_SAT:
18016 return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
18017 case ISD::FCOPYSIGN: {
18018 EVT VT = N->getValueType(0);
18019 if (!VT.isVector())
18020 break;
18021 // There is a form of VFSGNJ which injects the negated sign of its second
18022 // operand. Try and bubble any FNEG up after the extend/round to produce
18023 // this optimized pattern. Avoid modifying cases where the FP_ROUND has
18024 // TRUNC=1.
18025 SDValue In2 = N->getOperand(1);
18026 // Avoid cases where the extend/round has multiple uses, as duplicating
18027 // those is typically more expensive than removing a fneg.
18028 if (!In2.hasOneUse())
18029 break;
18030 if (In2.getOpcode() != ISD::FP_EXTEND &&
18031 (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
18032 break;
18033 In2 = In2.getOperand(0);
18034 if (In2.getOpcode() != ISD::FNEG)
18035 break;
18036 SDLoc DL(N);
18037 SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
18038 return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
18039 DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
18040 }
18041 case ISD::MGATHER: {
18042 const auto *MGN = cast<MaskedGatherSDNode>(N);
18043 const EVT VT = N->getValueType(0);
18044 SDValue Index = MGN->getIndex();
18045 SDValue ScaleOp = MGN->getScale();
18046 ISD::MemIndexType IndexType = MGN->getIndexType();
18047 assert(!MGN->isIndexScaled() &&
18048 "Scaled gather/scatter should not be formed");
18049
18050 SDLoc DL(N);
18051 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18052 return DAG.getMaskedGather(
18053 N->getVTList(), MGN->getMemoryVT(), DL,
18054 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18055 MGN->getBasePtr(), Index, ScaleOp},
18056 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18057
18058 if (narrowIndex(Index, IndexType, DAG))
18059 return DAG.getMaskedGather(
18060 N->getVTList(), MGN->getMemoryVT(), DL,
18061 {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
18062 MGN->getBasePtr(), Index, ScaleOp},
18063 MGN->getMemOperand(), IndexType, MGN->getExtensionType());
18064
18065 if (Index.getOpcode() == ISD::BUILD_VECTOR &&
18066 MGN->getExtensionType() == ISD::NON_EXTLOAD && isTypeLegal(VT)) {
18067 // The sequence will be XLenVT, not the type of Index. Tell
18068 // isSimpleVIDSequence this so we avoid overflow.
18069 if (std::optional<VIDSequence> SimpleVID =
18070 isSimpleVIDSequence(Index, Subtarget.getXLen());
18071 SimpleVID && SimpleVID->StepDenominator == 1) {
18072 const int64_t StepNumerator = SimpleVID->StepNumerator;
18073 const int64_t Addend = SimpleVID->Addend;
18074
18075 // Note: We don't need to check alignment here since (by assumption
18076 // from the existence of the gather), our offsets must be sufficiently
18077 // aligned.
18078
18079 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
18080 assert(MGN->getBasePtr()->getValueType(0) == PtrVT);
18081 assert(IndexType == ISD::UNSIGNED_SCALED);
18082 SDValue BasePtr = DAG.getNode(ISD::ADD, DL, PtrVT, MGN->getBasePtr(),
18083 DAG.getSignedConstant(Addend, DL, PtrVT));
18084
18085 SDValue EVL = DAG.getElementCount(DL, Subtarget.getXLenVT(),
18086 VT.getVectorElementCount());
18087 SDValue StridedLoad = DAG.getStridedLoadVP(
18088 VT, DL, MGN->getChain(), BasePtr,
18089 DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
18090 EVL, MGN->getMemOperand());
18091 SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
18092 StridedLoad, MGN->getPassThru(), EVL);
18093 return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
18094 DL);
18095 }
18096 }
18097
18098 SmallVector<int> ShuffleMask;
18099 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18100 matchIndexAsShuffle(VT, Index, MGN->getMask(), ShuffleMask)) {
18101 SDValue Load = DAG.getMaskedLoad(VT, DL, MGN->getChain(),
18102 MGN->getBasePtr(), DAG.getUNDEF(XLenVT),
18103 MGN->getMask(), DAG.getUNDEF(VT),
18104 MGN->getMemoryVT(), MGN->getMemOperand(),
18105 ISD::UNINDEXED, ISD::NON_EXTLOAD);
18106 SDValue Shuffle =
18107 DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
18108 return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
18109 }
18110
18111 if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
18112 matchIndexAsWiderOp(VT, Index, MGN->getMask(),
18113 MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
18114 SmallVector<SDValue> NewIndices;
18115 for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
18116 NewIndices.push_back(Index.getOperand(i));
18117 EVT IndexVT = Index.getValueType()
18118 .getHalfNumVectorElementsVT(*DAG.getContext());
18119 Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
18120
18121 unsigned ElementSize = VT.getScalarStoreSize();
18122 EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
18123 auto EltCnt = VT.getVectorElementCount();
18124 assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
18125 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
18126 EltCnt.divideCoefficientBy(2));
18127 SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
18128 EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
18129 EltCnt.divideCoefficientBy(2));
18130 SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
18131
18132 SDValue Gather =
18133 DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
18134 {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
18135 Index, ScaleOp},
18136 MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
18137 SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
18138 return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
18139 }
18140 break;
18141 }
18142 case ISD::MSCATTER:{
18143 const auto *MSN = cast<MaskedScatterSDNode>(N);
18144 SDValue Index = MSN->getIndex();
18145 SDValue ScaleOp = MSN->getScale();
18146 ISD::MemIndexType IndexType = MSN->getIndexType();
18147 assert(!MSN->isIndexScaled() &&
18148 "Scaled gather/scatter should not be formed");
18149
18150 SDLoc DL(N);
18151 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18152 return DAG.getMaskedScatter(
18153 N->getVTList(), MSN->getMemoryVT(), DL,
18154 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18155 Index, ScaleOp},
18156 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18157
18158 if (narrowIndex(Index, IndexType, DAG))
18159 return DAG.getMaskedScatter(
18160 N->getVTList(), MSN->getMemoryVT(), DL,
18161 {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
18162 Index, ScaleOp},
18163 MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
18164
18165 EVT VT = MSN->getValue()->getValueType(0);
18166 SmallVector<int> ShuffleMask;
18167 if (!MSN->isTruncatingStore() &&
18168 matchIndexAsShuffle(VT, Index, MSN->getMask(), ShuffleMask)) {
18169 SDValue Shuffle = DAG.getVectorShuffle(VT, DL, MSN->getValue(),
18170 DAG.getUNDEF(VT), ShuffleMask);
18171 return DAG.getMaskedStore(MSN->getChain(), DL, Shuffle, MSN->getBasePtr(),
18172 DAG.getUNDEF(XLenVT), MSN->getMask(),
18173 MSN->getMemoryVT(), MSN->getMemOperand(),
18174 ISD::UNINDEXED, false);
18175 }
18176 break;
18177 }
18178 case ISD::VP_GATHER: {
18179 const auto *VPGN = cast<VPGatherSDNode>(N);
18180 SDValue Index = VPGN->getIndex();
18181 SDValue ScaleOp = VPGN->getScale();
18182 ISD::MemIndexType IndexType = VPGN->getIndexType();
18183 assert(!VPGN->isIndexScaled() &&
18184 "Scaled gather/scatter should not be formed");
18185
18186 SDLoc DL(N);
18187 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18188 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18189 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18190 ScaleOp, VPGN->getMask(),
18191 VPGN->getVectorLength()},
18192 VPGN->getMemOperand(), IndexType);
18193
18194 if (narrowIndex(Index, IndexType, DAG))
18195 return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
18196 {VPGN->getChain(), VPGN->getBasePtr(), Index,
18197 ScaleOp, VPGN->getMask(),
18198 VPGN->getVectorLength()},
18199 VPGN->getMemOperand(), IndexType);
18200
18201 break;
18202 }
18203 case ISD::VP_SCATTER: {
18204 const auto *VPSN = cast<VPScatterSDNode>(N);
18205 SDValue Index = VPSN->getIndex();
18206 SDValue ScaleOp = VPSN->getScale();
18207 ISD::MemIndexType IndexType = VPSN->getIndexType();
18208 assert(!VPSN->isIndexScaled() &&
18209 "Scaled gather/scatter should not be formed");
18210
18211 SDLoc DL(N);
18212 if (legalizeScatterGatherIndexType(DL, Index, IndexType, DCI))
18213 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18214 {VPSN->getChain(), VPSN->getValue(),
18215 VPSN->getBasePtr(), Index, ScaleOp,
18216 VPSN->getMask(), VPSN->getVectorLength()},
18217 VPSN->getMemOperand(), IndexType);
18218
18219 if (narrowIndex(Index, IndexType, DAG))
18220 return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
18221 {VPSN->getChain(), VPSN->getValue(),
18222 VPSN->getBasePtr(), Index, ScaleOp,
18223 VPSN->getMask(), VPSN->getVectorLength()},
18224 VPSN->getMemOperand(), IndexType);
18225 break;
18226 }
18227 case RISCVISD::SHL_VL:
18228 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18229 return V;
18230 [[fallthrough]];
18231 case RISCVISD::SRA_VL:
18232 case RISCVISD::SRL_VL: {
18233 SDValue ShAmt = N->getOperand(1);
18234 if (ShAmt.getOpcode() == RISCVISD::VMV_V_X_VL) {
18235 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18236 SDLoc DL(N);
18237 SDValue VL = N->getOperand(4);
18238 EVT VT = N->getValueType(0);
18239 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18240 ShAmt.getOperand(1), VL);
18241 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
18242 N->getOperand(2), N->getOperand(3), N->getOperand(4));
18243 }
18244 break;
18245 }
18246 case ISD::SRA:
18247 if (SDValue V = performSRACombine(N, DAG, Subtarget))
18248 return V;
18249 [[fallthrough]];
18250 case ISD::SRL:
18251 case ISD::SHL: {
18252 if (N->getOpcode() == ISD::SHL) {
18253 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18254 return V;
18255 }
18256 SDValue ShAmt = N->getOperand(1);
18257 if (ShAmt.getOpcode() == ISD::SPLAT_VECTOR) {
18258 // We don't need the upper 32 bits of a 64-bit element for a shift amount.
18259 SDLoc DL(N);
18260 EVT VT = N->getValueType(0);
18261 ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
18262 ShAmt.getOperand(1),
18263 DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
18264 return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
18265 }
18266 break;
18267 }
18268 case RISCVISD::ADD_VL:
18269 if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
18270 return V;
18271 return combineToVWMACC(N, DAG, Subtarget);
18272 case RISCVISD::VWADD_W_VL:
18273 case RISCVISD::VWADDU_W_VL:
18274 case RISCVISD::VWSUB_W_VL:
18275 case RISCVISD::VWSUBU_W_VL:
18276 return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
18277 case RISCVISD::SUB_VL:
18278 case RISCVISD::MUL_VL:
18279 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18280 case RISCVISD::VFMADD_VL:
18281 case RISCVISD::VFNMADD_VL:
18282 case RISCVISD::VFMSUB_VL:
18283 case RISCVISD::VFNMSUB_VL:
18284 case RISCVISD::STRICT_VFMADD_VL:
18285 case RISCVISD::STRICT_VFNMADD_VL:
18286 case RISCVISD::STRICT_VFMSUB_VL:
18287 case RISCVISD::STRICT_VFNMSUB_VL:
18288 return performVFMADD_VLCombine(N, DCI, Subtarget);
18289 case RISCVISD::FADD_VL:
18290 case RISCVISD::FSUB_VL:
18291 case RISCVISD::FMUL_VL:
18292 case RISCVISD::VFWADD_W_VL:
18293 case RISCVISD::VFWSUB_W_VL:
18294 return combineOp_VLToVWOp_VL(N, DCI, Subtarget);
18295 case ISD::LOAD:
18296 case ISD::STORE: {
18297 if (DCI.isAfterLegalizeDAG())
18298 if (SDValue V = performMemPairCombine(N, DCI))
18299 return V;
18300
18301 if (N->getOpcode() != ISD::STORE)
18302 break;
18303
18304 auto *Store = cast<StoreSDNode>(N);
18305 SDValue Chain = Store->getChain();
18306 EVT MemVT = Store->getMemoryVT();
18307 SDValue Val = Store->getValue();
18308 SDLoc DL(N);
18309
18310 bool IsScalarizable =
18311 MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
18312 Store->isSimple() &&
18313 MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
18314 isPowerOf2_64(MemVT.getSizeInBits()) &&
18315 MemVT.getSizeInBits() <= Subtarget.getXLen();
18316
18317 // If sufficiently aligned we can scalarize stores of constant vectors of
18318 // any power-of-two size up to XLen bits, provided that they aren't too
18319 // expensive to materialize.
18320 // vsetivli zero, 2, e8, m1, ta, ma
18321 // vmv.v.i v8, 4
18322 // vse64.v v8, (a0)
18323 // ->
18324 // li a1, 1028
18325 // sh a1, 0(a0)
18326 if (DCI.isBeforeLegalize() && IsScalarizable &&
18327 ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
18328 // Get the constant vector bits
18329 APInt NewC(Val.getValueSizeInBits(), 0);
18330 uint64_t EltSize = Val.getScalarValueSizeInBits();
18331 for (unsigned i = 0; i < Val.getNumOperands(); i++) {
18332 if (Val.getOperand(i).isUndef())
18333 continue;
18334 NewC.insertBits(Val.getConstantOperandAPInt(i).trunc(EltSize),
18335 i * EltSize);
18336 }
18337 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18338
18339 if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(), Subtarget,
18340 true) <= 2 &&
18341 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18342 NewVT, *Store->getMemOperand())) {
18343 SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
18344 return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
18345 Store->getPointerInfo(), Store->getOriginalAlign(),
18346 Store->getMemOperand()->getFlags());
18347 }
18348 }
18349
18350 // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
18351 // vsetivli zero, 2, e16, m1, ta, ma
18352 // vle16.v v8, (a0)
18353 // vse16.v v8, (a1)
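// ->
// (illustrative; the exact register choice depends on the surrounding code)
// lw a2, 0(a0)
// sw a2, 0(a1)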
18354 if (auto *L = dyn_cast<LoadSDNode>(Val);
18355 L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
18356 L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
18357 Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
18358 L->getMemoryVT() == MemVT) {
18359 MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
18360 if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18361 NewVT, *Store->getMemOperand()) &&
18362 allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
18363 NewVT, *L->getMemOperand())) {
18364 SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
18365 L->getPointerInfo(), L->getOriginalAlign(),
18366 L->getMemOperand()->getFlags());
18367 return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
18368 Store->getPointerInfo(), Store->getOriginalAlign(),
18369 Store->getMemOperand()->getFlags());
18370 }
18371 }
18372
18373 // Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
18374 // vfmv.f.s is represented as extract element from 0. Match it late to avoid
18375 // any illegal types.
18376 if (Val.getOpcode() == RISCVISD::VMV_X_S ||
18377 (DCI.isAfterLegalizeDAG() &&
18378 Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18379 isNullConstant(Val.getOperand(1)))) {
18380 SDValue Src = Val.getOperand(0);
18381 MVT VecVT = Src.getSimpleValueType();
18382 // VecVT should be scalable and memory VT should match the element type.
18383 if (!Store->isIndexed() && VecVT.isScalableVector() &&
18384 MemVT == VecVT.getVectorElementType()) {
18385 SDLoc DL(N);
18386 MVT MaskVT = getMaskTypeFor(VecVT);
18387 return DAG.getStoreVP(
18388 Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
18389 DAG.getConstant(1, DL, MaskVT),
18390 DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
18391 Store->getMemOperand(), Store->getAddressingMode(),
18392 Store->isTruncatingStore(), /*IsCompress*/ false);
18393 }
18394 }
18395
18396 break;
18397 }
18398 case ISD::SPLAT_VECTOR: {
18399 EVT VT = N->getValueType(0);
18400 // Only perform this combine on legal MVT types.
18401 if (!isTypeLegal(VT))
18402 break;
18403 if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
18404 DAG, Subtarget))
18405 return Gather;
18406 break;
18407 }
18408 case ISD::BUILD_VECTOR:
18409 if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
18410 return V;
18411 break;
18412 case ISD::CONCAT_VECTORS:
18413 if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
18414 return V;
18415 break;
18416 case ISD::VECTOR_SHUFFLE:
18417 if (SDValue V = performVECTOR_SHUFFLECombine(N, DAG, Subtarget, *this))
18418 return V;
18419 break;
18420 case ISD::INSERT_VECTOR_ELT:
18421 if (SDValue V = performINSERT_VECTOR_ELTCombine(N, DAG, Subtarget, *this))
18422 return V;
18423 break;
18424 case RISCVISD::VFMV_V_F_VL: {
18425 const MVT VT = N->getSimpleValueType(0);
18426 SDValue Passthru = N->getOperand(0);
18427 SDValue Scalar = N->getOperand(1);
18428 SDValue VL = N->getOperand(2);
18429
18430 // If VL is 1, we can use vfmv.s.f.
18431 if (isOneConstant(VL))
18432 return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
18433 break;
18434 }
18435 case RISCVISD::VMV_V_X_VL: {
18436 const MVT VT = N->getSimpleValueType(0);
18437 SDValue Passthru = N->getOperand(0);
18438 SDValue Scalar = N->getOperand(1);
18439 SDValue VL = N->getOperand(2);
18440
18441 // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
18442 // scalar input.
18443 unsigned ScalarSize = Scalar.getValueSizeInBits();
18444 unsigned EltWidth = VT.getScalarSizeInBits();
18445 if (ScalarSize > EltWidth && Passthru.isUndef())
18446 if (SimplifyDemandedLowBitsHelper(1, EltWidth))
18447 return SDValue(N, 0);
18448
18449 // If VL is 1 and the scalar value won't benefit from immediate, we can
18450 // use vmv.s.x.
18451 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18452 if (isOneConstant(VL) &&
18453 (!Const || Const->isZero() ||
18454 !Const->getAPIntValue().sextOrTrunc(EltWidth).isSignedIntN(5)))
18455 return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
18456
18457 break;
18458 }
18459 case RISCVISD::VFMV_S_F_VL: {
18460 SDValue Src = N->getOperand(1);
18461 // Try to remove vector->scalar->vector if the scalar->vector is inserting
18462 // into an undef vector.
18463 // TODO: Could use a vslide or vmv.v.v for non-undef.
18464 if (N->getOperand(0).isUndef() &&
18465 Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18466 isNullConstant(Src.getOperand(1)) &&
18467 Src.getOperand(0).getValueType().isScalableVector()) {
18468 EVT VT = N->getValueType(0);
18469 EVT SrcVT = Src.getOperand(0).getValueType();
18470 assert(SrcVT.getVectorElementType() == VT.getVectorElementType());
18471 // Widths match, just return the original vector.
18472 if (SrcVT == VT)
18473 return Src.getOperand(0);
18474 // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
18475 }
18476 [[fallthrough]];
18477 }
18478 case RISCVISD::VMV_S_X_VL: {
18479 const MVT VT = N->getSimpleValueType(0);
18480 SDValue Passthru = N->getOperand(0);
18481 SDValue Scalar = N->getOperand(1);
18482 SDValue VL = N->getOperand(2);
18483
18484 if (Scalar.getOpcode() == RISCVISD::VMV_X_S && Passthru.isUndef() &&
18485 Scalar.getOperand(0).getValueType() == N->getValueType(0))
18486 return Scalar.getOperand(0);
18487
18488 // Use M1 or smaller to avoid over constraining register allocation
18489 const MVT M1VT = getLMUL1VT(VT);
18490 if (M1VT.bitsLT(VT)) {
18491 SDValue M1Passthru =
18492 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Passthru,
18493 DAG.getVectorIdxConstant(0, DL));
18494 SDValue Result =
18495 DAG.getNode(N->getOpcode(), DL, M1VT, M1Passthru, Scalar, VL);
18496 Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, Result,
18497 DAG.getVectorIdxConstant(0, DL));
18498 return Result;
18499 }
18500
18501 // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
18502 // higher would involve overly constraining the register allocator for
18503 // no purpose.
18504 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
18505 Const && !Const->isZero() && isInt<5>(Const->getSExtValue()) &&
18506 VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
18507 return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
18508
18509 break;
18510 }
18511 case RISCVISD::VMV_X_S: {
18512 SDValue Vec = N->getOperand(0);
18513 MVT VecVT = N->getOperand(0).getSimpleValueType();
18514 const MVT M1VT = getLMUL1VT(VecVT);
18515 if (M1VT.bitsLT(VecVT)) {
18516 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
18517 DAG.getVectorIdxConstant(0, DL));
18518 return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
18519 }
18520 break;
18521 }
18522 case ISD::INTRINSIC_VOID:
18523 case ISD::INTRINSIC_W_CHAIN:
18524 case ISD::INTRINSIC_WO_CHAIN: {
18525 unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
18526 unsigned IntNo = N->getConstantOperandVal(IntOpNo);
18527 switch (IntNo) {
18528 // By default we do not combine any intrinsic.
18529 default:
18530 return SDValue();
18531 case Intrinsic::riscv_vcpop:
18532 case Intrinsic::riscv_vcpop_mask:
18533 case Intrinsic::riscv_vfirst:
18534 case Intrinsic::riscv_vfirst_mask: {
18535 SDValue VL = N->getOperand(2);
18536 if (IntNo == Intrinsic::riscv_vcpop_mask ||
18537 IntNo == Intrinsic::riscv_vfirst_mask)
18538 VL = N->getOperand(3);
18539 if (!isNullConstant(VL))
18540 return SDValue();
18541 // If VL is 0, vcpop -> li 0, vfirst -> li -1.
18542 SDLoc DL(N);
18543 EVT VT = N->getValueType(0);
18544 if (IntNo == Intrinsic::riscv_vfirst ||
18545 IntNo == Intrinsic::riscv_vfirst_mask)
18546 return DAG.getAllOnesConstant(DL, VT);
18547 return DAG.getConstant(0, DL, VT);
18548 }
18549 }
18550 }
18551 case ISD::EXPERIMENTAL_VP_REVERSE:
18552 return performVP_REVERSECombine(N, DAG, Subtarget);
18553 case ISD::VP_STORE:
18554 return performVP_STORECombine(N, DAG, Subtarget);
18555 case ISD::BITCAST: {
18556 assert(Subtarget.useRVVForFixedLengthVectors());
18557 SDValue N0 = N->getOperand(0);
18558 EVT VT = N->getValueType(0);
18559 EVT SrcVT = N0.getValueType();
18560 if (VT.isRISCVVectorTuple() && N0->getOpcode() == ISD::SPLAT_VECTOR) {
18561 unsigned NF = VT.getRISCVVectorTupleNumFields();
18562 unsigned NumScalElts = VT.getSizeInBits().getKnownMinValue() / (NF * 8);
18563 SDValue EltVal = DAG.getConstant(0, DL, Subtarget.getXLenVT());
18564 MVT ScalTy = MVT::getScalableVectorVT(MVT::getIntegerVT(8), NumScalElts);
18565
18566 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, ScalTy, EltVal);
18567
18568 SDValue Result = DAG.getUNDEF(VT);
18569 for (unsigned i = 0; i < NF; ++i)
18570 Result = DAG.getNode(RISCVISD::TUPLE_INSERT, DL, VT, Result, Splat,
18571 DAG.getVectorIdxConstant(i, DL));
18572 return Result;
18573 }
18574 // If this is a bitcast between a MVT::v4i1/v2i1/v1i1 and an illegal integer
18575 // type, widen both sides to avoid a trip through memory.
18576 if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
18577 VT.isScalarInteger()) {
18578 unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
18579 SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
18580 Ops[0] = N0;
18581 SDLoc DL(N);
18582 N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
18583 N0 = DAG.getBitcast(MVT::i8, N0);
18584 return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
18585 }
18586
18587 return SDValue();
18588 }
18589 case ISD::CTPOP:
18590 if (SDValue V = combineScalarCTPOPToVCPOP(N, DAG, Subtarget))
18591 return V;
18592 break;
18593 }
18594
18595 return SDValue();
18596}
18597
18598 bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
18599 EVT XVT, unsigned KeptBits) const {
18600 // For vectors, we don't have a preference.
18601 if (XVT.isVector())
18602 return false;
18603
18604 if (XVT != MVT::i32 && XVT != MVT::i64)
18605 return false;
18606
18607 // We can use sext.w for RV64 or an srai 31 on RV32.
18608 if (KeptBits == 32 || KeptBits == 64)
18609 return true;
18610
18611 // With Zbb we can use sext.h/sext.b.
18612 return Subtarget.hasStdExtZbb() &&
18613 ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
18614 KeptBits == 16);
18615}
18616
18617 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
18618 const SDNode *N, CombineLevel Level) const {
18619 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
18620 N->getOpcode() == ISD::SRL) &&
18621 "Expected shift op");
18622
18623 // The following folds are only desirable if `(OP _, c1 << c2)` can be
18624 // materialised in fewer instructions than `(OP _, c1)`:
18625 //
18626 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
18627 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
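// For instance (illustrative constants): with c1 = 1 and c2 = 3, c1 << c2 = 8
// still fits an ADDI immediate, so the fold is allowed; with c1 = 2047 and
// c2 = 4, c1 << c2 = 32752 no longer fits a simm12 while c1 itself does, so
// the fold is blocked by the checks below.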
18628 SDValue N0 = N->getOperand(0);
18629 EVT Ty = N0.getValueType();
18630
18631 // LD/ST will optimize constant offset extraction, so when the add node is
18632 // used by a LD/ST it can still complete the folding optimization performed
18633 // above.
18634 auto isUsedByLdSt = [](const SDNode *X, const SDNode *User) {
18635 for (SDNode *Use : X->users()) {
18636 // This use is the one we're on right now. Skip it
18637 if (Use == User || Use->getOpcode() == ISD::SELECT)
18638 continue;
18639 if (!isa<StoreSDNode>(Use) && !isa<LoadSDNode>(Use))
18640 return false;
18641 }
18642 return true;
18643 };
18644
18645 if (Ty.isScalarInteger() &&
18646 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
18647 if (N0.getOpcode() == ISD::ADD && !N0->hasOneUse())
18648 return isUsedByLdSt(N0.getNode(), N);
18649
18650 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18651 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
18652
18653 // Bail if we might break a sh{1,2,3}add pattern.
18654 if (Subtarget.hasStdExtZba() && C2 && C2->getZExtValue() >= 1 &&
18655 C2->getZExtValue() <= 3 && N->hasOneUse() &&
18656 N->user_begin()->getOpcode() == ISD::ADD &&
18657 !isUsedByLdSt(*N->user_begin(), nullptr) &&
18658 !isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
18659 return false;
18660
18661 if (C1 && C2) {
18662 const APInt &C1Int = C1->getAPIntValue();
18663 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
18664
18665 // We can materialise `c1 << c2` into an add immediate, so it's "free",
18666 // and the combine should happen, to potentially allow further combines
18667 // later.
18668 if (ShiftedC1Int.getSignificantBits() <= 64 &&
18669 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
18670 return true;
18671
18672 // We can materialise `c1` in an add immediate, so it's "free", and the
18673 // combine should be prevented.
18674 if (C1Int.getSignificantBits() <= 64 &&
18675 isLegalAddImmediate(C1Int.getSExtValue()))
18676 return false;
18677
18678 // Neither constant will fit into an immediate, so find materialisation
18679 // costs.
18680 int C1Cost =
18681 RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), Subtarget,
18682 /*CompressionCost*/ true);
18683 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
18684 ShiftedC1Int, Ty.getSizeInBits(), Subtarget,
18685 /*CompressionCost*/ true);
18686
18687 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
18688 // combine should be prevented.
18689 if (C1Cost < ShiftedC1Cost)
18690 return false;
18691 }
18692 }
18693
18694 if (!N0->hasOneUse())
18695 return false;
18696
18697 if (N0->getOpcode() == ISD::SIGN_EXTEND &&
18698 N0->getOperand(0)->getOpcode() == ISD::ADD &&
18699 !N0->getOperand(0)->hasOneUse())
18700 return isUsedByLdSt(N0->getOperand(0).getNode(), N0.getNode());
18701
18702 return true;
18703}
18704
18705bool RISCVTargetLowering::targetShrinkDemandedConstant(
18706 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
18707 TargetLoweringOpt &TLO) const {
18708 // Delay this optimization as late as possible.
18709 if (!TLO.LegalOps)
18710 return false;
18711
18712 EVT VT = Op.getValueType();
18713 if (VT.isVector())
18714 return false;
18715
18716 unsigned Opcode = Op.getOpcode();
18717 if (Opcode != ISD::AND && Opcode != ISD::OR && Opcode != ISD::XOR)
18718 return false;
18719
18720 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18721 if (!C)
18722 return false;
18723
18724 const APInt &Mask = C->getAPIntValue();
18725
18726 // Clear all non-demanded bits initially.
18727 APInt ShrunkMask = Mask & DemandedBits;
18728
18729 // Try to make a smaller immediate by setting undemanded bits.
18730
18731 APInt ExpandedMask = Mask | ~DemandedBits;
18732
18733 auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
18734 return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
18735 };
18736 auto UseMask = [Mask, Op, &TLO](const APInt &NewMask) -> bool {
18737 if (NewMask == Mask)
18738 return true;
18739 SDLoc DL(Op);
18740 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, Op.getValueType());
18741 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
18742 Op.getOperand(0), NewC);
18743 return TLO.CombineTo(Op, NewOp);
18744 };
18745
18746 // If the shrunk mask fits in sign extended 12 bits, let the target
18747 // independent code apply it.
18748 if (ShrunkMask.isSignedIntN(12))
18749 return false;
18750
18751 // And has a few special cases for zext.
18752 if (Opcode == ISD::AND) {
18753 // Preserve (and X, 0xffff), if zext.h exists use zext.h,
18754 // otherwise use SLLI + SRLI.
18755 APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
18756 if (IsLegalMask(NewMask))
18757 return UseMask(NewMask);
18758
18759 // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
18760 if (VT == MVT::i64) {
18761 APInt NewMask = APInt(64, 0xffffffff);
18762 if (IsLegalMask(NewMask))
18763 return UseMask(NewMask);
18764 }
18765 }
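// Illustrative example: if the AND mask is 0xFFFF00 but only bits 8..15 are
// demanded, ShrunkMask is 0xFF00 (not a simm12) while ExpandedMask is all
// ones, so the mask is widened to 0xFFFF by the code above and can then be
// selected as zext.h (or SLLI+SRLI).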
18766
18767 // For the remaining optimizations, we need to be able to make a negative
18768 // number through a combination of mask and undemanded bits.
18769 if (!ExpandedMask.isNegative())
18770 return false;
18771
18772 // Compute the fewest number of bits needed to represent the negative number.
18773 unsigned MinSignedBits = ExpandedMask.getSignificantBits();
18774
18775 // Try to make a 12 bit negative immediate. If that fails try to make a 32
18776 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
18777 // If we can't create a simm12, we shouldn't change opaque constants.
18778 APInt NewMask = ShrunkMask;
18779 if (MinSignedBits <= 12)
18780 NewMask.setBitsFrom(11);
18781 else if (!C->isOpaque() && MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
18782 NewMask.setBitsFrom(31);
18783 else
18784 return false;
18785
18786 // Check that our new mask is a subset of the demanded mask.
18787 assert(IsLegalMask(NewMask));
18788 return UseMask(NewMask);
18789}
18790
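// Helper used by computeKnownBitsForTargetNode below. With a control value of
// 7, GREV is brev8 (reverse the bits within every byte) and GORC is orc.b
// (turn every nonzero byte into 0xFF). For example (illustrative),
// computeGREVOrGORC(0x01, 7, /*IsGORC=*/false) == 0x80 and
// computeGREVOrGORC(0x0000001200003400ULL, 7, /*IsGORC=*/true) ==
// 0x000000FF0000FF00.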
18791static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
18792 static const uint64_t GREVMasks[] = {
18793 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
18794 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
18795
18796 for (unsigned Stage = 0; Stage != 6; ++Stage) {
18797 unsigned Shift = 1 << Stage;
18798 if (ShAmt & Shift) {
18799 uint64_t Mask = GREVMasks[Stage];
18800 uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
18801 if (IsGORC)
18802 Res |= x;
18803 x = Res;
18804 }
18805 }
18806
18807 return x;
18808}
18809
18810void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
18811 KnownBits &Known,
18812 const APInt &DemandedElts,
18813 const SelectionDAG &DAG,
18814 unsigned Depth) const {
18815 unsigned BitWidth = Known.getBitWidth();
18816 unsigned Opc = Op.getOpcode();
18817 assert((Opc >= ISD::BUILTIN_OP_END ||
18818 Opc == ISD::INTRINSIC_WO_CHAIN ||
18819 Opc == ISD::INTRINSIC_W_CHAIN ||
18820 Opc == ISD::INTRINSIC_VOID) &&
18821 "Should use MaskedValueIsZero if you don't know whether Op"
18822 " is a target node!");
18823
18824 Known.resetAll();
18825 switch (Opc) {
18826 default: break;
18827 case RISCVISD::SELECT_CC: {
18828 Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
18829 // If we don't know any bits, early out.
18830 if (Known.isUnknown())
18831 break;
18832 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
18833
18834 // Only known if known in both the LHS and RHS.
18835 Known = Known.intersectWith(Known2);
18836 break;
18837 }
18838 case RISCVISD::CZERO_EQZ:
18839 case RISCVISD::CZERO_NEZ:
18840 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18841 // Result is either all zero or operand 0. We can propagate zeros, but not
18842 // ones.
18843 Known.One.clearAllBits();
18844 break;
18845 case RISCVISD::REMUW: {
18846 KnownBits Known2;
18847 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18848 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18849 // We only care about the lower 32 bits.
18850 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
18851 // Restore the original width by sign extending.
18852 Known = Known.sext(BitWidth);
18853 break;
18854 }
18855 case RISCVISD::DIVUW: {
18856 KnownBits Known2;
18857 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18858 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18859 // We only care about the lower 32 bits.
18860 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
18861 // Restore the original width by sign extending.
18862 Known = Known.sext(BitWidth);
18863 break;
18864 }
18865 case RISCVISD::SLLW: {
18866 KnownBits Known2;
18867 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
18868 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
18869 Known = KnownBits::shl(Known.trunc(32), Known2.trunc(5).zext(32));
18870 // Restore the original width by sign extending.
18871 Known = Known.sext(BitWidth);
18872 break;
18873 }
18874 case RISCVISD::CTZW: {
18875 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18876 unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
18877 unsigned LowBits = llvm::bit_width(PossibleTZ);
18878 Known.Zero.setBitsFrom(LowBits);
18879 break;
18880 }
18881 case RISCVISD::CLZW: {
18882 KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18883 unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
18884 unsigned LowBits = llvm::bit_width(PossibleLZ);
18885 Known.Zero.setBitsFrom(LowBits);
18886 break;
18887 }
18888 case RISCVISD::BREV8:
18889 case RISCVISD::ORC_B: {
18890 // FIXME: This is based on the non-ratified Zbp GREV and GORC where a
18891 // control value of 7 is equivalent to brev8 and orc.b.
18892 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
18893 bool IsGORC = Op.getOpcode() == RISCVISD::ORC_B;
18894 // To compute zeros, we need to invert the value and invert it back after.
18895 Known.Zero =
18896 ~computeGREVOrGORC(~Known.Zero.getZExtValue(), 7, IsGORC);
18897 Known.One = computeGREVOrGORC(Known.One.getZExtValue(), 7, IsGORC);
18898 break;
18899 }
18900 case RISCVISD::READ_VLENB: {
18901 // We can use the minimum and maximum VLEN values to bound VLENB. We
18902 // know VLEN must be a power of two.
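// For example (illustrative): with a 128-bit minimum and a 65536-bit maximum
// VLEN, VLENB lies in [16, 8192], so bits 0..3 and bits 14 and above of the
// result are known to be zero.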
18903 const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
18904 const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
18905 assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
18906 Known.Zero.setLowBits(Log2_32(MinVLenB));
18907 Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
18908 if (MaxVLenB == MinVLenB)
18909 Known.One.setBit(Log2_32(MinVLenB));
18910 break;
18911 }
18912 case RISCVISD::FCLASS: {
18913 // fclass will only set one of the low 10 bits.
18914 Known.Zero.setBitsFrom(10);
18915 break;
18916 }
18917 case ISD::INTRINSIC_WO_CHAIN:
18918 case ISD::INTRINSIC_W_CHAIN: {
18919 unsigned IntNo =
18920 Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
18921 switch (IntNo) {
18922 default:
18923 // We can't do anything for most intrinsics.
18924 break;
18925 case Intrinsic::riscv_vsetvli:
18926 case Intrinsic::riscv_vsetvlimax: {
18927 bool HasAVL = IntNo == Intrinsic::riscv_vsetvli;
18928 unsigned VSEW = Op.getConstantOperandVal(HasAVL + 1);
18929 RISCVII::VLMUL VLMUL =
18930 static_cast<RISCVII::VLMUL>(Op.getConstantOperandVal(HasAVL + 2));
18931 unsigned SEW = RISCVVType::decodeVSEW(VSEW);
18932 auto [LMul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
18933 uint64_t MaxVL = Subtarget.getRealMaxVLen() / SEW;
18934 MaxVL = (Fractional) ? MaxVL / LMul : MaxVL * LMul;
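// Illustrative example: with a maximum VLEN of 512, SEW=8 and LMUL=8, MaxVL
// is (512 / 8) * 8 = 512, so bits 10 and above of the result are known zero.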
18935
18936 // The result of vsetvli must not be larger than AVL.
18937 if (HasAVL && isa<ConstantSDNode>(Op.getOperand(1)))
18938 MaxVL = std::min(MaxVL, Op.getConstantOperandVal(1));
18939
18940 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
18941 if (BitWidth > KnownZeroFirstBit)
18942 Known.Zero.setBitsFrom(KnownZeroFirstBit);
18943 break;
18944 }
18945 }
18946 break;
18947 }
18948 }
18949}
18950
18951unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
18952 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
18953 unsigned Depth) const {
18954 switch (Op.getOpcode()) {
18955 default:
18956 break;
18957 case RISCVISD::SELECT_CC: {
18958 unsigned Tmp =
18959 DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
18960 if (Tmp == 1) return 1; // Early out.
18961 unsigned Tmp2 =
18962 DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
18963 return std::min(Tmp, Tmp2);
18964 }
18965 case RISCVISD::CZERO_EQZ:
18966 case RISCVISD::CZERO_NEZ:
18967 // Output is either all zero or operand 0. We can propagate sign bit count
18968 // from operand 0.
18969 return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18970 case RISCVISD::ABSW: {
18971 // We expand this at isel to negw+max. The result will have 33 sign bits
18972 // if the input has at least 33 sign bits.
18973 unsigned Tmp =
18974 DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
18975 if (Tmp < 33) return 1;
18976 return 33;
18977 }
18978 case RISCVISD::SLLW:
18979 case RISCVISD::SRAW:
18980 case RISCVISD::SRLW:
18981 case RISCVISD::DIVW:
18982 case RISCVISD::DIVUW:
18983 case RISCVISD::REMUW:
18984 case RISCVISD::ROLW:
18985 case RISCVISD::RORW:
18990 // TODO: As the result is sign-extended, this is conservatively correct. A
18991 // more precise answer could be calculated for SRAW depending on known
18992 // bits in the shift amount.
18993 return 33;
18994 case RISCVISD::VMV_X_S: {
18995 // The number of sign bits of the scalar result is computed by obtaining the
18996 // element type of the input vector operand, subtracting its width from the
18997 // XLEN, and then adding one (sign bit within the element type). If the
18998 // element type is wider than XLen, the least-significant XLEN bits are
18999 // taken.
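// For example, extracting an i8 element on RV64 leaves 64 - 8 + 1 = 57 known
// sign bits.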
19000 unsigned XLen = Subtarget.getXLen();
19001 unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
19002 if (EltBits <= XLen)
19003 return XLen - EltBits + 1;
19004 break;
19005 }
19006 case ISD::INTRINSIC_W_CHAIN: {
19007 unsigned IntNo = Op.getConstantOperandVal(1);
19008 switch (IntNo) {
19009 default:
19010 break;
19011 case Intrinsic::riscv_masked_atomicrmw_xchg_i64:
19012 case Intrinsic::riscv_masked_atomicrmw_add_i64:
19013 case Intrinsic::riscv_masked_atomicrmw_sub_i64:
19014 case Intrinsic::riscv_masked_atomicrmw_nand_i64:
19015 case Intrinsic::riscv_masked_atomicrmw_max_i64:
19016 case Intrinsic::riscv_masked_atomicrmw_min_i64:
19017 case Intrinsic::riscv_masked_atomicrmw_umax_i64:
19018 case Intrinsic::riscv_masked_atomicrmw_umin_i64:
19019 case Intrinsic::riscv_masked_cmpxchg_i64:
19020 // riscv_masked_{atomicrmw_*,cmpxchg} intrinsics represent an emulated
19021 // narrow atomic operation. These are implemented using atomic
19022 // operations at the minimum supported atomicrmw/cmpxchg width whose
19023 // result is then sign extended to XLEN. With +A, the minimum width is
19024 // 32 for both RV64 and RV32.
19025 assert(Subtarget.getXLen() == 64);
19027 assert(Subtarget.hasStdExtA());
19028 return 33;
19029 }
19030 break;
19031 }
19032 }
19033
19034 return 1;
19035}
19036
19037bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
19038 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
19039 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
19040
19041 // TODO: Add more target nodes.
19042 switch (Op.getOpcode()) {
19043 case RISCVISD::SELECT_CC:
19044 // Integer select_cc cannot create poison.
19045 // TODO: What are the FP poison semantics?
19046 // TODO: This instruction blocks poison from the unselected operand, can
19047 // we do anything with that?
19048 return !Op.getValueType().isInteger();
19049 }
19050 return TargetLowering::canCreateUndefOrPoisonForTargetNode(
19051 Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);
19052}
19053
19054const Constant *
19055RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
19056 assert(Ld && "Unexpected null LoadSDNode");
19057 if (!ISD::isNormalLoad(Ld))
19058 return nullptr;
19059
19060 SDValue Ptr = Ld->getBasePtr();
19061
19062 // Only constant pools with no offset are supported.
19063 auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
19064 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
19065 if (!CNode || CNode->isMachineConstantPoolEntry() ||
19066 CNode->getOffset() != 0)
19067 return nullptr;
19068
19069 return CNode;
19070 };
19071
19072 // Simple case, LLA.
19073 if (Ptr.getOpcode() == RISCVISD::LLA) {
19074 auto *CNode = GetSupportedConstantPool(Ptr);
19075 if (!CNode || CNode->getTargetFlags() != 0)
19076 return nullptr;
19077
19078 return CNode->getConstVal();
19079 }
19080
19081 // Look for a HI and ADD_LO pair.
19082 if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
19083 Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
19084 return nullptr;
19085
19086 auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
19087 auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
19088
19089 if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
19090 !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
19091 return nullptr;
19092
19093 if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
19094 return nullptr;
19095
19096 return CNodeLo->getConstVal();
19097}
19098
19099static MachineBasicBlock *emitReadCounterWidePseudo(MachineInstr &MI,
19100 MachineBasicBlock *BB) {
19101 assert(MI.getOpcode() == RISCV::ReadCounterWide && "Unexpected instruction");
19102
19103 // To read a 64-bit counter CSR on a 32-bit target, we read the two halves.
19104 // Should the count have wrapped while it was being read, we need to try
19105 // again.
19106 // For example:
19107 // ```
19108 // read:
19109 // csrrs x3, counterh # load high word of counter
19110 // csrrs x2, counter # load low word of counter
19111 // csrrs x4, counterh # load high word of counter
19112 // bne x3, x4, read # check if high word reads match, otherwise try again
19113 // ```
19114
19115 MachineFunction &MF = *BB->getParent();
19116 const BasicBlock *LLVMBB = BB->getBasicBlock();
19118
19119 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVMBB);
19120 MF.insert(It, LoopMBB);
19121
19122 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVMBB);
19123 MF.insert(It, DoneMBB);
19124
19125 // Transfer the remainder of BB and its successor edges to DoneMBB.
19126 DoneMBB->splice(DoneMBB->begin(), BB,
19127 std::next(MachineBasicBlock::iterator(MI)), BB->end());
19129
19130 BB->addSuccessor(LoopMBB);
19131
19133 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
19134 Register LoReg = MI.getOperand(0).getReg();
19135 Register HiReg = MI.getOperand(1).getReg();
19136 int64_t LoCounter = MI.getOperand(2).getImm();
19137 int64_t HiCounter = MI.getOperand(3).getImm();
19138 DebugLoc DL = MI.getDebugLoc();
19139
19141 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
19142 .addImm(HiCounter)
19143 .addReg(RISCV::X0);
19144 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
19145 .addImm(LoCounter)
19146 .addReg(RISCV::X0);
19147 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
19148 .addImm(HiCounter)
19149 .addReg(RISCV::X0);
19150
19151 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
19152 .addReg(HiReg)
19153 .addReg(ReadAgainReg)
19154 .addMBB(LoopMBB);
19155
19156 LoopMBB->addSuccessor(LoopMBB);
19157 LoopMBB->addSuccessor(DoneMBB);
19158
19159 MI.eraseFromParent();
19160
19161 return DoneMBB;
19162}
19163
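// Lower SplitF64Pseudo: split an FPR64 value into two 32-bit GPR halves by
// spilling it to a stack slot and reloading the low and high words with LW.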
19164static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
19165 MachineBasicBlock *BB,
19166 const RISCVSubtarget &Subtarget) {
19167 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
19168
19169 MachineFunction &MF = *BB->getParent();
19170 DebugLoc DL = MI.getDebugLoc();
19173 Register LoReg = MI.getOperand(0).getReg();
19174 Register HiReg = MI.getOperand(1).getReg();
19175 Register SrcReg = MI.getOperand(2).getReg();
19176
19177 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
19178 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19179
19180 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
19181 RI, Register());
19183 MachineMemOperand *MMOLo =
19187 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
19188 .addFrameIndex(FI)
19189 .addImm(0)
19190 .addMemOperand(MMOLo);
19191 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
19192 .addFrameIndex(FI)
19193 .addImm(4)
19194 .addMemOperand(MMOHi);
19195 MI.eraseFromParent(); // The pseudo instruction is gone now.
19196 return BB;
19197}
19198
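// Lower BuildPairF64Pseudo: the converse of SplitF64Pseudo. Store the two
// 32-bit GPR halves to a stack slot with SW and reload them as one FPR64.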
19199static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
19200 MachineBasicBlock *BB,
19201 const RISCVSubtarget &Subtarget) {
19202 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
19203 "Unexpected instruction");
19204
19205 MachineFunction &MF = *BB->getParent();
19206 DebugLoc DL = MI.getDebugLoc();
19209 Register DstReg = MI.getOperand(0).getReg();
19210 Register LoReg = MI.getOperand(1).getReg();
19211 Register HiReg = MI.getOperand(2).getReg();
19212
19213 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
19214 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
19215
19217 MachineMemOperand *MMOLo =
19221 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19222 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
19223 .addFrameIndex(FI)
19224 .addImm(0)
19225 .addMemOperand(MMOLo);
19226 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
19227 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
19228 .addFrameIndex(FI)
19229 .addImm(4)
19230 .addMemOperand(MMOHi);
19231 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI, Register());
19232 MI.eraseFromParent(); // The pseudo instruction is gone now.
19233 return BB;
19234}
19235
19236static bool isSelectPseudo(MachineInstr &MI) {
19237 switch (MI.getOpcode()) {
19238 default:
19239 return false;
19240 case RISCV::Select_GPR_Using_CC_GPR:
19241 case RISCV::Select_GPR_Using_CC_Imm:
19242 case RISCV::Select_FPR16_Using_CC_GPR:
19243 case RISCV::Select_FPR16INX_Using_CC_GPR:
19244 case RISCV::Select_FPR32_Using_CC_GPR:
19245 case RISCV::Select_FPR32INX_Using_CC_GPR:
19246 case RISCV::Select_FPR64_Using_CC_GPR:
19247 case RISCV::Select_FPR64INX_Using_CC_GPR:
19248 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19249 return true;
19250 }
19251}
19252
19253static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
19254 unsigned RelOpcode, unsigned EqOpcode,
19255 const RISCVSubtarget &Subtarget) {
19256 DebugLoc DL = MI.getDebugLoc();
19257 Register DstReg = MI.getOperand(0).getReg();
19258 Register Src1Reg = MI.getOperand(1).getReg();
19259 Register Src2Reg = MI.getOperand(2).getReg();
19261 Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19263
19264 // Save the current FFLAGS.
19265 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
19266
19267 auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
19268 .addReg(Src1Reg)
19269 .addReg(Src2Reg);
19272
19273 // Restore the FFLAGS.
19274 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19275 .addReg(SavedFFlags, RegState::Kill);
19276
19277 // Issue a dummy FEQ opcode to raise exception for signaling NaNs.
19278 auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
19279 .addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
19280 .addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
19283
19284 // Erase the pseudoinstruction.
19285 MI.eraseFromParent();
19286 return BB;
19287}
19288
19289static MachineBasicBlock *
19290EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
19291 MachineBasicBlock *ThisMBB,
19292 const RISCVSubtarget &Subtarget) {
19293 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19294 // Without this, custom-inserter would have generated:
19295 //
19296 // A
19297 // | \
19298 // | B
19299 // | /
19300 // C
19301 // | \
19302 // | D
19303 // | /
19304 // E
19305 //
19306 // A: X = ...; Y = ...
19307 // B: empty
19308 // C: Z = PHI [X, A], [Y, B]
19309 // D: empty
19310 // E: PHI [X, C], [Z, D]
19311 //
19312 // If we lower both Select_FPRX_ in a single step, we can instead generate:
19313 //
19314 // A
19315 // | \
19316 // | C
19317 // | /|
19318 // |/ |
19319 // | |
19320 // | D
19321 // | /
19322 // E
19323 //
19324 // A: X = ...; Y = ...
19325 // D: empty
19326 // E: PHI [X, A], [X, C], [Y, D]
19327
19328 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19329 const DebugLoc &DL = First.getDebugLoc();
19330 const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
19331 MachineFunction *F = ThisMBB->getParent();
19332 MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
19333 MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
19334 MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
19335 MachineFunction::iterator It = ++ThisMBB->getIterator();
19336 F->insert(It, FirstMBB);
19337 F->insert(It, SecondMBB);
19338 F->insert(It, SinkMBB);
19339
19340 // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
19341 SinkMBB->splice(SinkMBB->begin(), ThisMBB,
19343 ThisMBB->end());
19344 SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
19345
19346 // Fallthrough block for ThisMBB.
19347 ThisMBB->addSuccessor(FirstMBB);
19348 // Fallthrough block for FirstMBB.
19349 FirstMBB->addSuccessor(SecondMBB);
19350 ThisMBB->addSuccessor(SinkMBB);
19351 FirstMBB->addSuccessor(SinkMBB);
19352 // This is fallthrough.
19353 SecondMBB->addSuccessor(SinkMBB);
19354
19355 auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
19356 Register FLHS = First.getOperand(1).getReg();
19357 Register FRHS = First.getOperand(2).getReg();
19358 // Insert appropriate branch.
19359 BuildMI(FirstMBB, DL, TII.getBrCond(FirstCC))
19360 .addReg(FLHS)
19361 .addReg(FRHS)
19362 .addMBB(SinkMBB);
19363
19364 Register SLHS = Second.getOperand(1).getReg();
19365 Register SRHS = Second.getOperand(2).getReg();
19366 Register Op1Reg4 = First.getOperand(4).getReg();
19367 Register Op1Reg5 = First.getOperand(5).getReg();
19368
19369 auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
19370 // Insert appropriate branch.
19371 BuildMI(ThisMBB, DL, TII.getBrCond(SecondCC))
19372 .addReg(SLHS)
19373 .addReg(SRHS)
19374 .addMBB(SinkMBB);
19375
19376 Register DestReg = Second.getOperand(0).getReg();
19377 Register Op2Reg4 = Second.getOperand(4).getReg();
19378 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
19379 .addReg(Op2Reg4)
19380 .addMBB(ThisMBB)
19381 .addReg(Op1Reg4)
19382 .addMBB(FirstMBB)
19383 .addReg(Op1Reg5)
19384 .addMBB(SecondMBB);
19385
19386 // Now remove the Select_FPRX_s.
19387 First.eraseFromParent();
19388 Second.eraseFromParent();
19389 return SinkMBB;
19390}
19391
19392static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
19393 MachineBasicBlock *BB,
19394 const RISCVSubtarget &Subtarget) {
19395 // To "insert" Select_* instructions, we actually have to insert the triangle
19396 // control-flow pattern. The incoming instructions know the destination vreg
19397 // to set, the condition code register to branch on, the true/false values to
19398 // select between, and the condcode to use to select the appropriate branch.
19399 //
19400 // We produce the following control flow:
19401 // HeadMBB
19402 // | \
19403 // | IfFalseMBB
19404 // | /
19405 // TailMBB
19406 //
19407 // When we find a sequence of selects we attempt to optimize their emission
19408 // by sharing the control flow. Currently we only handle cases where we have
19409 // multiple selects with the exact same condition (same LHS, RHS and CC).
19410 // The selects may be interleaved with other instructions if the other
19411 // instructions meet some requirements we deem safe:
19412 // - They are not pseudo instructions.
19413 // - They are debug instructions; otherwise,
19414 // - They do not have side-effects, do not access memory and their inputs do
19415 // not depend on the results of the select pseudo-instructions.
19416 // The TrueV/FalseV operands of the selects cannot depend on the result of
19417 // previous selects in the sequence.
19418 // These conditions could be further relaxed. See the X86 target for a
19419 // related approach and more information.
19420 //
19421 // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
19422 // is checked here and handled by a separate function -
19423 // EmitLoweredCascadedSelect.
19424
19425 auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
19426 if ((MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR &&
19427 MI.getOpcode() != RISCV::Select_GPR_Using_CC_Imm) &&
19428 Next != BB->end() && Next->getOpcode() == MI.getOpcode() &&
19429 Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
19430 Next->getOperand(5).isKill())
19431 return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
19432
19433 Register LHS = MI.getOperand(1).getReg();
19434 Register RHS;
19435 if (MI.getOperand(2).isReg())
19436 RHS = MI.getOperand(2).getReg();
19437 auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
19438
19439 SmallVector<MachineInstr *, 4> SelectDebugValues;
19440 SmallSet<Register, 4> SelectDests;
19441 SelectDests.insert(MI.getOperand(0).getReg());
19442
19443 MachineInstr *LastSelectPseudo = &MI;
19444 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
19445 SequenceMBBI != E; ++SequenceMBBI) {
19446 if (SequenceMBBI->isDebugInstr())
19447 continue;
19448 if (isSelectPseudo(*SequenceMBBI)) {
19449 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
19450 !SequenceMBBI->getOperand(2).isReg() ||
19451 SequenceMBBI->getOperand(2).getReg() != RHS ||
19452 SequenceMBBI->getOperand(3).getImm() != CC ||
19453 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
19454 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
19455 break;
19456 LastSelectPseudo = &*SequenceMBBI;
19457 SequenceMBBI->collectDebugValues(SelectDebugValues);
19458 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
19459 continue;
19460 }
19461 if (SequenceMBBI->hasUnmodeledSideEffects() ||
19462 SequenceMBBI->mayLoadOrStore() ||
19463 SequenceMBBI->usesCustomInsertionHook())
19464 break;
19465 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
19466 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
19467 }))
19468 break;
19469 }
19470
19471 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19472 const BasicBlock *LLVM_BB = BB->getBasicBlock();
19473 DebugLoc DL = MI.getDebugLoc();
19475
19476 MachineBasicBlock *HeadMBB = BB;
19477 MachineFunction *F = BB->getParent();
19478 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
19479 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
19480
19481 F->insert(I, IfFalseMBB);
19482 F->insert(I, TailMBB);
19483
19484 // Set the call frame size on entry to the new basic blocks.
19485 unsigned CallFrameSize = TII.getCallFrameSizeAt(*LastSelectPseudo);
19486 IfFalseMBB->setCallFrameSize(CallFrameSize);
19487 TailMBB->setCallFrameSize(CallFrameSize);
19488
19489 // Transfer debug instructions associated with the selects to TailMBB.
19490 for (MachineInstr *DebugInstr : SelectDebugValues) {
19491 TailMBB->push_back(DebugInstr->removeFromParent());
19492 }
19493
19494 // Move all instructions after the sequence to TailMBB.
19495 TailMBB->splice(TailMBB->end(), HeadMBB,
19496 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
19497 // Update machine-CFG edges by transferring all successors of the current
19498 // block to the new block which will contain the Phi nodes for the selects.
19499 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
19500 // Set the successors for HeadMBB.
19501 HeadMBB->addSuccessor(IfFalseMBB);
19502 HeadMBB->addSuccessor(TailMBB);
19503
19504 // Insert appropriate branch.
19505 if (MI.getOperand(2).isImm())
19506 BuildMI(HeadMBB, DL, TII.getBrCond(CC, MI.getOperand(2).isImm()))
19507 .addReg(LHS)
19508 .addImm(MI.getOperand(2).getImm())
19509 .addMBB(TailMBB);
19510 else
19511 BuildMI(HeadMBB, DL, TII.getBrCond(CC))
19512 .addReg(LHS)
19513 .addReg(RHS)
19514 .addMBB(TailMBB);
19515
19516 // IfFalseMBB just falls through to TailMBB.
19517 IfFalseMBB->addSuccessor(TailMBB);
19518
19519 // Create PHIs for all of the select pseudo-instructions.
19520 auto SelectMBBI = MI.getIterator();
19521 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
19522 auto InsertionPoint = TailMBB->begin();
19523 while (SelectMBBI != SelectEnd) {
19524 auto Next = std::next(SelectMBBI);
19525 if (isSelectPseudo(*SelectMBBI)) {
19526 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
19527 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
19528 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
19529 .addReg(SelectMBBI->getOperand(4).getReg())
19530 .addMBB(HeadMBB)
19531 .addReg(SelectMBBI->getOperand(5).getReg())
19532 .addMBB(IfFalseMBB);
19533 SelectMBBI->eraseFromParent();
19534 }
19535 SelectMBBI = Next;
19536 }
19537
19538 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
19539 return TailMBB;
19540}
19541
19542// Helper to find Masked Pseudo instruction from MC instruction, LMUL and SEW.
19543static const RISCV::RISCVMaskedPseudoInfo *
19544lookupMaskedIntrinsic(uint16_t MCOpcode, RISCVII::VLMUL LMul, unsigned SEW) {
19546 RISCVVInversePseudosTable::getBaseInfo(MCOpcode, LMul, SEW);
19547 assert(Inverse && "Unexpected LMUL and SEW pair for instruction");
19548 const RISCV::RISCVMaskedPseudoInfo *Masked =
19549 RISCV::lookupMaskedIntrinsicByUnmasked(Inverse->Pseudo);
19550 assert(Masked && "Could not find masked instruction for LMUL and SEW pair");
19551 return Masked;
19552}
19553
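// Lower a PseudoVFROUND_NOEXCEPT_V_*_MASK: round by converting to integer and
// back (vfcvt.x.f.v followed by a masked vfcvt.f.x.v with dynamic rounding),
// saving and restoring FFLAGS around the sequence so no exception flags are
// left behind.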
19554static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
19555 MachineBasicBlock *BB,
19556 unsigned CVTXOpc) {
19557 DebugLoc DL = MI.getDebugLoc();
19558
19560
19562 Register SavedFFLAGS = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19563
19564 // Save the old value of FFLAGS.
19565 BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFLAGS);
19566
19567 assert(MI.getNumOperands() == 7);
19568
19569 // Emit a VFCVT_X_F
19570 const TargetRegisterInfo *TRI =
19572 const TargetRegisterClass *RC = MI.getRegClassConstraint(0, &TII, TRI);
19573 Register Tmp = MRI.createVirtualRegister(RC);
19574 BuildMI(*BB, MI, DL, TII.get(CVTXOpc), Tmp)
19575 .add(MI.getOperand(1))
19576 .add(MI.getOperand(2))
19577 .add(MI.getOperand(3))
19578 .add(MachineOperand::CreateImm(7)) // frm = DYN
19579 .add(MI.getOperand(4))
19580 .add(MI.getOperand(5))
19581 .add(MI.getOperand(6))
19582 .add(MachineOperand::CreateReg(RISCV::FRM,
19583 /*IsDef*/ false,
19584 /*IsImp*/ true));
19585
19586 // Emit a VFCVT_F_X
19587 RISCVII::VLMUL LMul = RISCVII::getLMul(MI.getDesc().TSFlags);
19588 unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MI.getDesc())).getImm();
19589 // There is no E8 variant for VFCVT_F_X.
19590 assert(Log2SEW >= 4);
19591 unsigned CVTFOpc =
19592 lookupMaskedIntrinsic(RISCV::VFCVT_F_X_V, LMul, 1 << Log2SEW)
19593 ->MaskedPseudo;
19594
19595 BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
19596 .add(MI.getOperand(0))
19597 .add(MI.getOperand(1))
19598 .addReg(Tmp)
19599 .add(MI.getOperand(3))
19600 .add(MachineOperand::CreateImm(7)) // frm = DYN
19601 .add(MI.getOperand(4))
19602 .add(MI.getOperand(5))
19603 .add(MI.getOperand(6))
19604 .add(MachineOperand::CreateReg(RISCV::FRM,
19605 /*IsDef*/ false,
19606 /*IsImp*/ true));
19607
19608 // Restore FFLAGS.
19609 BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
19610 .addReg(SavedFFLAGS, RegState::Kill);
19611
19612 // Erase the pseudoinstruction.
19613 MI.eraseFromParent();
19614 return BB;
19615}
19616
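// Lower a PseudoFROUND_*: compare the absolute value of the input against the
// limit operand and only perform the round trip (FCVT to integer and back,
// then restore the original sign with FSGNJ) when the magnitude is below that
// limit; the two results are merged with a PHI in DoneMBB.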
19617static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
19618 const RISCVSubtarget &Subtarget) {
19619 unsigned CmpOpc, F2IOpc, I2FOpc, FSGNJOpc, FSGNJXOpc;
19620 const TargetRegisterClass *RC;
19621 switch (MI.getOpcode()) {
19622 default:
19623 llvm_unreachable("Unexpected opcode");
19624 case RISCV::PseudoFROUND_H:
19625 CmpOpc = RISCV::FLT_H;
19626 F2IOpc = RISCV::FCVT_W_H;
19627 I2FOpc = RISCV::FCVT_H_W;
19628 FSGNJOpc = RISCV::FSGNJ_H;
19629 FSGNJXOpc = RISCV::FSGNJX_H;
19630 RC = &RISCV::FPR16RegClass;
19631 break;
19632 case RISCV::PseudoFROUND_H_INX:
19633 CmpOpc = RISCV::FLT_H_INX;
19634 F2IOpc = RISCV::FCVT_W_H_INX;
19635 I2FOpc = RISCV::FCVT_H_W_INX;
19636 FSGNJOpc = RISCV::FSGNJ_H_INX;
19637 FSGNJXOpc = RISCV::FSGNJX_H_INX;
19638 RC = &RISCV::GPRF16RegClass;
19639 break;
19640 case RISCV::PseudoFROUND_S:
19641 CmpOpc = RISCV::FLT_S;
19642 F2IOpc = RISCV::FCVT_W_S;
19643 I2FOpc = RISCV::FCVT_S_W;
19644 FSGNJOpc = RISCV::FSGNJ_S;
19645 FSGNJXOpc = RISCV::FSGNJX_S;
19646 RC = &RISCV::FPR32RegClass;
19647 break;
19648 case RISCV::PseudoFROUND_S_INX:
19649 CmpOpc = RISCV::FLT_S_INX;
19650 F2IOpc = RISCV::FCVT_W_S_INX;
19651 I2FOpc = RISCV::FCVT_S_W_INX;
19652 FSGNJOpc = RISCV::FSGNJ_S_INX;
19653 FSGNJXOpc = RISCV::FSGNJX_S_INX;
19654 RC = &RISCV::GPRF32RegClass;
19655 break;
19656 case RISCV::PseudoFROUND_D:
19657 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19658 CmpOpc = RISCV::FLT_D;
19659 F2IOpc = RISCV::FCVT_L_D;
19660 I2FOpc = RISCV::FCVT_D_L;
19661 FSGNJOpc = RISCV::FSGNJ_D;
19662 FSGNJXOpc = RISCV::FSGNJX_D;
19663 RC = &RISCV::FPR64RegClass;
19664 break;
19665 case RISCV::PseudoFROUND_D_INX:
19666 assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
19667 CmpOpc = RISCV::FLT_D_INX;
19668 F2IOpc = RISCV::FCVT_L_D_INX;
19669 I2FOpc = RISCV::FCVT_D_L_INX;
19670 FSGNJOpc = RISCV::FSGNJ_D_INX;
19671 FSGNJXOpc = RISCV::FSGNJX_D_INX;
19672 RC = &RISCV::GPRRegClass;
19673 break;
19674 }
19675
19676 const BasicBlock *BB = MBB->getBasicBlock();
19677 DebugLoc DL = MI.getDebugLoc();
19679
19681 MachineBasicBlock *CvtMBB = F->CreateMachineBasicBlock(BB);
19682 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB);
19683
19684 F->insert(I, CvtMBB);
19685 F->insert(I, DoneMBB);
19686 // Move all instructions after the sequence to DoneMBB.
19687 DoneMBB->splice(DoneMBB->end(), MBB, MachineBasicBlock::iterator(MI),
19688 MBB->end());
19689 // Update machine-CFG edges by transferring all successors of the current
19690 // block to the new block which will contain the Phi nodes for the selects.
19692 // Set the successors for MBB.
19693 MBB->addSuccessor(CvtMBB);
19694 MBB->addSuccessor(DoneMBB);
19695
19696 Register DstReg = MI.getOperand(0).getReg();
19697 Register SrcReg = MI.getOperand(1).getReg();
19698 Register MaxReg = MI.getOperand(2).getReg();
19699 int64_t FRM = MI.getOperand(3).getImm();
19700
19701 const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
19703
19704 Register FabsReg = MRI.createVirtualRegister(RC);
19705 BuildMI(MBB, DL, TII.get(FSGNJXOpc), FabsReg).addReg(SrcReg).addReg(SrcReg);
19706
19707 // Compare the FP value to the max value.
19708 Register CmpReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19709 auto MIB =
19710 BuildMI(MBB, DL, TII.get(CmpOpc), CmpReg).addReg(FabsReg).addReg(MaxReg);
19713
19714 // Insert branch.
19715 BuildMI(MBB, DL, TII.get(RISCV::BEQ))
19716 .addReg(CmpReg)
19717 .addReg(RISCV::X0)
19718 .addMBB(DoneMBB);
19719
19720 CvtMBB->addSuccessor(DoneMBB);
19721
19722 // Convert to integer.
19723 Register F2IReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
19724 MIB = BuildMI(CvtMBB, DL, TII.get(F2IOpc), F2IReg).addReg(SrcReg).addImm(FRM);
19727
19728 // Convert back to FP.
19729 Register I2FReg = MRI.createVirtualRegister(RC);
19730 MIB = BuildMI(CvtMBB, DL, TII.get(I2FOpc), I2FReg).addReg(F2IReg).addImm(FRM);
19733
19734 // Restore the sign bit.
19735 Register CvtReg = MRI.createVirtualRegister(RC);
19736 BuildMI(CvtMBB, DL, TII.get(FSGNJOpc), CvtReg).addReg(I2FReg).addReg(SrcReg);
19737
19738 // Merge the results.
19739 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(RISCV::PHI), DstReg)
19740 .addReg(SrcReg)
19741 .addMBB(MBB)
19742 .addReg(CvtReg)
19743 .addMBB(CvtMBB);
19744
19745 MI.eraseFromParent();
19746 return DoneMBB;
19747}
19748
19749MachineBasicBlock *
19750RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
19751 MachineBasicBlock *BB) const {
19752 switch (MI.getOpcode()) {
19753 default:
19754 llvm_unreachable("Unexpected instr type to insert");
19755 case RISCV::ReadCounterWide:
19756 assert(!Subtarget.is64Bit() &&
19757 "ReadCounterWide is only to be used on riscv32");
19758 return emitReadCounterWidePseudo(MI, BB);
19759 case RISCV::Select_GPR_Using_CC_GPR:
19760 case RISCV::Select_GPR_Using_CC_Imm:
19761 case RISCV::Select_FPR16_Using_CC_GPR:
19762 case RISCV::Select_FPR16INX_Using_CC_GPR:
19763 case RISCV::Select_FPR32_Using_CC_GPR:
19764 case RISCV::Select_FPR32INX_Using_CC_GPR:
19765 case RISCV::Select_FPR64_Using_CC_GPR:
19766 case RISCV::Select_FPR64INX_Using_CC_GPR:
19767 case RISCV::Select_FPR64IN32X_Using_CC_GPR:
19768 return emitSelectPseudo(MI, BB, Subtarget);
19769 case RISCV::BuildPairF64Pseudo:
19770 return emitBuildPairF64Pseudo(MI, BB, Subtarget);
19771 case RISCV::SplitF64Pseudo:
19772 return emitSplitF64Pseudo(MI, BB, Subtarget);
19773 case RISCV::PseudoQuietFLE_H:
19774 return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
19775 case RISCV::PseudoQuietFLE_H_INX:
19776 return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
19777 case RISCV::PseudoQuietFLT_H:
19778 return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
19779 case RISCV::PseudoQuietFLT_H_INX:
19780 return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
19781 case RISCV::PseudoQuietFLE_S:
19782 return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
19783 case RISCV::PseudoQuietFLE_S_INX:
19784 return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
19785 case RISCV::PseudoQuietFLT_S:
19786 return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
19787 case RISCV::PseudoQuietFLT_S_INX:
19788 return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
19789 case RISCV::PseudoQuietFLE_D:
19790 return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
19791 case RISCV::PseudoQuietFLE_D_INX:
19792 return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
19793 case RISCV::PseudoQuietFLE_D_IN32X:
19794 return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
19795 Subtarget);
19796 case RISCV::PseudoQuietFLT_D:
19797 return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
19798 case RISCV::PseudoQuietFLT_D_INX:
19799 return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
19800 case RISCV::PseudoQuietFLT_D_IN32X:
19801 return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
19802 Subtarget);
19803
19804 case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
19805 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
19806 case RISCV::PseudoVFROUND_NOEXCEPT_V_M2_MASK:
19807 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
19808 case RISCV::PseudoVFROUND_NOEXCEPT_V_M4_MASK:
19809 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
19810 case RISCV::PseudoVFROUND_NOEXCEPT_V_M8_MASK:
19811 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
19812 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF2_MASK:
19813 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
19814 case RISCV::PseudoVFROUND_NOEXCEPT_V_MF4_MASK:
19815 return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
19816 case RISCV::PseudoFROUND_H:
19817 case RISCV::PseudoFROUND_H_INX:
19818 case RISCV::PseudoFROUND_S:
19819 case RISCV::PseudoFROUND_S_INX:
19820 case RISCV::PseudoFROUND_D:
19821 case RISCV::PseudoFROUND_D_INX:
19822 case RISCV::PseudoFROUND_D_IN32X:
19823 return emitFROUND(MI, BB, Subtarget);
19824 case RISCV::PROBED_STACKALLOC_DYN:
19825 return emitDynamicProbedAlloc(MI, BB);
19826 case TargetOpcode::STATEPOINT:
19827 // STATEPOINT is a pseudo instruction which has no implicit defs/uses
19828 // while jal call instruction (where statepoint will be lowered at the end)
19829 // has implicit def. This def is early-clobber as it will be set at
19830 // the moment of the call and earlier than any use is read.
19831 // Add this implicit dead def here as a workaround.
19832 MI.addOperand(*MI.getMF(),
19834 RISCV::X1, /*isDef*/ true,
19835 /*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
19836 /*isUndef*/ false, /*isEarlyClobber*/ true));
19837 [[fallthrough]];
19838 case TargetOpcode::STACKMAP:
19839 case TargetOpcode::PATCHPOINT:
19840 if (!Subtarget.is64Bit())
19841 report_fatal_error("STACKMAP, PATCHPOINT and STATEPOINT are only "
19842 "supported on 64-bit targets");
19843 return emitPatchPoint(MI, BB);
19844 }
19845}
19846
19847void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
19848 SDNode *Node) const {
19849 // Add FRM dependency to any instructions with dynamic rounding mode.
19850 int Idx = RISCV::getNamedOperandIdx(MI.getOpcode(), RISCV::OpName::frm);
19851 if (Idx < 0) {
19852 // Vector pseudos have FRM index indicated by TSFlags.
19853 Idx = RISCVII::getFRMOpNum(MI.getDesc());
19854 if (Idx < 0)
19855 return;
19856 }
19857 if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
19858 return;
19859 // If the instruction already reads FRM, don't add another read.
19860 if (MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr))
19861 return;
19862 MI.addOperand(
19863 MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
19864}
19865
19866void RISCVTargetLowering::analyzeInputArgs(
19867 MachineFunction &MF, CCState &CCInfo,
19868 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
19869 RISCVCCAssignFn Fn) const {
19870 unsigned NumArgs = Ins.size();
19872
19873 for (unsigned i = 0; i != NumArgs; ++i) {
19874 MVT ArgVT = Ins[i].VT;
19875 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
19876
19877 Type *ArgTy = nullptr;
19878 if (IsRet)
19879 ArgTy = FType->getReturnType();
19880 else if (Ins[i].isOrigArg())
19881 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
19882
19883 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19884 /*IsFixed=*/true, IsRet, ArgTy)) {
19885 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
19886 << ArgVT << '\n');
19887 llvm_unreachable(nullptr);
19888 }
19889 }
19890}
19891
19892void RISCVTargetLowering::analyzeOutputArgs(
19893 MachineFunction &MF, CCState &CCInfo,
19894 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
19895 CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
19896 unsigned NumArgs = Outs.size();
19897
19898 for (unsigned i = 0; i != NumArgs; i++) {
19899 MVT ArgVT = Outs[i].VT;
19900 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
19901 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
19902
19903 if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo,
19904 Outs[i].IsFixed, IsRet, OrigTy)) {
19905 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
19906 << ArgVT << "\n");
19907 llvm_unreachable(nullptr);
19908 }
19909 }
19910}
19911
19912// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
19913// values.
19914static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
19915 const CCValAssign &VA, const SDLoc &DL,
19916 const RISCVSubtarget &Subtarget) {
19917 if (VA.needsCustom()) {
19918 if (VA.getLocVT().isInteger() &&
19919 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19920 return DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
19921 if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
19922 return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
19923 if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
19924 return convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
19925 llvm_unreachable("Unexpected Custom handling.");
19926 }
19927
19928 switch (VA.getLocInfo()) {
19929 default:
19930 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19931 case CCValAssign::Full:
19932 break;
19933 case CCValAssign::BCvt:
19934 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
19935 break;
19936 }
19937 return Val;
19938}
19939
19940// The caller is responsible for loading the full value if the argument is
19941// passed with CCValAssign::Indirect.
19942static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
19943 const CCValAssign &VA, const SDLoc &DL,
19944 const ISD::InputArg &In,
19945 const RISCVTargetLowering &TLI) {
19948 EVT LocVT = VA.getLocVT();
19949 SDValue Val;
19950 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
19951 Register VReg = RegInfo.createVirtualRegister(RC);
19952 RegInfo.addLiveIn(VA.getLocReg(), VReg);
19953 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
19954
19955 // If input is sign extended from 32 bits, note it for the SExtWRemoval pass.
19956 if (In.isOrigArg()) {
19957 Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
19958 if (OrigArg->getType()->isIntegerTy()) {
19959 unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
19960 // An input zero extended from i31 can also be considered sign extended.
19961 if ((BitWidth <= 32 && In.Flags.isSExt()) ||
19962 (BitWidth < 32 && In.Flags.isZExt())) {
19964 RVFI->addSExt32Register(VReg);
19965 }
19966 }
19967 }
19968
19969 if (VA.getLocInfo() == CCValAssign::Indirect)
19970 return Val;
19971
19972 return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
19973}
19974
19975static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
19976 const CCValAssign &VA, const SDLoc &DL,
19977 const RISCVSubtarget &Subtarget) {
19978 EVT LocVT = VA.getLocVT();
19979
19980 if (VA.needsCustom()) {
19981 if (LocVT.isInteger() &&
19982 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
19983 return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, LocVT, Val);
19984 if (LocVT == MVT::i64 && VA.getValVT() == MVT::f32)
19985 return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
19986 if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
19987 return convertToScalableVector(LocVT, Val, DAG, Subtarget);
19988 llvm_unreachable("Unexpected Custom handling.");
19989 }
19990
19991 switch (VA.getLocInfo()) {
19992 default:
19993 llvm_unreachable("Unexpected CCValAssign::LocInfo");
19994 case CCValAssign::Full:
19995 break;
19996 case CCValAssign::BCvt:
19997 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
19998 break;
19999 }
20000 return Val;
20001}
20002
20003// The caller is responsible for loading the full value if the argument is
20004// passed with CCValAssign::Indirect.
20005static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
20006 const CCValAssign &VA, const SDLoc &DL) {
20008 MachineFrameInfo &MFI = MF.getFrameInfo();
20009 EVT LocVT = VA.getLocVT();
20010 EVT ValVT = VA.getValVT();
20012 if (VA.getLocInfo() == CCValAssign::Indirect) {
20013 // When the value is a scalable vector, we save the pointer which points to
20014 // the scalable vector value in the stack. The ValVT will be the pointer
20015 // type, instead of the scalable vector type.
20016 ValVT = LocVT;
20017 }
20018 int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
20019 /*IsImmutable=*/true);
20020 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20021 SDValue Val;
20022
20024 switch (VA.getLocInfo()) {
20025 default:
20026 llvm_unreachable("Unexpected CCValAssign::LocInfo");
20027 case CCValAssign::Full:
20029 case CCValAssign::BCvt:
20030 break;
20031 }
20032 Val = DAG.getExtLoad(
20033 ExtType, DL, LocVT, Chain, FIN,
20035 return Val;
20036}
20037
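// Unpack an f64 argument passed under the RV32 soft-float double ABI: the low
// half arrives in a GPR and the high half either in a second GPR or on the
// stack; the halves are recombined with RISCVISD::BuildPairF64.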
20038static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
20039 const CCValAssign &VA,
20040 const CCValAssign &HiVA,
20041 const SDLoc &DL) {
20042 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
20043 "Unexpected VA");
20045 MachineFrameInfo &MFI = MF.getFrameInfo();
20047
20048 assert(VA.isRegLoc() && "Expected register VA assignment");
20049
20050 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20051 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
20052 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
20053 SDValue Hi;
20054 if (HiVA.isMemLoc()) {
20055 // Second half of f64 is passed on the stack.
20056 int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
20057 /*IsImmutable=*/true);
20058 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
20059 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
20061 } else {
20062 // Second half of f64 is passed in another GPR.
20063 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
20064 RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
20065 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
20066 }
20067 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
20068}
20069
20070// Transform physical registers into virtual registers.
20071SDValue RISCVTargetLowering::LowerFormalArguments(
20072 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
20073 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
20074 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
20075
20077
20078 switch (CallConv) {
20079 default:
20080 report_fatal_error("Unsupported calling convention");
20081 case CallingConv::C:
20082 case CallingConv::Fast:
20084 case CallingConv::GRAAL:
20086 break;
20087 case CallingConv::GHC:
20088 if (Subtarget.hasStdExtE())
20089 report_fatal_error("GHC calling convention is not supported on RVE!");
20090 if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
20091 report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
20092 "(Zdinx/D) instruction set extensions");
20093 }
20094
20095 const Function &Func = MF.getFunction();
20096 if (Func.hasFnAttribute("interrupt")) {
20097 if (!Func.arg_empty())
20099 "Functions with the interrupt attribute cannot have arguments!");
20100
20101 StringRef Kind =
20102 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20103
20104 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
20106 "Function interrupt attribute argument not supported!");
20107 }
20108
20109 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20110 MVT XLenVT = Subtarget.getXLenVT();
20111 unsigned XLenInBytes = Subtarget.getXLen() / 8;
20112 // Used with varargs to accumulate store chains.
20113 std::vector<SDValue> OutChains;
20114
20115 // Assign locations to all of the incoming arguments.
20117 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20118
20119 if (CallConv == CallingConv::GHC)
20121 else
20122 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
20124 : CC_RISCV);
20125
20126 for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
20127 CCValAssign &VA = ArgLocs[i];
20128 SDValue ArgValue;
20129 // Passing f64 on RV32D with a soft float ABI must be handled as a special
20130 // case.
20131 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20132 assert(VA.needsCustom());
20133 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
20134 } else if (VA.isRegLoc())
20135 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
20136 else
20137 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
20138
20139 if (VA.getLocInfo() == CCValAssign::Indirect) {
20140 // If the original argument was split and passed by reference (e.g. i128
20141 // on RV32), we need to load all parts of it here (using the same
20142 // address). Vectors may be partly split to registers and partly to the
20143 // stack, in which case the base address is partly offset and subsequent
20144 // stores are relative to that.
20145 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
20147 unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
20148 unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
20149 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20150 while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
20151 CCValAssign &PartVA = ArgLocs[i + 1];
20152 unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
20153 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20154 if (PartVA.getValVT().isScalableVector())
20155 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20156 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
20157 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
20159 ++i;
20160 ++InsIdx;
20161 }
20162 continue;
20163 }
20164 InVals.push_back(ArgValue);
20165 }
20166
20167 if (any_of(ArgLocs,
20168 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20169 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20170
20171 if (IsVarArg) {
20172 ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
20173 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
20174 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
20175 MachineFrameInfo &MFI = MF.getFrameInfo();
20176 MachineRegisterInfo &RegInfo = MF.getRegInfo();
20178
20179 // Size of the vararg save area. For now, the varargs save area is either
20180 // zero or large enough to hold a0-a7.
20181 int VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
20182 int FI;
20183
20184 // If all registers are allocated, then all varargs must be passed on the
20185 // stack and we don't need to save any argregs.
20186 if (VarArgsSaveSize == 0) {
20187 int VaArgOffset = CCInfo.getStackSize();
20188 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
20189 } else {
20190 int VaArgOffset = -VarArgsSaveSize;
20191 FI = MFI.CreateFixedObject(VarArgsSaveSize, VaArgOffset, true);
20192
20193 // If saving an odd number of registers, then create an extra stack slot to
20194 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
20195 // offsets to even-numbered registers remain 2*XLEN-aligned.
20196 if (Idx % 2) {
20198 XLenInBytes, VaArgOffset - static_cast<int>(XLenInBytes), true);
20199 VarArgsSaveSize += XLenInBytes;
20200 }
20201
20202 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
20203
20204 // Copy the integer registers that may have been used for passing varargs
20205 // to the vararg save area.
20206 for (unsigned I = Idx; I < ArgRegs.size(); ++I) {
20207 const Register Reg = RegInfo.createVirtualRegister(RC);
20208 RegInfo.addLiveIn(ArgRegs[I], Reg);
20209 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
20210 SDValue Store = DAG.getStore(
20211 Chain, DL, ArgValue, FIN,
20212 MachinePointerInfo::getFixedStack(MF, FI, (I - Idx) * XLenInBytes));
20213 OutChains.push_back(Store);
20214 FIN =
20215 DAG.getMemBasePlusOffset(FIN, TypeSize::getFixed(XLenInBytes), DL);
20216 }
20217 }
20218
20219 // Record the frame index of the first variable argument,
20220 // which is needed when lowering VASTART.
20221 RVFI->setVarArgsFrameIndex(FI);
20222 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
20223 }
20224
20225 // All stores are grouped in one node to allow the matching between
20226 // the size of Ins and InVals. This only happens for vararg functions.
20227 if (!OutChains.empty()) {
20228 OutChains.push_back(Chain);
20229 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
20230 }
20231
20232 return Chain;
20233}
20234
20235/// isEligibleForTailCallOptimization - Check whether the call is eligible
20236/// for tail call optimization.
20237/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
20238bool RISCVTargetLowering::isEligibleForTailCallOptimization(
20239 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
20240 const SmallVector<CCValAssign, 16> &ArgLocs) const {
20241
20242 auto CalleeCC = CLI.CallConv;
20243 auto &Outs = CLI.Outs;
20244 auto &Caller = MF.getFunction();
20245 auto CallerCC = Caller.getCallingConv();
20246
20247 // Exception-handling functions need a special set of instructions to
20248 // indicate a return to the hardware. Tail-calling another function would
20249 // probably break this.
20250 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
20251 // should be expanded as new function attributes are introduced.
20252 if (Caller.hasFnAttribute("interrupt"))
20253 return false;
20254
20255 // Do not tail call opt if the stack is used to pass parameters.
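// For example, under LP64 a call passing nine XLEN-sized integer arguments
// places the ninth on the stack, so getStackSize() is non-zero and the call
// is not eligible.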
20256 if (CCInfo.getStackSize() != 0)
20257 return false;
20258
20259 // Do not tail call opt if any parameters need to be passed indirectly.
20260 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
20261 // passed indirectly. So the address of the value will be passed in a
20262 // register, or if not available, then the address is put on the stack. In
20263 // order to pass indirectly, space on the stack often needs to be allocated
20264 // in order to store the value. In this case the CCInfo.getNextStackOffset()
20265 // != 0 check is not enough and we need to check if any CCValAssign ArgsLocs
20266 // are passed CCValAssign::Indirect.
20267 for (auto &VA : ArgLocs)
20268 if (VA.getLocInfo() == CCValAssign::Indirect)
20269 return false;
20270
20271 // Do not tail call opt if either caller or callee uses struct return
20272 // semantics.
20273 auto IsCallerStructRet = Caller.hasStructRetAttr();
20274 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
20275 if (IsCallerStructRet || IsCalleeStructRet)
20276 return false;
20277
20278 // The callee has to preserve all registers the caller needs to preserve.
20279 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
20280 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
20281 if (CalleeCC != CallerCC) {
20282 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
20283 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
20284 return false;
20285 }
20286
20287 // Byval parameters hand the function a pointer directly into the stack area
20288 // we want to reuse during a tail call. Working around this *is* possible
20289 // but less efficient and uglier in LowerCall.
20290 for (auto &Arg : Outs)
20291 if (Arg.Flags.isByVal())
20292 return false;
20293
20294 return true;
20295}
20296
20297 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
20298 return DAG.getDataLayout().getPrefTypeAlign(
20299 VT.getTypeForEVT(*DAG.getContext()));
20300}
20301
20302// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
20303// and output parameter nodes.
20304 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
20305 SmallVectorImpl<SDValue> &InVals) const {
20306 SelectionDAG &DAG = CLI.DAG;
20307 SDLoc &DL = CLI.DL;
20308 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
20309 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
20310 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
20311 SDValue Chain = CLI.Chain;
20312 SDValue Callee = CLI.Callee;
20313 bool &IsTailCall = CLI.IsTailCall;
20314 CallingConv::ID CallConv = CLI.CallConv;
20315 bool IsVarArg = CLI.IsVarArg;
20316 EVT PtrVT = getPointerTy(DAG.getDataLayout());
20317 MVT XLenVT = Subtarget.getXLenVT();
20318
20319 MachineFunction &MF = DAG.getMachineFunction();
20320
20321 // Analyze the operands of the call, assigning locations to each operand.
20322 SmallVector<CCValAssign, 16> ArgLocs;
20323 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
20324
20325 if (CallConv == CallingConv::GHC) {
20326 if (Subtarget.hasStdExtE())
20327 report_fatal_error("GHC calling convention is not supported on RVE!");
20328 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
20329 } else
20330 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
20331 CallConv == CallingConv::Fast ? CC_RISCV_FastCC
20332 : CC_RISCV);
20333
20334 // Check if it's really possible to do a tail call.
20335 if (IsTailCall)
20336 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
20337
20338 if (IsTailCall)
20339 ++NumTailCalls;
20340 else if (CLI.CB && CLI.CB->isMustTailCall())
20341 report_fatal_error("failed to perform tail call elimination on a call "
20342 "site marked musttail");
20343
20344 // Get a count of how many bytes are to be pushed on the stack.
20345 unsigned NumBytes = ArgCCInfo.getStackSize();
20346
20347 // Create local copies for byval args
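// A byval argument is passed as a pointer to a caller-owned copy that the
// callee may modify freely, so materialize that copy in a fresh stack object
// and pass its address instead of the original object's.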
20348 SmallVector<SDValue, 8> ByValArgs;
20349 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20350 ISD::ArgFlagsTy Flags = Outs[i].Flags;
20351 if (!Flags.isByVal())
20352 continue;
20353
20354 SDValue Arg = OutVals[i];
20355 unsigned Size = Flags.getByValSize();
20356 Align Alignment = Flags.getNonZeroByValAlign();
20357
20358 int FI =
20359 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
20360 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
20361 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
20362
20363 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
20364 /*IsVolatile=*/false,
20365 /*AlwaysInline=*/false, /*CI*/ nullptr, IsTailCall,
20366 MachinePointerInfo(), MachinePointerInfo());
20367 ByValArgs.push_back(FIPtr);
20368 }
20369
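// Tail calls reuse the caller's stack frame (eligibility checking above has
// already rejected calls that need stack arguments), so no CALLSEQ markers or
// stack adjustment are emitted for them.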
20370 if (!IsTailCall)
20371 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
20372
20373 // Copy argument values to their designated locations.
20374 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
20375 SmallVector<SDValue, 8> MemOpChains;
20376 SDValue StackPtr;
20377 for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
20378 ++i, ++OutIdx) {
20379 CCValAssign &VA = ArgLocs[i];
20380 SDValue ArgValue = OutVals[OutIdx];
20381 ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
20382
20383 // Handle passing f64 on RV32D with a soft float ABI as a special case.
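// The f64 is split into two i32 halves with RISCVISD::SplitF64: the low half
// always goes in the GPR assigned by VA, while the high half either goes in
// the next GPR or, if no register is left, is stored to the outgoing
// argument area.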
20384 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20385 assert(VA.isRegLoc() && "Expected register VA assignment");
20386 assert(VA.needsCustom());
20387 SDValue SplitF64 = DAG.getNode(
20388 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
20389 SDValue Lo = SplitF64.getValue(0);
20390 SDValue Hi = SplitF64.getValue(1);
20391
20392 Register RegLo = VA.getLocReg();
20393 RegsToPass.push_back(std::make_pair(RegLo, Lo));
20394
20395 // Get the CCValAssign for the Hi part.
20396 CCValAssign &HiVA = ArgLocs[++i];
20397
20398 if (HiVA.isMemLoc()) {
20399 // Second half of f64 is passed on the stack.
20400 if (!StackPtr.getNode())
20401 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20402 SDValue Address =
20403 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20404 DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
20405 // Emit the store.
20406 MemOpChains.push_back(DAG.getStore(
20407 Chain, DL, Hi, Address,
20408 MachinePointerInfo::getStack(MF, HiVA.getLocMemOffset())));
20409 } else {
20410 // Second half of f64 is passed in another GPR.
20411 Register RegHigh = HiVA.getLocReg();
20412 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
20413 }
20414 continue;
20415 }
20416
20417 // Promote the value if needed.
20418 // For now, only handle fully promoted and indirect arguments.
20419 if (VA.getLocInfo() == CCValAssign::Indirect) {
20420 // Store the argument in a stack slot and pass its address.
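// For example, an i128 argument on RV32 (larger than 2*XLEN) reaches this
// point: the value is spilled to a stack temporary below and only the
// temporary's address is passed to the callee.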
20421 Align StackAlign =
20422 std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
20423 getPrefTypeAlign(ArgValue.getValueType(), DAG));
20424 TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
20425 // If the original argument was split (e.g. i128), we need
20426 // to store the required parts of it here (and pass just one address).
20427 // Vectors may be partly split to registers and partly to the stack, in
20428 // which case the base address is partly offset and subsequent stores are
20429 // relative to that.
20430 unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
20431 unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
20432 assert(VA.getValVT().isVector() || ArgPartOffset == 0);
20433 // Calculate the total size to store. We don't know it up front, so walk
20434 // the remaining parts of this argument and collect their types and
20435 // offsets as we go.
20436 SmallVector<std::pair<SDValue, SDValue>> Parts;
20437 while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
20438 SDValue PartValue = OutVals[OutIdx + 1];
20439 unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
20440 SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
20441 EVT PartVT = PartValue.getValueType();
20442 if (PartVT.isScalableVector())
20443 Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
20444 StoredSize += PartVT.getStoreSize();
20445 StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
20446 Parts.push_back(std::make_pair(PartValue, Offset));
20447 ++i;
20448 ++OutIdx;
20449 }
20450 SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
20451 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
20452 MemOpChains.push_back(
20453 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
20454 MachinePointerInfo::getFixedStack(MF, FI)));
20455 for (const auto &Part : Parts) {
20456 SDValue PartValue = Part.first;
20457 SDValue PartOffset = Part.second;
20458 SDValue Address =
20459 DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
20460 MemOpChains.push_back(
20461 DAG.getStore(Chain, DL, PartValue, Address,
20462 MachinePointerInfo::getUnknownStack(MF)));
20463 }
20464 ArgValue = SpillSlot;
20465 } else {
20466 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
20467 }
20468
20469 // Use local copy if it is a byval arg.
20470 if (Flags.isByVal())
20471 ArgValue = ByValArgs[j++];
20472
20473 if (VA.isRegLoc()) {
20474 // Queue up the argument copies and emit them at the end.
20475 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
20476 } else {
20477 assert(VA.isMemLoc() && "Argument not register or memory");
20478 assert(!IsTailCall && "Tail call not allowed if stack is used "
20479 "for passing parameters");
20480
20481 // Work out the address of the stack slot.
20482 if (!StackPtr.getNode())
20483 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
20484 SDValue Address =
20485 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
20486 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
20487
20488 // Emit the store.
20489 MemOpChains.push_back(
20490 DAG.getStore(Chain, DL, ArgValue, Address,
20491 MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
20492 }
20493 }
20494
20495 // Join the stores, which are independent of one another.
20496 if (!MemOpChains.empty())
20497 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
20498
20499 SDValue Glue;
20500
20501 // Build a sequence of copy-to-reg nodes, chained and glued together.
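// The glue forces these copies to be scheduled immediately before the call,
// so nothing can be inserted between setting up the argument registers and
// the call itself.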
20502 for (auto &Reg : RegsToPass) {
20503 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
20504 Glue = Chain.getValue(1);
20505 }
20506
20507 // Validate that none of the argument registers have been marked as
20508 // reserved; if any have been, report an error. Do the same for the return
20509 // address register if this is not a tail call.
20510 validateCCReservedRegs(RegsToPass, MF);
20511 if (!IsTailCall && MF.getSubtarget().isRegisterReservedByUser(RISCV::X1))
20512 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20513 MF.getFunction(),
20514 "Return address register required, but has been reserved."});
20515
20516 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
20517 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
20518 // split it, and so that the direct call can be matched by PseudoCALL.
20519 bool CalleeIsLargeExternalSymbol = false;
20520 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
20521 if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
20522 Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
20523 else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20524 Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
20525 CalleeIsLargeExternalSymbol = true;
20526 }
20527 } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
20528 const GlobalValue *GV = S->getGlobal();
20529 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
20530 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
20531 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, RISCVII::MO_CALL);
20532 }
20533
20534 // The first call operand is the chain and the second is the target address.
20535 SmallVector<SDValue, 8> Ops;
20536 Ops.push_back(Chain);
20537 Ops.push_back(Callee);
20538
20539 // Add argument registers to the end of the list so that they are
20540 // known live into the call.
20541 for (auto &Reg : RegsToPass)
20542 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
20543
20544 // Add a register mask operand representing the call-preserved registers.
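// Any register not preserved by this mask is treated as clobbered at the
// call site, so values are not kept live in it across the call.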
20545 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
20546 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
20547 assert(Mask && "Missing call preserved mask for calling convention");
20548 Ops.push_back(DAG.getRegisterMask(Mask));
20549
20550 // Glue the call to the argument copies, if any.
20551 if (Glue.getNode())
20552 Ops.push_back(Glue);
20553
20554 assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
20555 "Unexpected CFI type for a direct call");
20556
20557 // Emit the call.
20558 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20559
20560 // Use a software-guarded branch for non-indirect calls under the large code
20561 // model. A tail call to an external symbol has a null CLI.CB, so we need
20562 // another way to determine the call-site type.
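// Zicfilp treats an indirect jump through t2 (x7) as software-guarded (see
// the Zicfilp spec), i.e. it does not need to land on an lpad instruction;
// the SW_GUARDED_CALL and SW_GUARDED_TAIL pseudos lower to such a jump.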
20563 bool NeedSWGuarded = false;
20564 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
20565 Subtarget.hasStdExtZicfilp() &&
20566 ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
20567 NeedSWGuarded = true;
20568
20569 if (IsTailCall) {
20570 MF.getFrameInfo().setHasTailCall();
20571 unsigned CallOpc =
20572 NeedSWGuarded ? RISCVISD::SW_GUARDED_TAIL : RISCVISD::TAIL;
20573 SDValue Ret = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20574 if (CLI.CFIType)
20575 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20576 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
20577 return Ret;
20578 }
20579
20580 unsigned CallOpc = NeedSWGuarded ? RISCVISD::SW_GUARDED_CALL : RISCVISD::CALL;
20581 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
20582 if (CLI.CFIType)
20583 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
20584 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
20585 Glue = Chain.getValue(1);
20586
20587 // Mark the end of the call, which is glued to the call itself.
20588 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
20589 Glue = Chain.getValue(1);
20590
20591 // Assign locations to each value returned by this call.
20592 SmallVector<CCValAssign, 16> RVLocs;
20593 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
20594 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
20595
20596 // Copy all of the result registers out of their specified physreg.
20597 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
20598 auto &VA = RVLocs[i];
20599 // Copy the value out
20600 SDValue RetValue =
20601 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
20602 // Glue the RetValue to the end of the call sequence
20603 Chain = RetValue.getValue(1);
20604 Glue = RetValue.getValue(2);
20605
20606 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20607 assert(VA.needsCustom());
20608 SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
20609 MVT::i32, Glue);
20610 Chain = RetValue2.getValue(1);
20611 Glue = RetValue2.getValue(2);
20612 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
20613 RetValue2);
20614 } else
20615 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
20616
20617 InVals.push_back(RetValue);
20618 }
20619
20620 return Chain;
20621}
20622
20623 bool RISCVTargetLowering::CanLowerReturn(
20624 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
20625 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
20626 const Type *RetTy) const {
20627 SmallVector<CCValAssign, 16> RVLocs;
20628 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
20629
20630 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
20631 MVT VT = Outs[i].VT;
20632 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
20633 if (CC_RISCV(i, VT, VT, CCValAssign::Full, ArgFlags, CCInfo,
20634 /*IsFixed=*/true, /*IsRet=*/true, nullptr))
20635 return false;
20636 }
20637 return true;
20638}
20639
20640SDValue
20641 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
20642 bool IsVarArg,
20643 const SmallVectorImpl<ISD::OutputArg> &Outs,
20644 const SmallVectorImpl<SDValue> &OutVals,
20645 const SDLoc &DL, SelectionDAG &DAG) const {
20646 MachineFunction &MF = DAG.getMachineFunction();
20647 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20648
20649 // Stores the assignment of the return value to a location.
20650 SmallVector<CCValAssign, 16> RVLocs;
20651
20652 // Info about the registers and stack slot.
20653 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
20654 *DAG.getContext());
20655
20656 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
20657 nullptr, CC_RISCV);
20658
20659 if (CallConv == CallingConv::GHC && !RVLocs.empty())
20660 report_fatal_error("GHC functions return void only");
20661
20662 SDValue Glue;
20663 SmallVector<SDValue, 4> RetOps(1, Chain);
20664
20665 // Copy the result values into the output registers.
20666 for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
20667 SDValue Val = OutVals[OutIdx];
20668 CCValAssign &VA = RVLocs[i];
20669 assert(VA.isRegLoc() && "Can only return in registers!");
20670
20671 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
20672 // Handle returning f64 on RV32D with a soft float ABI.
20673 assert(VA.isRegLoc() && "Expected return via registers");
20674 assert(VA.needsCustom());
20675 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
20676 DAG.getVTList(MVT::i32, MVT::i32), Val);
20677 SDValue Lo = SplitF64.getValue(0);
20678 SDValue Hi = SplitF64.getValue(1);
20679 Register RegLo = VA.getLocReg();
20680 Register RegHi = RVLocs[++i].getLocReg();
20681
20682 if (STI.isRegisterReservedByUser(RegLo) ||
20683 STI.isRegisterReservedByUser(RegHi))
20684 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20685 MF.getFunction(),
20686 "Return value register required, but has been reserved."});
20687
20688 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
20689 Glue = Chain.getValue(1);
20690 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
20691 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
20692 Glue = Chain.getValue(1);
20693 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
20694 } else {
20695 // Handle a 'normal' return.
20696 Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
20697 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
20698
20699 if (STI.isRegisterReservedByUser(VA.getLocReg()))
20700 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
20701 MF.getFunction(),
20702 "Return value register required, but has been reserved."});
20703
20704 // Guarantee that all emitted copies are stuck together.
20705 Glue = Chain.getValue(1);
20706 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
20707 }
20708 }
20709
20710 RetOps[0] = Chain; // Update chain.
20711
20712 // Add the glue node if we have it.
20713 if (Glue.getNode()) {
20714 RetOps.push_back(Glue);
20715 }
20716
20717 if (any_of(RVLocs,
20718 [](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
20719 MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
20720
20721 unsigned RetOpc = RISCVISD::RET_GLUE;
20722 // Interrupt service routines use different return instructions.
20723 const Function &Func = DAG.getMachineFunction().getFunction();
20724 if (Func.hasFnAttribute("interrupt")) {
20725 if (!Func.getReturnType()->isVoidTy())
20727 "Functions with the interrupt attribute must have void return type!");
20728
20729 MachineFunction &MF = DAG.getMachineFunction();
20730 StringRef Kind =
20731 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
20732
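// "supervisor" handlers return with sret; otherwise (machine-mode handlers)
// mret is used.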
20733 if (Kind == "supervisor")
20734 RetOpc = RISCVISD::SRET_GLUE;
20735 else
20736 RetOpc = RISCVISD::MRET_GLUE;
20737 }
20738
20739 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
20740}
20741
20742void RISCVTargetLowering::validateCCReservedRegs(
20743 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
20744 MachineFunction &MF) const {
20745 const Function &F = MF.getFunction();
20746 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
20747
20748 if (llvm::any_of(Regs, [&STI](auto Reg) {
20749 return STI.isRegisterReservedByUser(Reg.first);
20750 }))
20751 F.getContext().diagnose(DiagnosticInfoUnsupported{
20752 F, "Argument register required, but has been reserved."});
20753}
20754
20755// Check if the result of the node is only used as a return value, as
20756// otherwise we can't perform a tail-call.
20757 bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
20758 if (N->getNumValues() != 1)
20759 return false;
20760 if (!N->hasNUsesOfValue(1, 0))
20761 return false;
20762
20763 SDNode *Copy = *N->user_begin();
20764
20765 if (Copy->getOpcode() == ISD::BITCAST) {
20766 return isUsedByReturnOnly(Copy, Chain);
20767 }
20768
20769 // TODO: Handle additional opcodes in order to support tail-calling libcalls
20770 // with soft float ABIs.
20771 if (Copy->getOpcode() != ISD::CopyToReg) {
20772 return false;
20773 }
20774
20775 // If the ISD::CopyToReg has a glue operand, we conservatively assume it
20776 // isn't safe to perform a tail call.
20777 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
20778 return false;
20779
20780 // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
20781 bool HasRet = false;
20782 for (SDNode *Node : Copy->users()) {
20783 if (Node->getOpcode() != RISCVISD::RET_GLUE)
20784 return false;
20785 HasRet = true;
20786 }
20787 if (!HasRet)
20788 return false;
20789
20790 Chain = Copy->getOperand(0);
20791 return true;
20792}
20793
20794 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
20795 return CI->isTailCall();
20796}
20797
20798const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
20799#define NODE_NAME_CASE(NODE) \
20800 case RISCVISD::NODE: \
20801 return "RISCVISD::" #NODE;
20802 // clang-format off
20803 switch ((RISCVISD::NodeType)Opcode) {
20804 case RISCVISD::FIRST_NUMBER:
20805 break;
20806 NODE_NAME_CASE(RET_GLUE)
20807 NODE_NAME_CASE(SRET_GLUE)
20808 NODE_NAME_CASE(MRET_GLUE)
20809 NODE_NAME_CASE(CALL)
20810 NODE_NAME_CASE(TAIL)
20811 NODE_NAME_CASE(SELECT_CC)
20812 NODE_NAME_CASE(BR_CC)
20813 NODE_NAME_CASE(BuildGPRPair)
20814 NODE_NAME_CASE(SplitGPRPair)
20815 NODE_NAME_CASE(BuildPairF64)
20816 NODE_NAME_CASE(SplitF64)
20817 NODE_NAME_CASE(ADD_LO)
20818 NODE_NAME_CASE(HI)
20819 NODE_NAME_CASE(LLA)
20820 NODE_NAME_CASE(ADD_TPREL)
20821 NODE_NAME_CASE(MULHSU)
20822 NODE_NAME_CASE(SHL_ADD)
20823 NODE_NAME_CASE(SLLW)
20824 NODE_NAME_CASE(SRAW)
20825 NODE_NAME_CASE(SRLW)
20826 NODE_NAME_CASE(DIVW)
20827 NODE_NAME_CASE(DIVUW)
20828 NODE_NAME_CASE(REMUW)
20829 NODE_NAME_CASE(ROLW)
20830 NODE_NAME_CASE(RORW)
20831 NODE_NAME_CASE(CLZW)
20832 NODE_NAME_CASE(CTZW)
20833 NODE_NAME_CASE(ABSW)
20834 NODE_NAME_CASE(FMV_H_X)
20835 NODE_NAME_CASE(FMV_X_ANYEXTH)
20836 NODE_NAME_CASE(FMV_X_SIGNEXTH)
20837 NODE_NAME_CASE(FMV_W_X_RV64)
20838 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
20839 NODE_NAME_CASE(FCVT_X)
20840 NODE_NAME_CASE(FCVT_XU)
20841 NODE_NAME_CASE(FCVT_W_RV64)
20842 NODE_NAME_CASE(FCVT_WU_RV64)
20843 NODE_NAME_CASE(STRICT_FCVT_W_RV64)
20844 NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
20845 NODE_NAME_CASE(FROUND)
20846 NODE_NAME_CASE(FCLASS)
20847 NODE_NAME_CASE(FSGNJX)
20848 NODE_NAME_CASE(FMAX)
20849 NODE_NAME_CASE(FMIN)
20850 NODE_NAME_CASE(FLI)
20851 NODE_NAME_CASE(READ_COUNTER_WIDE)
20852 NODE_NAME_CASE(BREV8)
20853 NODE_NAME_CASE(ORC_B)
20854 NODE_NAME_CASE(ZIP)
20855 NODE_NAME_CASE(UNZIP)
20856 NODE_NAME_CASE(CLMUL)
20857 NODE_NAME_CASE(CLMULH)
20858 NODE_NAME_CASE(CLMULR)
20859 NODE_NAME_CASE(MOPR)
20860 NODE_NAME_CASE(MOPRR)
20861 NODE_NAME_CASE(SHA256SIG0)
20862 NODE_NAME_CASE(SHA256SIG1)
20863 NODE_NAME_CASE(SHA256SUM0)
20864 NODE_NAME_CASE(SHA256SUM1)
20865 NODE_NAME_CASE(SM4KS)
20866 NODE_NAME_CASE(SM4ED)
20867 NODE_NAME_CASE(SM3P0)
20868 NODE_NAME_CASE(SM3P1)
20869 NODE_NAME_CASE(TH_LWD)
20870 NODE_NAME_CASE(TH_LWUD)
20871 NODE_NAME_CASE(TH_LDD)
20872 NODE_NAME_CASE(TH_SWD)
20873 NODE_NAME_CASE(TH_SDD)
20874 NODE_NAME_CASE(VMV_V_V_VL)
20875 NODE_NAME_CASE(VMV_V_X_VL)
20876 NODE_NAME_CASE(VFMV_V_F_VL)
20877 NODE_NAME_CASE(VMV_X_S)
20878 NODE_NAME_CASE(VMV_S_X_VL)
20879 NODE_NAME_CASE(VFMV_S_F_VL)
20880 NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
20881 NODE_NAME_CASE(READ_VLENB)
20882 NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
20883 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_SSAT)
20884 NODE_NAME_CASE(TRUNCATE_VECTOR_VL_USAT)
20885 NODE_NAME_CASE(VSLIDEUP_VL)
20886 NODE_NAME_CASE(VSLIDE1UP_VL)
20887 NODE_NAME_CASE(VSLIDEDOWN_VL)
20888 NODE_NAME_CASE(VSLIDE1DOWN_VL)
20889 NODE_NAME_CASE(VFSLIDE1UP_VL)
20890 NODE_NAME_CASE(VFSLIDE1DOWN_VL)
20891 NODE_NAME_CASE(VID_VL)
20892 NODE_NAME_CASE(VFNCVT_ROD_VL)
20893 NODE_NAME_CASE(VECREDUCE_ADD_VL)
20894 NODE_NAME_CASE(VECREDUCE_UMAX_VL)
20895 NODE_NAME_CASE(VECREDUCE_SMAX_VL)
20896 NODE_NAME_CASE(VECREDUCE_UMIN_VL)
20897 NODE_NAME_CASE(VECREDUCE_SMIN_VL)
20898 NODE_NAME_CASE(VECREDUCE_AND_VL)
20899 NODE_NAME_CASE(VECREDUCE_OR_VL)
20900 NODE_NAME_CASE(VECREDUCE_XOR_VL)
20901 NODE_NAME_CASE(VECREDUCE_FADD_VL)
20902 NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
20903 NODE_NAME_CASE(VECREDUCE_FMIN_VL)
20904 NODE_NAME_CASE(VECREDUCE_FMAX_VL)
20905 NODE_NAME_CASE(ADD_VL)
20906 NODE_NAME_CASE(AND_VL)
20907 NODE_NAME_CASE(MUL_VL)
20908 NODE_NAME_CASE(OR_VL)
20909 NODE_NAME_CASE(SDIV_VL)
20910 NODE_NAME_CASE(SHL_VL)
20911 NODE_NAME_CASE(SREM_VL)
20912 NODE_NAME_CASE(SRA_VL)
20913 NODE_NAME_CASE(SRL_VL)
20914 NODE_NAME_CASE(ROTL_VL)
20915 NODE_NAME_CASE(ROTR_VL)
20916 NODE_NAME_CASE(SUB_VL)
20917 NODE_NAME_CASE(UDIV_VL)
20918 NODE_NAME_CASE(UREM_VL)
20919 NODE_NAME_CASE(XOR_VL)
20920 NODE_NAME_CASE(AVGFLOORS_VL)
20921 NODE_NAME_CASE(AVGFLOORU_VL)
20922 NODE_NAME_CASE(AVGCEILS_VL)
20923 NODE_NAME_CASE(AVGCEILU_VL)
20924 NODE_NAME_CASE(SADDSAT_VL)
20925 NODE_NAME_CASE(UADDSAT_VL)
20926 NODE_NAME_CASE(SSUBSAT_VL)
20927 NODE_NAME_CASE(USUBSAT_VL)
20928 NODE_NAME_CASE(FADD_VL)
20929 NODE_NAME_CASE(FSUB_VL)
20930 NODE_NAME_CASE(FMUL_VL)
20931 NODE_NAME_CASE(FDIV_VL)
20932 NODE_NAME_CASE(FNEG_VL)
20933 NODE_NAME_CASE(FABS_VL)
20934 NODE_NAME_CASE(FSQRT_VL)
20935 NODE_NAME_CASE(FCLASS_VL)
20936 NODE_NAME_CASE(VFMADD_VL)
20937 NODE_NAME_CASE(VFNMADD_VL)
20938 NODE_NAME_CASE(VFMSUB_VL)
20939 NODE_NAME_CASE(VFNMSUB_VL)
20940 NODE_NAME_CASE(VFWMADD_VL)
20941 NODE_NAME_CASE(VFWNMADD_VL)
20942 NODE_NAME_CASE(VFWMSUB_VL)
20943 NODE_NAME_CASE(VFWNMSUB_VL)
20944 NODE_NAME_CASE(FCOPYSIGN_VL)
20945 NODE_NAME_CASE(SMIN_VL)
20946 NODE_NAME_CASE(SMAX_VL)
20947 NODE_NAME_CASE(UMIN_VL)
20948 NODE_NAME_CASE(UMAX_VL)
20949 NODE_NAME_CASE(BITREVERSE_VL)
20950 NODE_NAME_CASE(BSWAP_VL)
20951 NODE_NAME_CASE(CTLZ_VL)
20952 NODE_NAME_CASE(CTTZ_VL)
20953 NODE_NAME_CASE(CTPOP_VL)
20954 NODE_NAME_CASE(VFMIN_VL)
20955 NODE_NAME_CASE(VFMAX_VL)
20956 NODE_NAME_CASE(MULHS_VL)
20957 NODE_NAME_CASE(MULHU_VL)
20958 NODE_NAME_CASE(VFCVT_RTZ_X_F_VL)
20959 NODE_NAME_CASE(VFCVT_RTZ_XU_F_VL)
20960 NODE_NAME_CASE(VFCVT_RM_X_F_VL)
20961 NODE_NAME_CASE(VFCVT_RM_XU_F_VL)
20962 NODE_NAME_CASE(VFROUND_NOEXCEPT_VL)
20963 NODE_NAME_CASE(SINT_TO_FP_VL)
20964 NODE_NAME_CASE(UINT_TO_FP_VL)
20965 NODE_NAME_CASE(VFCVT_RM_F_XU_VL)
20966 NODE_NAME_CASE(VFCVT_RM_F_X_VL)
20967 NODE_NAME_CASE(FP_EXTEND_VL)
20968 NODE_NAME_CASE(FP_ROUND_VL)
20969 NODE_NAME_CASE(STRICT_FADD_VL)
20970 NODE_NAME_CASE(STRICT_FSUB_VL)
20971 NODE_NAME_CASE(STRICT_FMUL_VL)
20972 NODE_NAME_CASE(STRICT_FDIV_VL)
20973 NODE_NAME_CASE(STRICT_FSQRT_VL)
20974 NODE_NAME_CASE(STRICT_VFMADD_VL)
20975 NODE_NAME_CASE(STRICT_VFNMADD_VL)
20976 NODE_NAME_CASE(STRICT_VFMSUB_VL)
20977 NODE_NAME_CASE(STRICT_VFNMSUB_VL)
20978 NODE_NAME_CASE(STRICT_FP_ROUND_VL)
20979 NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
20980 NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
20981 NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
20982 NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
20983 NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
20984 NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
20985 NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
20986 NODE_NAME_CASE(STRICT_FSETCC_VL)
20987 NODE_NAME_CASE(STRICT_FSETCCS_VL)
20988 NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
20989 NODE_NAME_CASE(VWMUL_VL)
20990 NODE_NAME_CASE(VWMULU_VL)
20991 NODE_NAME_CASE(VWMULSU_VL)
20992 NODE_NAME_CASE(VWADD_VL)
20993 NODE_NAME_CASE(VWADDU_VL)
20994 NODE_NAME_CASE(VWSUB_VL)
20995 NODE_NAME_CASE(VWSUBU_VL)
20996 NODE_NAME_CASE(VWADD_W_VL)
20997 NODE_NAME_CASE(VWADDU_W_VL)
20998 NODE_NAME_CASE(VWSUB_W_VL)
20999 NODE_NAME_CASE(VWSUBU_W_VL)
21000 NODE_NAME_CASE(VWSLL_VL)
21001 NODE_NAME_CASE(VFWMUL_VL)
21002 NODE_NAME_CASE(VFWADD_VL)
21003 NODE_NAME_CASE(VFWSUB_VL)
21004 NODE_NAME_CASE(VFWADD_W_VL)
21005 NODE_NAME_CASE(VFWSUB_W_VL)
21006 NODE_NAME_CASE(VWMACC_VL)
21007 NODE_NAME_CASE(VWMACCU_VL)
21008 NODE_NAME_CASE(VWMACCSU_VL)
21009 NODE_NAME_CASE(SETCC_VL)
21010 NODE_NAME_CASE(VMERGE_VL)
21011 NODE_NAME_CASE(VMAND_VL)
21012 NODE_NAME_CASE(VMOR_VL)
21013 NODE_NAME_CASE(VMXOR_VL)